@@ -5,35 +5,39 @@ import (
55 "fmt"
66 "github.com/gocolly/colly/v2"
77 "github.com/its-my-data/doubak/collector"
8- "github.com/its-my-data/doubak/proto"
8+ p "github.com/its-my-data/doubak/proto"
99 "math"
1010 "time"
1111)
1212
1313// Defining flags.
// NOTE(review): this span is a rendered diff, not plain Go. Going by the hunk
// header at the top, lines marked "-" are the removed declarations and lines
// marked "+" are their replacements; the leading digits are old/new line
// numbers fused onto the text.
//
// Summary of the change: every flag name that used to be a string literal is
// now obtained from the proto package (imported under the alias p) via
// p.Flag_<name>.String(). All flags are registered at package level on the
// default flag set and are read after flag.Parse() runs in main.
14- var userName = flag .String ("user" , "" , "The Douban user name. e.g. mewcatcher" )
15- var tasksToRun = flag .String ("tasks" , "collect, parse, publish" ,
14+ var userName = flag .String (p .Flag_user .String (), "" ,
15+ "The Douban user name. e.g. mewcatcher" )
// tasks: both the default value and the list appended to the help text now
// come from p.ConcatProtoEnum(p.Task_name, ", ") instead of the hard-coded
// task list.
16+ var tasksToRun = flag .String (p .Flag_tasks .String (),
17+ p .ConcatProtoEnum (p .Task_name , ", " ),
1618 "Tasks to run (order doesn't matter). Can be one/more of the following: " +
17- "collect, parse, publish." )
18- var targetCategories = flag .String ("categories" , "" ,
19+ p .ConcatProtoEnum (p .Task_name , ", " )+ "." )
// categories: the default changes from the empty string to the joined
// p.Category_name list; the supported-types list in the help text is built
// from the same call.
20+ var targetCategories = flag .String (p .Flag_categories .String (),
21+ p .ConcatProtoEnum (p .Category_name , ", " ),
1922 "A comma separated content types list to crawl. Default is all. " +
20- "Supported types are: book, movie, music, game, app, review." )
21- var outputDir = flag .String ("output_dir" , "./output" , "The output path." )
22- var continueRun = flag .Bool ("continue" , true ,
23+ "Supported types are: " + p .ConcatProtoEnum (p .Category_name , ", " )+ "." )
24+ var outputDir = flag .String (p .Flag_output_dir .String (), "./output" ,
25+ "The output path." )
26+ var continueRun = flag .Bool (p .Flag_continue .String (), true ,
2327 "Continue or restart with override." )
24- var proxy = flag .String ("proxy" , "" , "Proxy to use when crawling." )
25- var numRetry = flag .Uint64 ("max_retry" , math .MaxUint64 ,
28+ var proxy = flag .String (p .Flag_proxy .String (), "" ,
29+ "Proxy to use when crawling." )
// max_retry defaults to math.MaxUint64, the largest value a uint64 can hold.
30+ var numRetry = flag .Uint64 (p .Flag_max_retry .String (), math .MaxUint64 ,
2631 "The number of retries when errors encountered." )
// The error returned by time.ParseDuration is discarded with the blank
// identifier. NOTE(review): for a fixed value, the constant expression
// 100 * time.Millisecond would give the same duration with no error to ignore.
2732var defaultRequestDelay , _ = time .ParseDuration ("100ms" )
28- var requestDelay = flag .Duration ("req_delay" , defaultRequestDelay ,
29- "Delay betwee two requests, used to control QPS . This may be replaced by " +
30- "a QPS flag when proxy pool and parallel requests are added ." )
// req_delay: same default as before; only the flag-name source and the help
// text wording change (the removed wording contained a typo).
33+ var requestDelay = flag .Duration (p . Flag_req_delay . String () , defaultRequestDelay ,
34+ "Min time between any two requests, used to reduce server load . This may " +
35+ "be replaced by a QPS flag when proxy pool and parallel requests are implemented ." )
3136
3237func main () {
3338 flag .Parse ()
3439
3540 collector .Collect ()
36- fmt .Println (proto .Flag_user .String () + proto .ConcatProtoEnum (nil , "" ))
3741
3842 c := colly .NewCollector ()
3943 c .OnHTML ("a[href]" , func (e * colly.HTMLElement ) {
0 commit comments