Skip to content

Commit bc285a3

Browse files
committed
Added category enum proto
1 parent a8bd0cf commit bc285a3

9 files changed

Lines changed: 214 additions & 28 deletions

File tree

collector/BUILD.bazel

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,5 @@ go_library(
55
srcs = ["collector.go"],
66
importpath = "github.com/its-my-data/doubak/collector",
77
visibility = ["//visibility:public"],
8+
deps = ["//proto"],
89
)

collector/collector.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,11 @@ package collector
33
import (
44
"flag"
55
"fmt"
6+
p "github.com/its-my-data/doubak/proto"
67
)
78

89
// Collect starts the major collection process.
910
func Collect() {
10-
fmt.Println(flag.Lookup("tasks").Value.(flag.Getter).Get().(string))
11+
user := flag.Lookup(p.Flag_categories.String()).Value.(flag.Getter).Get().(string)
12+
fmt.Println(user)
1113
}

doubak.go

Lines changed: 18 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -5,35 +5,39 @@ import (
55
"fmt"
66
"github.com/gocolly/colly/v2"
77
"github.com/its-my-data/doubak/collector"
8-
"github.com/its-my-data/doubak/proto"
8+
p "github.com/its-my-data/doubak/proto"
99
"math"
1010
"time"
1111
)
1212

1313
// Defining flags.
14-
var userName = flag.String("user", "", "The Douban user name. e.g. mewcatcher")
15-
var tasksToRun = flag.String("tasks", "collect, parse, publish",
14+
var userName = flag.String(p.Flag_user.String(), "",
15+
"The Douban user name. e.g. mewcatcher")
16+
var tasksToRun = flag.String(p.Flag_tasks.String(),
17+
p.ConcatProtoEnum(p.Task_name, ", "),
1618
"Tasks to run (order doesn't matter). Can be one/more of the following: "+
17-
"collect, parse, publish.")
18-
var targetCategories = flag.String("categories", "",
19+
p.ConcatProtoEnum(p.Task_name, ", ")+".")
20+
var targetCategories = flag.String(p.Flag_categories.String(),
21+
p.ConcatProtoEnum(p.Category_name, ", "),
1922
"A comma separated content types list to crawl. Default is all. "+
20-
"Supported types are: book, movie, music, game, app, review.")
21-
var outputDir = flag.String("output_dir", "./output", "The output path.")
22-
var continueRun = flag.Bool("continue", true,
23+
"Supported types are: "+p.ConcatProtoEnum(p.Category_name, ", ")+".")
24+
var outputDir = flag.String(p.Flag_output_dir.String(), "./output",
25+
"The output path.")
26+
var continueRun = flag.Bool(p.Flag_continue.String(), true,
2327
"Continue or restart with override.")
24-
var proxy = flag.String("proxy", "", "Proxy to use when crawling.")
25-
var numRetry = flag.Uint64("max_retry", math.MaxUint64,
28+
var proxy = flag.String(p.Flag_proxy.String(), "",
29+
"Proxy to use when crawling.")
30+
var numRetry = flag.Uint64(p.Flag_max_retry.String(), math.MaxUint64,
2631
"The number of retries when errors encountered.")
2732
var defaultRequestDelay, _ = time.ParseDuration("100ms")
28-
var requestDelay = flag.Duration("req_delay", defaultRequestDelay,
29-
"Delay betwee two requests, used to control QPS. This may be replaced by "+
30-
"a QPS flag when proxy pool and parallel requests are added.")
33+
var requestDelay = flag.Duration(p.Flag_req_delay.String(), defaultRequestDelay,
34+
"Min time between any two requests, used to reduce server load. This may "+
35+
"be replaced by a QPS flag when proxy pool and parallel requests are implemented.")
3136

3237
func main() {
3338
flag.Parse()
3439

3540
collector.Collect()
36-
fmt.Println(proto.Flag_user.String() + proto.ConcatProtoEnum(nil, ""))
3741

3842
c := colly.NewCollector()
3943
c.OnHTML("a[href]", func(e *colly.HTMLElement) {

proto/BUILD.bazel

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ load("@io_bazel_rules_go//proto:def.bzl", "go_proto_library")
55
proto_library(
66
name = "proto_proto",
77
srcs = [
8+
"category.proto",
89
"flag.proto",
910
"task.proto",
1011
],

proto/category.pb.go

Lines changed: 95 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

proto/category.proto

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
syntax = "proto3";
2+
3+
package proto;
4+
5+
option go_package = "github.com/its-my-data/doubak/proto";
6+
7+
// Category defines the supported categories.
8+
// Full list supported by Douban are:
9+
// - 书籍 book
10+
// - 电影 movie
11+
// - 音乐 music
12+
// - 游戏 game
13+
// - 移动应用 app
14+
// - 评论 review
15+
// - 小组 group (not supported)
16+
// - 日记 note (not supported)
17+
// - 图片 album (not supported)
18+
// - 小站 site (not supported)
19+
// - 同城活动 activity (not supported)
20+
// - 舞台剧 drama (not supported)
21+
// - 豆品 thing (not supported)
22+
enum Category {
// The value names are user-facing: doubak.go joins the generated Category_name
// map to build the default value and the help text of the "categories" flag,
// which is why they are plain lowercase words.
// NOTE(review): in proto3 the zero value is the default for an unset enum
// field, so an unset Category reads as "book" — confirm that is intended.
23+
book = 0;  // Books.
24+
movie = 1;  // Movies.
25+
music = 2;  // Music.
26+
game = 3;  // Games.
27+
app = 4;  // Mobile apps.
28+
review = 5;  // Reviews.
29+
}

proto/flag.pb.go

Lines changed: 39 additions & 10 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

proto/flag.proto

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,4 +11,22 @@ enum Flag {
1111

1212
// Tasks to run.
1313
tasks = 1;
14+
15+
// Categories to run on.
16+
categories = 2;
17+
18+
// Output path/directory.
19+
output_dir = 3;
20+
21+
// Continue running or starting over with overriding existing files.
22+
continue = 4;
23+
24+
// Proxy used to send each request via.
25+
proxy = 5;
26+
27+
// Max number of retries.
28+
max_retry = 6;
29+
30+
// Min time between any two requets.
31+
req_delay = 7;
1432
}

proto/util.go

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,14 @@
11
package proto
22

3+
import (
	"sort"
	"strings"
)
6+
37
// ConcatProtoEnum concats enum proto values to a string.
4-
func ConcatProtoEnum(p interface{}, separator string) string {
5-
var _ Task
6-
return ""
8+
// ConcatProtoEnum joins the value names of a proto enum into one string.
//
// nameMap is an enum's number-to-name map (for example the generated Task_name
// or Category_name) and separator is placed between consecutive names. The
// names are emitted in ascending enum-number order. Go does not define the
// iteration order of a map, so ranging over nameMap directly would make the
// result (used for flag defaults and help text) differ from run to run.
// A nil or empty map yields the empty string.
func ConcatProtoEnum(nameMap map[int32]string, separator string) string {
	// Collect and sort the enum numbers to obtain a stable order.
	numbers := make([]int32, 0, len(nameMap))
	for n := range nameMap {
		numbers = append(numbers, n)
	}
	sort.Slice(numbers, func(i, j int) bool { return numbers[i] < numbers[j] })

	names := make([]string, 0, len(numbers))
	for _, n := range numbers {
		names = append(names, nameMap[n])
	}
	return strings.Join(names, separator)
}

0 commit comments

Comments
 (0)