@@ -42,9 +42,13 @@ def build_tagger(self):
4242 special_tilde = string_file (
4343 get_abs_path (
4444 '../itn/chinese/data/number/special_tilde.tsv' )) # 七八十->70~80
45+ special_tilde = special_tilde + add_weight (
46+ (accep ("万" ) | accep ("亿" )), - 0.1 ).ques
4547 special_dash = string_file (
4648 get_abs_path (
4749 '../itn/chinese/data/number/special_dash.tsv' )) # 七八十->70-80
50+ special_dash = special_dash + add_weight (
51+ (accep ("万" ) | accep ("亿" )), - 0.1 ).ques
4852 sign = string_file (
4953 get_abs_path ('../itn/chinese/data/number/sign.tsv' )) # + -
5054 dot = string_file (
@@ -101,12 +105,14 @@ def build_tagger(self):
101105 (number + accep ('亿' ) + delete ('零' ).ques ).ques + number )
102106 # 负的xxx 1.11, 1.01
103107 number = sign .ques + number + (dot + digits .plus ).ques
104- # 五六万 => 5~6万,三五千 => 3000~5000,六七百 => 600~700,三四十 => 30~40
108+ # 五六万 => 5~6万,三五千 => 3000~5000,六七百 => 600~700,三四十 => 30~40, 三四十亿 => 30~40亿
105109 number |= special_tilde
106- # 十七八 => 17-8, 四十五六 => 45-6, 三百七八十 => 370-80
110+ # 十七八 => 17-8, 四十五六 => 45-6, 三百七八十 => 370-80, 四十五六万 => 45-6万, 一万六七 => 16000-7000
107111 _special_dash = cross ('十' , '1' ) + special_dash
108112 _special_dash |= digit + delete ('十' ) + special_dash
109113 _special_dash |= digit + delete ('百' ) + special_dash
114+ _special_dash |= digit + delete ('万' ) + digit + insert (
115+ '000-' ) + digit + insert ('000' )
110116 number |= _special_dash
111117
112118 self .number = number .optimize ()
0 commit comments