Skip to content

Commit 1c3018d

Browse files
authored
[refactor] support building fst online (#230)
1 parent 385e35f commit 1c3018d

24 files changed

Lines changed: 128 additions & 71 deletions

itn/chinese/inverse_normalizer.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -37,7 +37,7 @@ def __init__(self,
3737
enable_standalone_number=True,
3838
enable_0_to_9=False,
3939
enable_million=False):
40-
super().__init__(name='inverse_normalizer', ordertype='itn')
40+
super().__init__(name='zh_inverse_normalizer', ordertype='itn')
4141
self.convert_number = enable_standalone_number
4242
self.enable_0_to_9 = enable_0_to_9
4343
self.enable_million = enable_million

itn/chinese/rules/cardinal.py

Lines changed: 13 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,7 @@
1313
# limitations under the License.
1414

1515
from tn.processor import Processor
16+
from tn.utils import get_abs_path
1617

1718
from pynini import cross, accep, string_file
1819
from pynini.lib.pynutil import delete, insert, add_weight
@@ -34,14 +35,20 @@ def __init__(self,
3435
self.build_verbalizer()
3536

3637
def build_tagger(self):
37-
zero = string_file('itn/chinese/data/number/zero.tsv') # 0
38-
digit = string_file('itn/chinese/data/number/digit.tsv') # 1 ~ 9
38+
zero = string_file(
39+
get_abs_path('../itn/chinese/data/number/zero.tsv')) # 0
40+
digit = string_file(
41+
get_abs_path('../itn/chinese/data/number/digit.tsv')) # 1 ~ 9
3942
special_tilde = string_file(
40-
'itn/chinese//data/number/special_tilde.tsv') # 七八十->70~80
43+
get_abs_path(
44+
'../itn/chinese/data/number/special_tilde.tsv')) # 七八十->70~80
4145
special_dash = string_file(
42-
'itn/chinese//data/number/special_dash.tsv') # 七八十->70-80
43-
sign = string_file('itn/chinese/data/number/sign.tsv') # + -
44-
dot = string_file('itn/chinese/data/number/dot.tsv') # .
46+
get_abs_path(
47+
'../itn/chinese/data/number/special_dash.tsv')) # 七八十->70-80
48+
sign = string_file(
49+
get_abs_path('../itn/chinese/data/number/sign.tsv')) # + -
50+
dot = string_file(
51+
get_abs_path('../itn/chinese/data/number/dot.tsv')) # .
4552

4653
# 0. 基础数字
4754
addzero = insert('0')

itn/chinese/rules/date.py

Lines changed: 7 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,7 @@
1313
# limitations under the License.
1414

1515
from tn.processor import Processor
16+
from tn.utils import get_abs_path
1617

1718
from pynini import string_file, accep
1819
from pynini.lib.pynutil import delete, insert
@@ -26,14 +27,16 @@ def __init__(self):
2627
self.build_verbalizer()
2728

2829
def build_tagger(self):
29-
digit = string_file('itn/chinese/data/number/digit.tsv') # 1 ~ 9
30-
zero = string_file('itn/chinese/data/number/zero.tsv') # 0
30+
digit = string_file(
31+
get_abs_path('../itn/chinese/data/number/digit.tsv')) # 1 ~ 9
32+
zero = string_file(
33+
get_abs_path('../itn/chinese/data/number/zero.tsv')) # 0
3134

3235
yyyy = digit + (digit | zero)**3 # 二零零八年
3336
yyy = digit + (digit | zero)**2 # 公元一六八年
3437
yy = (digit | zero)**2 # 零八年奥运会
35-
mm = string_file('itn/chinese/data/date/mm.tsv')
36-
dd = string_file('itn/chinese/data/date/dd.tsv')
38+
mm = string_file(get_abs_path('../itn/chinese/data/date/mm.tsv'))
39+
dd = string_file(get_abs_path('../itn/chinese/data/date/dd.tsv'))
3740

3841
year = insert('year: "') + (yyyy | yyy | yy) + \
3942
delete('年') + insert('" ')

itn/chinese/rules/fraction.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,7 @@
1414

1515
from itn.chinese.rules.cardinal import Cardinal
1616
from tn.processor import Processor
17+
from tn.utils import get_abs_path
1718

1819
from pynini import string_file
1920
from pynini.lib.pynutil import delete, insert, add_weight
@@ -28,7 +29,8 @@ def __init__(self):
2829

2930
def build_tagger(self):
3031
number = Cardinal().number
31-
sign = string_file('itn/chinese/data/number/sign.tsv') # + -
32+
sign = string_file(
33+
get_abs_path('../itn/chinese/data/number/sign.tsv')) # + -
3234

3335
# NOTE(xcsong): default weight = 1.0, set to -1.0 means higher priority
3436
# For example,

itn/chinese/rules/license_plate.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
# limitations under the License.
1414

1515
from tn.processor import Processor
16+
from tn.utils import get_abs_path
1617

1718
from pynini import string_file
1819
from pynini.lib.pynutil import insert
@@ -26,11 +27,14 @@ def __init__(self):
2627
self.build_verbalizer()
2728

2829
def build_tagger(self):
29-
digit = string_file('itn/chinese/data/number/digit.tsv') # 1 ~ 9
30-
zero = string_file('itn/chinese/data/number/zero.tsv') # 0
30+
digit = string_file(
31+
get_abs_path('../itn/chinese/data/number/digit.tsv')) # 1 ~ 9
32+
zero = string_file(
33+
get_abs_path('../itn/chinese/data/number/zero.tsv')) # 0
3134
digits = zero | digit
3235
province = string_file(
33-
'itn/chinese/data/license_plate/province.tsv') # 皖
36+
get_abs_path(
37+
'../itn/chinese/data/license_plate/province.tsv')) # 皖
3438
license_plate = province + self.ALPHA + (self.ALPHA | digits)**5
3539
license_plate |= province + self.ALPHA + (self.ALPHA | digits)**6
3640
tagger = insert('value: "') + license_plate + insert('"')

itn/chinese/rules/math.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,7 @@
1414

1515
from itn.chinese.rules.cardinal import Cardinal
1616
from tn.processor import Processor
17+
from tn.utils import get_abs_path
1718

1819
from pynini import string_file
1920
from pynini.lib.pynutil import insert
@@ -27,7 +28,8 @@ def __init__(self):
2728
self.build_verbalizer()
2829

2930
def build_tagger(self):
30-
operator = string_file('itn/chinese/data/math/operator.tsv')
31+
operator = string_file(
32+
get_abs_path('../itn/chinese/data/math/operator.tsv'))
3133

3234
number = Cardinal().number
3335
tagger = (number + (operator + number).plus)

itn/chinese/rules/measure.py

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,7 @@
1414

1515
from itn.chinese.rules.cardinal import Cardinal
1616
from tn.processor import Processor
17+
from tn.utils import get_abs_path
1718

1819
from pynini import string_file, accep, cross
1920
from pynini.lib.pynutil import delete, insert, add_weight
@@ -29,9 +30,12 @@ def __init__(self, exclude_one=True, enable_0_to_9=True):
2930
self.build_verbalizer()
3031

3132
def build_tagger(self):
32-
units_en = string_file('itn/chinese/data/measure/units_en.tsv')
33-
units_zh = string_file('itn/chinese/data/measure/units_zh.tsv')
34-
sign = string_file('itn/chinese/data/number/sign.tsv') # + -
33+
units_en = string_file(
34+
get_abs_path('../itn/chinese/data/measure/units_en.tsv'))
35+
units_zh = string_file(
36+
get_abs_path('../itn/chinese/data/measure/units_zh.tsv'))
37+
sign = string_file(
38+
get_abs_path('../itn/chinese/data/number/sign.tsv')) # + -
3539
to = cross('到', '~') | cross('到百分之', '~')
3640

3741
units = add_weight(

itn/chinese/rules/money.py

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,7 @@
1414

1515
from itn.chinese.rules.cardinal import Cardinal
1616
from tn.processor import Processor
17+
from tn.utils import get_abs_path
1718

1819
from pynini import string_file
1920
from pynini.lib.pynutil import delete, insert
@@ -28,9 +29,11 @@ def __init__(self, enable_0_to_9=True):
2829
self.build_verbalizer()
2930

3031
def build_tagger(self):
31-
code = string_file('itn/chinese/data/money/code.tsv')
32-
symbol = string_file('itn/chinese/data/money/symbol.tsv')
33-
digit = string_file('itn/chinese/data/number/digit.tsv') # 1 ~ 9
32+
code = string_file(get_abs_path('../itn/chinese/data/money/code.tsv'))
33+
symbol = string_file(
34+
get_abs_path('../itn/chinese/data/money/symbol.tsv'))
35+
digit = string_file(
36+
get_abs_path('../itn/chinese/data/number/digit.tsv')) # 1 ~ 9
3437

3538
number = Cardinal().number if self.enable_0_to_9 else \
3639
Cardinal().number_exclude_0_to_9

itn/chinese/rules/postprocessor.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
# limitations under the License.
1515

1616
from tn.processor import Processor
17+
from tn.utils import get_abs_path
1718

1819
from pynini import string_file
1920
from pynini.lib.pynutil import delete
@@ -23,7 +24,8 @@ class PostProcessor(Processor):
2324

2425
def __init__(self, remove_interjections=True):
2526
super().__init__(name='postprocessor')
26-
blacklist = string_file('itn/chinese/data/default/blacklist.tsv')
27+
blacklist = string_file(
28+
get_abs_path('../itn/chinese/data/default/blacklist.tsv'))
2729

2830
processor = self.VSIGMA
2931
if remove_interjections:

itn/chinese/rules/time.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
# limitations under the License.
1414

1515
from tn.processor import Processor
16+
from tn.utils import get_abs_path
1617

1718
from pynini import string_file
1819
from pynini.lib.pynutil import delete, insert
@@ -26,10 +27,10 @@ def __init__(self):
2627
self.build_verbalizer()
2728

2829
def build_tagger(self):
29-
h = string_file('itn/chinese/data/time/hour.tsv')
30-
m = string_file('itn/chinese/data/time/minute.tsv')
31-
s = string_file('itn/chinese/data/time/second.tsv')
32-
noon = string_file('itn/chinese/data/time/noon.tsv')
30+
h = string_file(get_abs_path('../itn/chinese/data/time/hour.tsv'))
31+
m = string_file(get_abs_path('../itn/chinese/data/time/minute.tsv'))
32+
s = string_file(get_abs_path('../itn/chinese/data/time/second.tsv'))
33+
noon = string_file(get_abs_path('../itn/chinese/data/time/noon.tsv'))
3334

3435
tagger = ((insert('noon: "') + noon + insert('" ')).ques +
3536
insert('hour: "') + h + insert('"') + insert(' minute: "') +

0 commit comments

Comments
 (0)