Skip to content

Commit fd566f4

Browse files
authored
[tn] english, fix crash on "" (#249)
* [tn] english, fix crash on ""
* [tn] english, fix crash on ""
1 parent 053507e commit fd566f4

3 files changed

Lines changed: 27 additions & 13 deletions

File tree

tn/english/normalizer.py

Lines changed: 24 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -57,13 +57,21 @@ def build_tagger(self):
5757
punct = add_weight(Punctuation().tagger, 2.00)
5858
rang = add_weight(Range().tagger, 1.01)
5959
# TODO(xcsong): add roman
60-
tagger = punct.star + \
61-
(cardinal | ordinal | word
62-
| date | decimal | fraction
63-
| time | measure | money
60+
tagger = \
61+
(cardinal
62+
| ordinal
63+
| word
64+
| date
65+
| decimal
66+
| fraction
67+
| time
68+
| measure
69+
| money
6470
| telephone | electronic
6571
| whitelist
66-
| rang).optimize() + (punct.plus | self.DELETE_SPACE)
72+
| rang
73+
| punct
74+
).optimize() + (punct.plus | self.DELETE_SPACE)
6775
# delete the last space
6876
self.tagger = tagger.star @ self.build_rule(delete(' '), r='[EOS]')
6977

@@ -83,14 +91,20 @@ def build_verbalizer(self):
8391
punct = Punctuation().verbalizer
8492
rang = Range().verbalizer
8593
verbalizer = \
86-
(cardinal | ordinal | word
87-
| date | decimal
88-
| fraction | time
89-
| measure | money
94+
(cardinal
95+
| ordinal
96+
| word
97+
| date
98+
| decimal
99+
| fraction
100+
| time
101+
| measure
102+
| money
90103
| telephone
91104
| electronic
92105
| whitelist
93106
| punct
94-
| rang).optimize() + (punct.plus | self.INSERT_SPACE)
107+
| rang
108+
).optimize() + (punct.plus | self.INSERT_SPACE)
95109
self.verbalizer = verbalizer.star @ self.build_rule(delete(' '),
96110
r='[EOS]')

tn/english/test/data/normalizer.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,4 @@ The National Map, accessed April 1, 2011" Site Description of Koppers Co. From t
33
.345" and ".456" "9.456" or 6.7890" => point three four five" and ".four hundred and fifty six" "nine point four five six" or six point seven eight nine oh"
44
The museum is open Mon.-Sun. children of 3-4 years 123 The plan will help you lose 3-4 pounds the first week, and 1-2 pounds the weeks thereafter. => The museum is open Monday to Sunday children of three to four years one hundred and twenty three The plan will help you lose three to four pounds the first week, and one to two pounds the weeks thereafter.
55
Try searching for 'Toyota' or 'Investment' => Try searching for 'Toyota' or 'Investment'
6+
"" => ""

tn/main.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -73,9 +73,8 @@ def main():
7373
full_to_half=str2bool(args.full_to_half),
7474
tag_oov=str2bool(args.tag_oov))
7575
elif args.language == "en":
76-
normalizer = EnNormalizer(
77-
cache_dir=args.cache_dir,
78-
overwrite_cache=args.overwrite_cache)
76+
normalizer = EnNormalizer(cache_dir=args.cache_dir,
77+
overwrite_cache=args.overwrite_cache)
7978

8079
if args.text:
8180
print(normalizer.tag(args.text))

0 commit comments

Comments
 (0)