Skip to content

Commit eacfc8b

Browse files
authored
[tn] english, fix <p> (#251)
1 parent fd566f4 commit eacfc8b

2 files changed

Lines changed: 10 additions & 5 deletions

File tree

tn/english/rules/punctuation.py

Lines changed: 9 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -61,14 +61,15 @@ def build_tagger(self):
6161
punct = closure(self.punct | cross('\\', '\\\\\\') | cross('"', '\\"'),
6262
1)
6363

64-
emphasis = (
64+
self.emphasis = (
6565
accep("<") +
6666
((
6767
closure(self.NOT_SPACE - union("<", ">"), 1) + # noqa
6868
closure(accep("/"), 0, 1)) # noqa
6969
| (accep("/") + closure(self.NOT_SPACE - union("<", ">"), 1))) +
7070
accep(">")) # noqa
71-
punct = plurals._priority_union(emphasis, punct, closure(self.VCHAR))
71+
punct = plurals._priority_union(self.emphasis, punct,
72+
closure(self.VCHAR))
7273

7374
self.graph = punct
7475
final_graph = insert("v: \"") + add_weight(
@@ -78,7 +79,10 @@ def build_tagger(self):
7879

7980
def build_verbalizer(self):
8081
punct = closure(
81-
self.punct | cross('\\\\\\', '\\') | cross('\\"', '"')
82-
| accep(" "), 1)
83-
verbalizer = delete('v: "') + punct + delete('"')
82+
self.punct | self.emphasis | cross('\\\\\\', '\\')
83+
| cross('\\"', '"'), 1)
84+
verbalizer = delete('v: "') + add_weight(accep(" "), -1.0).star \
85+
+ punct \
86+
+ add_weight(accep(" "), -1.0).star \
87+
+ delete('"')
8488
self.verbalizer = self.delete_tokens(verbalizer)

tn/english/test/data/normalizer.txt

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4,3 +4,4 @@ The National Map, accessed April 1, 2011" Site Description of Koppers Co. From t
44
The museum is open Mon.-Sun. children of 3-4 years 123 The plan will help you lose 3-4 pounds the first week, and 1-2 pounds the weeks thereafter. => The museum is open Monday to Sunday children of three to four years one hundred and twenty three The plan will help you lose three to four pounds the first week, and one to two pounds the weeks thereafter.
55
Try searching for 'Toyota' or 'Investment' => Try searching for 'Toyota' or 'Investment'
66
"" => ""
7+
The HTML tag <p> defines a paragraph. => The HTML tag <p> defines a paragraph.

0 commit comments

Comments
 (0)