11# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
2+ # Copyright (c) 2024, WENET COMMUNITY. Xingchen Song (sxc19@tsinghua.org.cn).
23#
34# Licensed under the Apache License, Version 2.0 (the "License");
45# you may not use this file except in compliance with the License.
1213# See the License for the specific language governing permissions and
1314# limitations under the License.
1415
15-
1616import pynini
1717from pynini .lib import pynutil
1818
19-
2019from tn .processor import Processor
2120from tn .utils import get_abs_path , load_labels
2221from tn .english .rules .ordinal import Ordinal
2322
2423
2524class Roman (Processor ):
25+
2626 def __init__ (self , deterministic : bool = False ):
2727 """
2828 Args:
@@ -39,9 +39,11 @@ def build_tagger(self):
3939 Finite state transducer for classifying roman numbers:
4040 e.g. "IV" -> roman { integer: "four" }
4141 """
42- roman_dict = load_labels (get_abs_path ("english/data/roman/roman_to_spoken.tsv" ))
42+ roman_dict = load_labels (
43+ get_abs_path ("english/data/roman/roman_to_spoken.tsv" ))
4344 default_graph = pynini .string_map (roman_dict ).optimize ()
44- default_graph = pynutil .insert ("integer: \" " ) + default_graph + pynutil .insert ("\" " )
45+ default_graph = pynutil .insert (
46+ "integer: \" " ) + default_graph + pynutil .insert ("\" " )
4547 ordinal_limit = 19
4648
4749 if self .deterministic :
@@ -50,56 +52,52 @@ def build_tagger(self):
5052 else :
5153 start_idx = 0
5254
53- graph_teens = pynini .string_map ([x [0 ] for x in roman_dict [start_idx :ordinal_limit ]]).optimize ()
55+ graph_teens = pynini .string_map (
56+ [x [0 ] for x in roman_dict [start_idx :ordinal_limit ]]).optimize ()
5457
5558 # roman numerals up to ordinal_limit with a preceding name are converted to ordinal form
5659 names = get_names ()
57- graph = (
58- pynutil .insert ("key_the_ordinal: \" " )
59- + names
60- + pynutil .insert ("\" " )
61- + pynini .accep (" " )
62- + graph_teens @ default_graph
63- ).optimize ()
60+ graph = (pynutil .insert ("key_the_ordinal: \" " ) + names +
61+ pynutil .insert ("\" " ) + pynini .accep (" " ) +
62+ graph_teens @ default_graph ).optimize ()
6463
6564 # single symbol roman numerals with preceding key words (multiple formats) are converted to cardinal form
6665 key_words = []
67- for k_word in load_labels (get_abs_path ("english/data/roman/key_word.tsv" )):
66+ for k_word in load_labels (
67+ get_abs_path ("english/data/roman/key_word.tsv" )):
6868 key_words .append (k_word )
6969 key_words .append ([k_word [0 ][0 ].upper () + k_word [0 ][1 :]])
7070 key_words .append ([k_word [0 ].upper ()])
7171
7272 key_words = pynini .string_map (key_words ).optimize ()
73- graph |= (
74- pynutil . insert ( "key_cardinal: \" " ) + key_words + pynutil .insert ("\" " ) + pynini .accep (" " ) + default_graph
75- ).optimize ()
73+ graph |= (pynutil . insert ( "key_cardinal: \" " ) + key_words +
74+ pynutil .insert ("\" " ) + pynini .accep (" " ) +
75+ default_graph ).optimize ()
7676
7777 if self .deterministic :
7878 # roman numerals of two or more ALPHA characters, up to 49
7979 roman_to_cardinal = pynini .compose (
8080 pynini .closure (self .ALPHA , 2 ),
81- (
82- pynutil .insert ("default_cardinal: \" default\" " )
83- + (pynini .string_map ([x [0 ] for x in roman_dict [:50 ]]).optimize ()) @ default_graph
84- ),
81+ (pynutil .insert ("default_cardinal: \" default\" " ) +
82+ (pynini .string_map ([x [0 ] for x in roman_dict [:50 ]
83+ ]).optimize ()) @ default_graph ),
8584 )
8685 graph |= roman_to_cardinal
8786 else :
8988 # any roman numeral in roman_dict except the lone symbol "I"
8988 roman_to_cardinal = pynini .compose (
9089 pynini .difference (pynini .closure (self .VCHAR ), "I" ),
91- (
92- pynutil .insert ("default_cardinal: \" default\" integer: \" " )
93- + pynini .string_map (roman_dict ).optimize ()
94- + pynutil .insert ("\" " )
95- ),
90+ (pynutil .insert ("default_cardinal: \" default\" integer: \" " ) +
91+ pynini .string_map (roman_dict ).optimize () +
92+ pynutil .insert ("\" " )),
9693 ).optimize ()
9794 graph |= roman_to_cardinal
9895
10097 # convert a roman numeral followed by a "th" suffix to ordinal; the whole token (numeral + suffix) must be at least three ALPHA characters
10097 roman_to_ordinal = pynini .compose (
10198 pynini .closure (self .ALPHA , 3 ),
102- (pynutil .insert ("default_ordinal: \" default\" " ) + graph_teens @ default_graph + pynutil .delete ("th" )),
99+ (pynutil .insert ("default_ordinal: \" default\" " ) +
100+ graph_teens @ default_graph + pynutil .delete ("th" )),
103101 )
104102
105103 graph |= roman_to_ordinal
@@ -117,34 +115,22 @@ def build_verbalizer(self):
117115 cardinal = pynini .closure (self .NOT_QUOTE )
118116 ordinal = pynini .compose (cardinal , suffix )
119117
120- graph = (
121- pynutil .delete ("key_cardinal: \" " )
122- + pynini .closure (self .NOT_QUOTE , 1 )
123- + pynutil .delete ("\" " )
124- + pynini .accep (" " )
125- + pynutil .delete ("integer: \" " )
126- + cardinal
127- + pynutil .delete ("\" " )
128- ).optimize ()
129-
130- graph |= (
131- pynutil .delete ("default_cardinal: \" default\" integer: \" " ) + cardinal + pynutil .delete ("\" " )
132- ).optimize ()
133-
134- graph |= (
135- pynutil .delete ("default_ordinal: \" default\" integer: \" " ) + ordinal + pynutil .delete ("\" " )
136- ).optimize ()
137-
138- graph |= (
139- pynutil .delete ("key_the_ordinal: \" " )
140- + pynini .closure (self .NOT_QUOTE , 1 )
141- + pynutil .delete ("\" " )
142- + pynini .accep (" " )
143- + pynutil .delete ("integer: \" " )
144- + pynini .closure (pynutil .insert ("the " ), 0 , 1 )
145- + ordinal
146- + pynutil .delete ("\" " )
147- ).optimize ()
118+ graph = (pynutil .delete ("key_cardinal: \" " ) +
119+ pynini .closure (self .NOT_QUOTE , 1 ) + pynutil .delete ("\" " ) +
120+ pynini .accep (" " ) + pynutil .delete ("integer: \" " ) + cardinal +
121+ pynutil .delete ("\" " )).optimize ()
122+
123+ graph |= (pynutil .delete ("default_cardinal: \" default\" integer: \" " ) +
124+ cardinal + pynutil .delete ("\" " )).optimize ()
125+
126+ graph |= (pynutil .delete ("default_ordinal: \" default\" integer: \" " ) +
127+ ordinal + pynutil .delete ("\" " )).optimize ()
128+
129+ graph |= (pynutil .delete ("key_the_ordinal: \" " ) +
130+ pynini .closure (self .NOT_QUOTE , 1 ) + pynutil .delete ("\" " ) +
131+ pynini .accep (" " ) + pynutil .delete ("integer: \" " ) +
132+ pynini .closure (pynutil .insert ("the " ), 0 , 1 ) + ordinal +
133+ pynutil .delete ("\" " )).optimize ()
148134
149135 delete_tokens = self .delete_tokens (graph )
150136 self .verbalizer = delete_tokens .optimize ()
0 commit comments