@@ -34,14 +34,34 @@ weitn --text "二点五平方电线"
3434Python usage:
3535
3636``` py
37- # tn usage
38- >> > from tn.chinese.normalizer import Normalizer
39- >> > normalizer = Normalizer()
40- >> > normalizer.normalize(" 2.5平方电线" )
41- # itn usage
42- >> > from itn.chinese.inverse_normalizer import InverseNormalizer
43- >> > invnormalizer = InverseNormalizer()
44- >> > invnormalizer.normalize(" 二点五平方电线" )
37+ from itn.chinese.inverse_normalizer import InverseNormalizer
38+ from tn.chinese.normalizer import Normalizer as ZhNormalizer
39+ from tn.english.normalizer import Normalizer as EnNormalizer
40+
41+ # NOTE (xcsong): 和默认参数不一致时,必须重新构图,要重新构图请务必指定 `overwrite_cache=True`
42+ # When the parameters differ from the defaults, the graph must be rebuilt; to rebuild it, be sure to pass `overwrite_cache=True`.
43+
44+ zh_tn_text = " 你好 WeTextProcessing 1.0,船新版本儿,船新体验儿,简直666,9和10"
45+ zh_itn_text = " 你好 WeTextProcessing 一点零,船新版本儿,船新体验儿,简直六六六,九和六"
46+ en_tn_text = " Hello WeTextProcessing 1.0, life is short, just use wetext, 666, 9 and 10"
47+ zh_tn_model = ZhNormalizer(remove_erhua = True , overwrite_cache = True )
48+ zh_itn_model = InverseNormalizer(enable_0_to_9 = False , overwrite_cache = True )
49+ en_tn_model = EnNormalizer(overwrite_cache = True )
50+ print (" 中文 TN (去除儿化音,重新在线构图):\n\t {} => {} " .format(zh_tn_text, zh_tn_model.normalize(zh_tn_text)))
51+ print (" 中文ITN (小于10的单独数字不转换,重新在线构图):\n\t {} => {} " .format(zh_itn_text, zh_itn_model.normalize(zh_itn_text)))
52+ print (" 英文 TN (暂时还没有可控的选项,后面会加...):\n\t {} => {} \n " .format(en_tn_text, en_tn_model.normalize(en_tn_text)))
53+
54+ zh_tn_model = ZhNormalizer(overwrite_cache = False )
55+ zh_itn_model = InverseNormalizer(overwrite_cache = False )
56+ en_tn_model = EnNormalizer(overwrite_cache = False )
57+ print (" 中文 TN (复用之前编译好的图):\n\t {} => {} " .format(zh_tn_text, zh_tn_model.normalize(zh_tn_text)))
58+ print (" 中文ITN (复用之前编译好的图):\n\t {} => {} " .format(zh_itn_text, zh_itn_model.normalize(zh_itn_text)))
59+ print (" 英文 TN (复用之前编译好的图):\n\t {} => {} \n " .format(en_tn_text, en_tn_model.normalize(en_tn_text)))
60+
61+ zh_tn_model = ZhNormalizer(remove_erhua = False , overwrite_cache = True )
62+ zh_itn_model = InverseNormalizer(enable_0_to_9 = True , overwrite_cache = True )
63+ print (" 中文 TN (不去除儿化音,重新在线构图):\n\t {} => {} " .format(zh_tn_text, zh_tn_model.normalize(zh_tn_text)))
64+ print (" 中文ITN (小于10的单独数字也进行转换,重新在线构图):\n\t {} => {} \n " .format(zh_itn_text, zh_itn_model.normalize(zh_itn_text)))
4565```
4666
4767#### 1.2 Advanced Usage:
0 commit comments