Skip to content

Commit c2bb3fe

Browse files
MuyangDun and v-kkudrynski
authored and committed
[FastPitch/PyT] Add mixed English and Mandarin bilingual support
1 parent 84be38e commit c2bb3fe

22 files changed

Lines changed: 1045 additions & 73 deletions

PyTorch/SpeechSynthesis/FastPitch/README.md

Lines changed: 174 additions & 50 deletions
Large diffs are not rendered by default.
Binary file not shown.

PyTorch/SpeechSynthesis/FastPitch/common/text/symbols.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,14 +31,26 @@ def get_symbols(symbol_set='english_basic'):
3131
_accented = 'áçéêëñöøćž'
3232
_letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
3333
symbols = list(_punctuation + _math + _special + _accented + _letters) + _arpabet
34+
elif symbol_set == 'english_mandarin_basic':
35+
from .zh.chinese import chinese_punctuations, valid_symbols as mandarin_valid_symbols
36+
37+
# Prepend "#" to mandarin phonemes to ensure uniqueness (some are the same as uppercase letters):
38+
_mandarin_phonemes = ['#' + s for s in mandarin_valid_symbols]
39+
40+
_pad = '_'
41+
_punctuation = '!\'(),.:;? '
42+
_chinese_punctuation = ["#" + p for p in chinese_punctuations]
43+
_special = '-'
44+
_letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
45+
symbols = list(_pad + _special + _punctuation + _letters) + _arpabet + _mandarin_phonemes + _chinese_punctuation
3446
else:
3547
raise Exception("{} symbol set does not exist".format(symbol_set))
3648

3749
return symbols
3850

3951

4052
def get_pad_idx(symbol_set='english_basic'):
    """Return the index of the padding symbol for ``symbol_set``.

    Raises:
        Exception: if the symbol set has no pad index defined yet.
    """
    supported = {'english_basic', 'english_basic_lowercase',
                 'english_mandarin_basic'}
    if symbol_set not in supported:
        raise Exception("{} symbol set not used yet".format(symbol_set))
    # Every supported symbol set places the pad token at position 0.
    return 0

PyTorch/SpeechSynthesis/FastPitch/common/text/text_processing.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,3 +162,13 @@ def encode_text(self, text, return_all=False):
162162
return text_encoded, text_clean, text_arpabet
163163

164164
return text_encoded
165+
166+
167+
def get_text_processing(symbol_set, text_cleaners, p_arpabet):
    """Build the text processor appropriate for ``symbol_set``.

    Args:
        symbol_set: name of the symbol inventory, e.g. 'english_basic'.
        text_cleaners: cleaner names forwarded to the processor.
        p_arpabet: probability of converting an English word to ARPAbet.

    Returns:
        A ``TextProcessing`` instance, or a ``MandarinTextProcessing`` for
        the bilingual 'english_mandarin_basic' symbol set.

    Raises:
        ValueError: if ``symbol_set`` is not recognized.
    """
    if symbol_set in ['english_basic', 'english_basic_lowercase', 'english_expanded']:
        return TextProcessing(symbol_set, text_cleaners, p_arpabet=p_arpabet)
    elif symbol_set == 'english_mandarin_basic':
        # Imported lazily so English-only runs don't require the Mandarin
        # front-end dependencies (pypinyin, pinyin lexicon file).
        from common.text.zh.mandarin_text_processing import MandarinTextProcessing
        return MandarinTextProcessing(symbol_set, text_cleaners, p_arpabet=p_arpabet)
    else:
        # Original message was a grammatical fragment
        # ("No TextProcessing for symbol set X unknown."); fixed.
        raise ValueError(f"No TextProcessing for unknown symbol set '{symbol_set}'.")
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
# *****************************************************************************
2+
# Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved.
3+
#
4+
# Redistribution and use in source and binary forms, with or without
5+
# modification, are permitted provided that the following conditions are met:
6+
# * Redistributions of source code must retain the above copyright
7+
# notice, this list of conditions and the following disclaimer.
8+
# * Redistributions in binary form must reproduce the above copyright
9+
# notice, this list of conditions and the following disclaimer in the
10+
# documentation and/or other materials provided with the distribution.
11+
# * Neither the name of the NVIDIA CORPORATION nor the
12+
# names of its contributors may be used to endorse or promote products
13+
# derived from this software without specific prior written permission.
14+
#
15+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16+
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17+
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18+
# DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
19+
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20+
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21+
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22+
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23+
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24+
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25+
#
26+
# *****************************************************************************
27+
28+
import re
29+
30+
from pypinyin import lazy_pinyin, Style
31+
32+
33+
# Mandarin pinyin phoneme inventory (initials and finals); '^' marks the
# null initial. Order is significant: symbol ids are derived from it.
_PINYIN_PHONEMES = ['^', 'A', 'AI', 'AN', 'ANG', 'AO', 'B', 'C', 'CH', 'D',
                    'E', 'EI', 'EN', 'ENG', 'ER', 'F', 'G', 'H', 'I', 'IE',
                    'IN', 'ING', 'IU', 'J', 'K', 'L', 'M', 'N', 'O', 'ONG',
                    'OU', 'P', 'Q', 'R', 'S', 'SH', 'T', 'U', 'UI', 'UN',
                    'V', 'VE', 'VN', 'W', 'X', 'Y', 'Z', 'ZH']

# Tone markers: 1-4 are the lexical tones, 5 is the neutral tone.
tones = [str(t) for t in range(1, 6)]

# Fullwidth punctuation marks that may appear in Chinese text.
chinese_punctuations = ",。?!;:、‘’“”()【】「」《》"

# Public symbol list: phonemes followed by the tone digits.
valid_symbols = _PINYIN_PHONEMES + tones
41+
42+
43+
def load_pinyin_dict(path="common/text/zh/pinyin_dict.txt"):
    """Load the pinyin-syllable -> phoneme-list lexicon.

    Each line of the file is ``SYLLABLE PHONEME [PHONEME ...]``,
    whitespace-separated. Blank lines are ignored (the original
    comprehension raised IndexError on them).

    Args:
        path: lexicon file path, relative to the working directory.

    Returns:
        dict mapping an uppercase pinyin syllable to its phoneme list.
    """
    lexicon = {}
    # Explicit encoding: the file ships with the repo as UTF-8 and must not
    # depend on the platform's default locale encoding.
    with open(path, encoding='utf-8') as f:
        for line in f:
            fields = line.split()  # split once per line, not twice
            if fields:
                lexicon[fields[0]] = fields[1:]
    return lexicon
46+
47+
pinyin_dict = load_pinyin_dict()
48+
49+
50+
def is_chinese(text):
    """Return True when ``text`` starts with a CJK ideograph or a Chinese
    punctuation mark (only the first character is inspected)."""
    first = text[0]
    # Keep the range check first: it short-circuits for ordinary hanzi.
    return '\u4e00' <= first <= '\u9fff' or first in chinese_punctuations
52+
53+
54+
def split_text(text):
    """Split ``text`` into alternating non-Chinese and Chinese chunks.

    The character class is wrapped in a capturing group, so ``re.split``
    keeps the Chinese runs in the returned list instead of dropping them.
    """
    pattern = '([\u4e00-\u9fff' + chinese_punctuations + ']+)'
    return re.split(pattern, text)
57+
58+
59+
def chinese_text_to_symbols(text):
    """Convert a Chinese text segment into phoneme/tone symbols.

    Returns:
        (symbols, phonemes_and_tones): ``symbols`` is a flat list of pinyin
        phonemes, tone digits ('1'-'5') and Chinese punctuation marks;
        ``phonemes_and_tones`` is the same content rendered as a string with
        each syllable wrapped in '{...}' (ARPAbet-style display form).
    """
    symbols = []
    phonemes_and_tones = ""

    # convert text to mandarin pinyin sequence
    # ignore polyphonic words as it has little effect on training
    # (Style.TONE3 appends the tone digit to each syllable, e.g. 'zhong1')
    pinyin_seq = lazy_pinyin(text, style=Style.TONE3)

    for item in pinyin_seq:
        if item in chinese_punctuations:
            # Punctuation passes through as a single symbol.
            symbols += [item]
            phonemes_and_tones += ' ' + item
            continue
        if not item[-1].isdigit():
            # No tone digit means the neutral tone; encode it as '5'.
            item += '5'
        item, tone = item[:-1], item[-1]
        # NOTE(review): raises KeyError if the syllable is missing from the
        # lexicon file — presumably the shipped dictionary is exhaustive.
        phonemes = pinyin_dict[item.upper()]
        symbols += phonemes
        symbols += [tone]

        phonemes_and_tones += '{' + ' '.join(phonemes + [tone]) + '}'

    return symbols, phonemes_and_tones
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
import re
2+
import numpy as np
3+
from .chinese import split_text, is_chinese, chinese_text_to_symbols
4+
from ..text_processing import TextProcessing
5+
6+
7+
class MandarinTextProcessing(TextProcessing):
    """TextProcessing variant for mixed English/Mandarin text.

    English segments are delegated to the base class; Chinese segments are
    converted to pinyin phonemes, which are stored in the symbol table with
    a '#' prefix to keep them distinct from uppercase English letters.
    """

    def __init__(self, symbol_set, cleaner_names, p_arpabet=0.0,
                 handle_arpabet='word', handle_arpabet_ambiguous='ignore',
                 expand_currency=True):
        super().__init__(symbol_set, cleaner_names, p_arpabet, handle_arpabet,
                         handle_arpabet_ambiguous, expand_currency)

    def sequence_to_text(self, sequence):
        """Decode a sequence of symbol ids back into readable text.

        ARPAbet symbols and Mandarin syllables are re-wrapped in curly
        braces; '#'-prefixed Chinese punctuation maps back to the bare mark.
        Unknown ids are silently skipped.
        """
        result = ''

        tmp = ''  # accumulates phonemes of the in-progress Mandarin syllable
        for symbol_id in sequence:
            if symbol_id in self.id_to_symbol:
                s = self.id_to_symbol[symbol_id]
                # Enclose ARPAbet and mandarin phonemes back in curly braces:
                if len(s) > 1 and s[0] == '@':
                    s = '{%s}' % s[1:]
                    result += s
                elif len(s) > 1 and s[0] == '#' and s[1].isdigit():  # mandarin tone
                    # A tone digit terminates the syllable: flush the buffer.
                    tmp += s[1] + '} '
                    result += tmp
                    tmp = ''
                elif len(s) > 1 and s[0] == '#' and (s[1].isalpha() or s[1] == '^'):  # mandarin phoneme
                    if tmp == '':
                        tmp += ' {' + s[1:] + ' '
                    else:
                        tmp += s[1:] + ' '
                elif len(s) > 1 and s[0] == '#':  # chinese punctuation
                    s = s[1]
                    result += s
                else:
                    result += s

        # NOTE(review): the second replace is a no-op as written (space ->
        # space); it was presumably meant to collapse double spaces — confirm
        # against the upstream source before changing it.
        return result.replace('}{', ' ').replace(' ', ' ')

    def chinese_symbols_to_sequence(self, symbols):
        # Mandarin symbols live in the table with a '#' prefix (see symbols.py).
        return self.symbols_to_sequence(['#' + s for s in symbols])

    def encode_text(self, text, return_all=False):
        """Encode mixed English/Chinese ``text`` into a symbol-id sequence.

        Args:
            text: input string possibly mixing English and Chinese.
            return_all: when True, also return the cleaned text and its
                phonetic (ARPAbet/pinyin) rendering.

        Returns:
            The encoded id list, or ``(ids, clean_text, phonetic_text)``
            when ``return_all`` is True.
        """
        # split the text into English and Chinese segments
        segments = [segment for segment in split_text(text) if segment != ""]

        text_encoded = []
        text_clean = ""
        text_arpabet = ""

        for segment in segments:
            if is_chinese(segment[0]):  # process the Chinese segment
                chinese_symbols, segment_arpabet = chinese_text_to_symbols(segment)
                segment_encoded = self.chinese_symbols_to_sequence(chinese_symbols)
                # Chinese text needs no cleaning; keep the segment verbatim.
                # (Removed the original's redundant self-assignment of
                # segment_encoded here.)
                segment_clean = segment
            else:  # process the English segment
                segment_encoded, segment_clean, segment_arpabet = \
                    super().encode_text(segment, return_all=True)

            text_encoded += segment_encoded
            text_clean += segment_clean
            text_arpabet += segment_arpabet

        if return_all:
            return text_encoded, text_clean, text_arpabet

        return text_encoded

0 commit comments

Comments
 (0)