# Patch summary: unified diff touching two files. Both hunks remove the
# literal language tag "[ZH]" from text.
#
# NOTE(review): the patch below appears to have had its line breaks collapsed
# onto a single line; the per-line structure (and the original indentation of
# the context lines) must be restored before it can be applied with
# `git apply` / `patch`.
#
# 1. preprocess_v2.py, hunk at line 111 (section heading
#    `if __name__ == "__main__":`):
#    the value returned by
#    text._clean_text(txt, hps['data']['text_cleaners'])
#    now has every occurrence of "[ZH]" removed with .replace("[ZH]", "")
#    before the trailing "\n" is ensured and the
#    "path|speaker_id|cleaned_text" entry is appended to cleaned_new_annos.
#    Entries whose txt is longer than 150 characters are still skipped
#    (unchanged context).
#
# 2. text/cleaners.py, hunk at line 39 (chinese_cleaners):
#    a blank line is replaced by `text = text.replace("[ZH]", "")`, so the tag
#    is removed before the text is passed to pypinyin's
#    pinyin(text, style=Style.TONE3) and the phones are joined with spaces.
#
# NOTE(review): if chinese_cleaners is one of the configured
# hps['data']['text_cleaners'], change 1 looks redundant with change 2 for
# that cleaner -- _clean_text is not visible here, confirm before dropping
# either.
diff --git a/preprocess_v2.py b/preprocess_v2.py index 14bc16b..54cd4c4 100644 --- a/preprocess_v2.py +++ b/preprocess_v2.py @@ -111,7 +111,7 @@ if __name__ == "__main__": path, speaker, txt = line.split("|") if len(txt) > 150: continue - cleaned_text = text._clean_text(txt, hps['data']['text_cleaners']) + cleaned_text = text._clean_text(txt, hps['data']['text_cleaners']).replace("[ZH]", "") cleaned_text += "\n" if not cleaned_text.endswith("\n") else "" cleaned_new_annos.append(path + "|" + str(speaker2id[speaker]) + "|" + cleaned_text) diff --git a/text/cleaners.py b/text/cleaners.py index 4a8696a..6c7b809 100644 --- a/text/cleaners.py +++ b/text/cleaners.py @@ -39,7 +39,7 @@ def korean_cleaners(text): def chinese_cleaners(text): from pypinyin import Style, pinyin - + text = text.replace("[ZH]", "") phones = [phone[0] for phone in pinyin(text, style=Style.TONE3)] return ' '.join(phones)