From db8b5f801ceda25f39a0d2f5fc86a1da2fc7a618 Mon Sep 17 00:00:00 2001 From: Plachta Date: Mon, 13 Feb 2023 14:17:54 +0800 Subject: [PATCH] upload files --- .idea/.gitignore | 3 + .idea/VITS_voice_conversion.iml | 12 ++ .idea/inspectionProfiles/Project_Default.xml | 154 ++++++++++++++++++ .../inspectionProfiles/profiles_settings.xml | 6 + .idea/misc.xml | 4 + .idea/modules.xml | 8 + .idea/vcs.xml | 6 + .../VITS_vc_ft-checkpoint.ipynb | 6 + VITS_vc_ft.ipynb | 33 ++++ user_voice/user_voice.txt | 45 +++++ user_voice/user_voice.txt.cleaned | 45 +++++ user_voice_collect.py | 71 ++++++++ 12 files changed, 393 insertions(+) create mode 100644 .idea/.gitignore create mode 100644 .idea/VITS_voice_conversion.iml create mode 100644 .idea/inspectionProfiles/Project_Default.xml create mode 100644 .idea/inspectionProfiles/profiles_settings.xml create mode 100644 .idea/misc.xml create mode 100644 .idea/modules.xml create mode 100644 .idea/vcs.xml create mode 100644 .ipynb_checkpoints/VITS_vc_ft-checkpoint.ipynb create mode 100644 VITS_vc_ft.ipynb create mode 100644 user_voice/user_voice.txt create mode 100644 user_voice/user_voice.txt.cleaned create mode 100644 user_voice_collect.py diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..26d3352 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,3 @@ +# Default ignored files +/shelf/ +/workspace.xml diff --git a/.idea/VITS_voice_conversion.iml b/.idea/VITS_voice_conversion.iml new file mode 100644 index 0000000..e3dac1e --- /dev/null +++ b/.idea/VITS_voice_conversion.iml @@ -0,0 +1,12 @@ + + + + + + + + + + \ No newline at end of file diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml new file mode 100644 index 0000000..7390b08 --- /dev/null +++ b/.idea/inspectionProfiles/Project_Default.xml @@ -0,0 +1,154 @@ + + + + \ No newline at end of file diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml 
new file mode 100644 index 0000000..105ce2d --- /dev/null +++ b/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..d6e0253 --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..9238ba4 --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..94a25f7 --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/.ipynb_checkpoints/VITS_vc_ft-checkpoint.ipynb b/.ipynb_checkpoints/VITS_vc_ft-checkpoint.ipynb new file mode 100644 index 0000000..363fcab --- /dev/null +++ b/.ipynb_checkpoints/VITS_vc_ft-checkpoint.ipynb @@ -0,0 +1,6 @@ +{ + "cells": [], + "metadata": {}, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/VITS_vc_ft.ipynb b/VITS_vc_ft.ipynb new file mode 100644 index 0000000..713deb2 --- /dev/null +++ b/VITS_vc_ft.ipynb @@ -0,0 +1,33 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "2c6bd36c", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/user_voice/user_voice.txt b/user_voice/user_voice.txt new file mode 100644 index 0000000..40e1e71 --- /dev/null +++ b/user_voice/user_voice.txt @@ -0,0 +1,45 @@ +0.wav|999|[ZH]所以,人的内在拥有对于人的幸福才是最关键的。[ZH] 
+1.wav|999|[ZH]正因为在大多数情形下人的自身内在相当贫乏,[ZH] +2.wav|999|[ZH]所以,那些再也用不着与生活的匮乏作斗争的人,[ZH] +3.wav|999|[ZH]他们之中的大多数从根本上还是感觉闷闷不乐。[ZH] +4.wav|999|[ZH]情形就跟那些还在生活的困苦中搏斗的人一般无异。[ZH] +5.wav|999|[ZH]他们内在空虚、感觉意识呆滞、思想匮乏,[ZH] +6.wav|999|[ZH]这些就驱使他们投入社交人群中。[ZH] +7.wav|999|[ZH]组成那些社交圈子的人也正是他们这一类的人。[ZH] +8.wav|999|[ZH]“因为相同羽毛的鸟聚在一块”。[ZH] +9.wav|999|[ZH]他们聚在一块追逐消遣、娱乐。[ZH] +10.wav|999|[ZH]他们以放纵感官的欢娱、极尽声色的享受开始,[ZH] +11.wav|999|[ZH]以荒唐、无度而告终。[ZH] +12.wav|999|[ZH]众多刚刚踏入生活的纨绔子弟穷奢极欲,[ZH] +13.wav|999|[ZH]在令人难以置信的极短时间内就把大部分家财挥霍殆尽。[ZH] +14.wav|999|[ZH]这种做派,其根源确实不是别的,正是无聊[ZH] +18.wav|999|[ZH]它源自上述的精神贫乏和空虚。[ZH] +16.wav|999|[ZH]一个外在富有、但内在贫乏的富家子弟来到这个世界,[ZH] +17.wav|999|[ZH]会徒劳地用外在的财富去补偿内在的不足;[ZH] +18.wav|999|[ZH]他渴望从外部得到一切,[ZH] +19.wav|999|[ZH]这情形就好比试图以少女的汗水去强健自己体魄的老朽之人。[ZH] +20.wav|999|[ZH]人自身内在的贫乏由此导致了外在财富的贫乏。[ZH] +21.wav|999|[ZH]至于另外两项人生好处的重要性,[ZH] +22.wav|999|[ZH]不需要我特别强调。[ZH] +23.wav|999|[ZH]财产的价值在当今是人所公认的,[ZH] +24.wav|999|[ZH]用不着为其宣传介绍。[ZH] +25.wav|999|[ZH]比起第二项的好处,[ZH] +26.wav|999|[ZH]第三项的好处具有一种相当飘渺的成分,[ZH] +27.wav|999|[ZH]因为名誉、名望、地位等[ZH] +28.wav|999|[ZH]全由他人的意见构成。[ZH] +29.wav|999|[ZH]每人都可以争取得到名誉,[ZH] +30.wav|999|[ZH]亦即清白的名声;[ZH] +31.wav|999|[ZH]但社会地位,则只有月盼国家政府的人才能染指;[ZH] +32.wav|999|[ZH]至于显赫的名望就只有极少数人才会得到。[ZH] +33.wav|999|[ZH]在所有这些当中,[ZH] +34.wav|999|[ZH]名誉是弥足珍贵的;[ZH] +35.wav|999|[ZH]显赫的名望则是人所希望得到的价值至昂的东西,[ZH] +36.wav|999|[ZH]那是天之骄子才能得到的金羊毛。[ZH] +37.wav|999|[ZH]另一方面,[ZH] +38.wav|999|[ZH]只有傻瓜才会把社会地位放置在财产之前。[ZH] +39.wav|999|[ZH]另外,人拥有的财产、物品和名誉、声望,[ZH] +40.wav|999|[ZH]是处于一种所谓的互为影响、促进的关系。[ZH] +41.wav|999|[ZH]彼得尼斯说过:“一个人所拥有的财产决定了这个人在他人眼中的价值”。[ZH] +42.wav|999|[ZH]如果这句话是正确的话,[ZH] +43.wav|999|[ZH]那么,反过来,他人对自己的良好评价,[ZH] +44.wav|999|[ZH]能以各种形式帮助自己获取财产。[ZH] \ No newline at end of file diff --git a/user_voice/user_voice.txt.cleaned b/user_voice/user_voice.txt.cleaned new file mode 100644 index 0000000..2ad9f69 --- /dev/null +++ b/user_voice/user_voice.txt.cleaned @@ -0,0 +1,45 @@ +0.wav|999|swo↓↑i↓↑, ɹ`ən↑ t⁼ə neɪ↓ts⁼aɪ↓ jʊŋ→joʊ↓↑ t⁼weɪ↓ɥ↑ ɹ`ən↑ t⁼ə ʃiŋ↓fu↑ tsʰaɪ↑ s`ɹ`↓ ts⁼weɪ↓ k⁼wan→tʃ⁼jɛn↓ t⁼ə. 
+1.wav|999|ts`⁼əŋ↓ in→weɪ↓ ts⁼aɪ↓ t⁼a↓t⁼wo→s`u↓ tʃʰiŋ↑ʃiŋ↑ ʃja↓ɹ`ən↑ t⁼ə ts⁼ɹ↓s`ən→ neɪ↓ts⁼aɪ↓ ʃiɑŋ→t⁼ɑŋ→ pʰin↑fa↑, +2.wav|999|swo↓↑i↓↑, na↓ʃiɛ→ ts⁼aɪ↓iɛ↓↑ jʊŋ↓p⁼u↓ts`⁼ə ɥ↓↑ s`əŋ→xwo↑ t⁼ə kʰweɪ↓fa↑ ts⁼wo↓ t⁼oʊ↓ts`⁼əŋ→ t⁼ə ɹ`ən↑, +3.wav|999|tʰa→mən ts`⁼ɹ`→ts`⁼ʊŋ→ t⁼ə t⁼a↓t⁼wo→s`u↓ tsʰʊŋ↑k⁼ən→p⁼ən↓↑s`ɑŋ↓ xaɪ↑s`ɹ`↓ k⁼an↓↑tʃ⁼ɥɛ↑ mən↓mən↓p⁼u↓lə↓. +4.wav|999|tʃʰiŋ↑ʃiŋ↑ tʃ⁼joʊ↓ k⁼ən→ na↓ʃiɛ→ xaɪ↑ ts⁼aɪ↓ s`əŋ→xwo↑ t⁼ə kʰwən↓kʰu↓↑ ts`⁼ʊŋ→ p⁼wo↑t⁼oʊ↓ t⁼ə ɹ`ən↑ i↓p⁼an→ u↑i↓. +5.wav|999|tʰa→mən neɪ↓ts⁼aɪ↓ kʰʊŋ→ʃɥ→, k⁼an↓↑tʃ⁼ɥɛ↑ i↓s`ɹ`↑ t⁼aɪ→ts`⁼ɹ`↓, sɹ→ʃiɑŋ↓↑ kʰweɪ↓fa↑, +6.wav|999|ts`⁼ə↓ʃiɛ→ tʃ⁼joʊ↓ tʃʰɥ→s`ɹ`↓↑ tʰa→mən tʰoʊ↑ɹ`u↓ s`ə↓tʃ⁼iɑʊ→ ɹ`ən↑tʃʰɥn↑ ts`⁼ʊŋ→. +7.wav|999|ts⁼u↓↑ts`ʰəŋ↑ na↓ʃiɛ→ s`ə↓tʃ⁼iɑʊ→tʃʰɥæn→ts⁼ɹ t⁼ə ɹ`ən↑ iɛ↓↑ ts`⁼əŋ↓s`ɹ`↓ tʰa→mən ts`⁼ə↓ i→leɪ↓ t⁼ə ɹ`ən↑. +8.wav|999|“ in→weɪ↓ ʃiɑŋ→tʰʊŋ↑ ɥ↓↑mɑʊ↑ t⁼ə niɑʊ↓↑ tʃ⁼ɥ↓ ts⁼aɪ↓ i→kʰwaɪ↓”. +9.wav|999|tʰa→mən tʃ⁼ɥ↓ts⁼aɪ↓ i→kʰwaɪ↓ ts`⁼weɪ→ts`⁼u↑ ʃiɑʊ→tʃʰjɛn↓↑, ɥ↑lə↓. +10.wav|999|tʰa→mən i↓↑ fɑŋ↓ts⁼ʊŋ↓ k⁼an↓↑k⁼wan→ t⁼ə xwan→ɥ↑, tʃ⁼i↑tʃ⁼in↓↑ s`əŋ→sə↓ t⁼ə ʃiɑŋ↓↑s`oʊ↓ kʰaɪ→s`ɹ`↓↑, +11.wav|999|i↓↑ xuɑŋ→tʰɑŋ↑, u↑t⁼u↓ əɹ`↑ k⁼ɑʊ↓ts`⁼ʊŋ→. +12.wav|999|ts`⁼ʊŋ↓t⁼wo→ k⁼ɑŋ→k⁼ɑŋ→ tʰa↓ɹ`u↓ s`əŋ→xwo↑ t⁼ə wan↑kʰu↓ts⁼ɹ↓↑t⁼i↓ tʃʰjʊŋ↑s`ə→tʃ⁼i↑ɥ↓, +13.wav|999|ts⁼aɪ↓ liŋ↓ɹ`ən↑ nan↑i↓↑ts`⁼ɹ`↓ʃin↓ t⁼ə tʃ⁼i↑ t⁼wan↓↑s`ɹ`↑tʃ⁼jɛn→ neɪ↓ tʃ⁼joʊ↓ p⁼a↓↑ t⁼a↓p⁼u↓fən↓ tʃ⁼ja→tsʰaɪ↑ xweɪ→xwo↓ t⁼aɪ↓tʃ⁼in↓. +14.wav|999|ts`⁼ə↓ts`⁼ʊŋ↓↑ ts⁼wo↓pʰaɪ↓, tʃʰi↑ k⁼ən→ɥæn↑ tʃʰɥɛ↓s`ɹ`↑ p⁼u↑s`ɹ`↓ p⁼iɛ↑t⁼ə, ts`⁼əŋ↓s`ɹ`↓ u↑liɑʊ↑. +18.wav|999|tʰa→ ɥæn↑ts⁼ɹ↓ s`ɑŋ↓s`u↓ t⁼ə tʃ⁼iŋ→s`ən↑ pʰin↑fa↑ xə↑ kʰʊŋ→ʃɥ→. +16.wav|999|i↑k⁼ə↓ waɪ↓ ts⁼aɪ↓ fu↓joʊ↓↑, t⁼an↓ neɪ↓ts⁼aɪ↓ pʰin↑fa↑ t⁼ə fu↓tʃ⁼ja→ts⁼ɹ↓↑t⁼i↓ laɪ↑t⁼ɑʊ↓ ts`⁼ə↓k⁼ə↓ s`ɹ`↓tʃ⁼iɛ↓, +17.wav|999|xweɪ↓ tʰu↑lɑʊ↑t⁼i↓ jʊŋ↓waɪ↓ ts⁼aɪ↓ t⁼ə tsʰaɪ↑fu↓ tʃʰɥ↓ p⁼u↓↑ts`ʰɑŋ↑ neɪ↓ts⁼aɪ↓ t⁼ə p⁼u↓ts⁼u↑, +18.wav|999|tʰa→ kʰə↓↑uɑŋ↓ tsʰʊŋ↑ waɪ↓p⁼u↓ t⁼ə↑t⁼ɑʊ↓ i→tʃʰiɛ↓, +19.wav|999|ts`⁼ə↓ tʃʰiŋ↑ʃiŋ↑ tʃ⁼joʊ↓ xɑʊ↓↑p⁼i↓↑ s`ɹ`↓tʰu↑ i↓↑ s`ɑʊ↓nɥ↓↑ t⁼ə xan↓s`weɪ↓↑ tʃʰɥ↓ tʃʰiɑŋ↑tʃ⁼jɛn↓ ts⁼ɹ↓tʃ⁼i↓↑ tʰi↓↑pʰwo↓ t⁼ə lɑʊ↓↑ʃjoʊ↓↑ ts`⁼ɹ`→ ɹ`ən↑. 
+20.wav|999|ɹ`ən↑ ts⁼ɹ↓s`ən→ neɪ↓ts⁼aɪ↓ t⁼ə pʰin↑fa↑ joʊ↑tsʰɹ↓↑ t⁼ɑʊ↓↑ts`⁼ɹ`↓ lə waɪ↓ ts⁼aɪ↓ tsʰaɪ↑fu↓ t⁼ə pʰin↑fa↑. +21.wav|999|ts`⁼ɹ`↓ɥ↑ liŋ↓waɪ↓ liɑŋ↓↑ʃiɑŋ↓ ɹ`ən↑s`əŋ→ xɑʊ↓↑ts`ʰu↓ t⁼ə ts`⁼ʊŋ↓iɑʊ↓ʃiŋ↓, +22.wav|999|p⁼u↓ ʃɥ→iɑʊ↓ wo↓↑ tʰə↓p⁼iɛ↑tʃʰiɑŋ↑t⁼iɑʊ↓. +23.wav|999|tsʰaɪ↑ts`ʰan↓↑ t⁼ə tʃ⁼ja↓ts`⁼ɹ`↑ ts⁼aɪ↓ t⁼ɑŋ→tʃ⁼in→ s`ɹ`↓ ɹ`ən↑ swo↓↑ k⁼ʊŋ→ɹ`ən↓ t⁼ə, +24.wav|999|jʊŋ↓p⁼u↓ts`⁼ə weɪ↓ tʃʰi↑ ʃɥæn→ts`ʰwan↑ tʃ⁼iɛ↓s`ɑʊ↓. +25.wav|999|p⁼i↓↑tʃʰi↓↑ t⁼i↓əɹ`↓ʃiɑŋ↓ t⁼ə xɑʊ↓↑ts`ʰu↓, +26.wav|999|t⁼i↓san→ʃiɑŋ↓ t⁼ə xɑʊ↓↑ts`ʰu↓ tʃ⁼ɥ↓joʊ↓↑ i→ts`⁼ʊŋ↓↑ ʃiɑŋ→t⁼ɑŋ→ pʰiɑʊ→miɑʊ↓↑ t⁼ə ts`ʰəŋ↑fən↓, +27.wav|999|in→weɪ↓ miŋ↑ɥ↓, miŋ↑uɑŋ↓, t⁼i↓weɪ↓ t⁼əŋ↓↑. +28.wav|999|tʃʰɥæn↑ joʊ↑ tʰa→ɹ`ən↑ t⁼ə i↓tʃ⁼jɛn↓ k⁼oʊ↓ts`ʰəŋ↑. +29.wav|999|meɪ↓↑ɹ`ən↑ t⁼oʊ→ kʰə↓↑i↓↑ ts`⁼əŋ→tʃʰɥ↓↑ t⁼ə↑t⁼ɑʊ↓ miŋ↑ɥ↓, +30.wav|999|i↓ tʃ⁼i↑ tʃʰiŋ→p⁼aɪ↑ t⁼ə miŋ↑s`əŋ→, +31.wav|999|t⁼an↓ s`ə↓xweɪ↓ t⁼i↓weɪ↓, ts⁼ə↑ ts`⁼ɹ`↓↑joʊ↓↑ ɥɛ↓ pʰan↓ k⁼wo↑tʃ⁼ja→ ts`⁼əŋ↓fu↓↑ t⁼ə ɹ`ən↑tsʰaɪ↑ nəŋ↑ ɹ`an↓↑ts`⁼ɹ`↓↑, +32.wav|999|ts`⁼ɹ`↓ɥ↑ ʃjɛn↓↑xə↓ t⁼ə miŋ↑uɑŋ↓ tʃ⁼joʊ↓ ts`⁼ɹ`↓↑joʊ↓↑ tʃ⁼i↑s`ɑʊ↓↑s`u↓ ɹ`ən↑tsʰaɪ↑ xweɪ↓ t⁼ə↑t⁼ɑʊ↓. +33.wav|999|ts⁼aɪ↓ swo↓↑joʊ↓↑ ts`⁼ə↓ʃiɛ→ t⁼ɑŋ→ts`⁼ʊŋ→, +34.wav|999|miŋ↑ɥ↓ s`ɹ`↓ mi↑ts⁼u↑ts`⁼ən→k⁼weɪ↓ t⁼ə, +35.wav|999|ʃjɛn↓↑xə↓ t⁼ə miŋ↑uɑŋ↓ ts⁼ə↑ s`ɹ`↓ ɹ`ən↑ swo↓↑ ʃi→uɑŋ↓ t⁼ə↑t⁼ɑʊ↓ t⁼ə tʃ⁼ja↓ts`⁼ɹ`↑ ts`⁼ɹ`↓ɑŋ↑ t⁼ə t⁼ʊŋ→ʃi→, +36.wav|999|na↓ s`ɹ`↓ tʰjɛn→ts`⁼ɹ`→tʃ⁼iɑʊ→ts⁼ɹ tsʰaɪ↑nəŋ↑ t⁼ə↑t⁼ɑʊ↓ t⁼ə tʃ⁼in→ iɑŋ↑mɑʊ↑. +37.wav|999|liŋ↓i↓fɑŋ→mjɛn↓, +38.wav|999|ts`⁼ɹ`↓↑joʊ↓↑ s`a↓↑k⁼wa→ tsʰaɪ↑ xweɪ↓ p⁼a↓↑ s`ə↓xweɪ↓ t⁼i↓weɪ↓ fɑŋ↓ts`⁼ɹ`↓ ts⁼aɪ↓ tsʰaɪ↑ts`ʰan↓↑ ts`⁼ɹ`→tʃʰjɛn↑. +39.wav|999|liŋ↓waɪ↓, ɹ`ən↑ jʊŋ→joʊ↓↑ t⁼ə tsʰaɪ↑ts`ʰan↓↑, u↓pʰin↓↑ xə↑ miŋ↑ɥ↓, s`əŋ→uɑŋ↓, +40.wav|999|s`ɹ`↓ ts`ʰu↓↑ɥ↑ i→ts`⁼ʊŋ↓↑ swo↓↑weɪ↓ t⁼ə xu↓weɪ↓ iŋ↓↑ʃiɑŋ↓↑, tsʰu↓tʃ⁼in↓ t⁼ə k⁼wan→ʃi↓. +41.wav|999|p⁼i↓↑t⁼ə↑ ni↑sɹ→ s`wo→ k⁼wo↓,“ i↑k⁼ə↓ ɹ`ən↑ swo↓↑ jʊŋ→joʊ↓↑ t⁼ə tsʰaɪ↑ts`ʰan↓↑ tʃ⁼ɥɛ↑t⁼iŋ↓ lə ts`⁼ə↓k⁼ə↓ ɹ`ən↑ ts⁼aɪ↓ tʰa→ɹ`ən↑ jɛn↓↑ts`⁼ʊŋ→ t⁼ə tʃ⁼ja↓ts`⁼ɹ`↑”. 
+42.wav|999|ɹ`u↑k⁼wo↓↑ ts`⁼ə↓tʃ⁼ɥ↓ xwa↓ s`ɹ`↓ ts`⁼əŋ↓tʃʰɥɛ↓ t⁼əxwa↓,
+43.wav|999|na↓mə, fan↓↑k⁼wo↓laɪ↑, tʰa→ɹ`ən↑ t⁼weɪ↓ ts⁼ɹ↓tʃ⁼i↓↑ t⁼ə liɑŋ↑xɑʊ↓↑ pʰiŋ↑tʃ⁼ja↓,
+44.wav|999|nəŋ↑i↓↑ k⁼ə↓ts`⁼ʊŋ↓↑ ʃiŋ↑s`ɹ`↓ p⁼ɑŋ→ts`⁼u↓ ts⁼ɹ↓tʃ⁼i↓↑ xwo↓tʃʰɥ↓↑ tsʰaɪ↑ts`ʰan↓↑.
diff --git a/user_voice_collect.py b/user_voice_collect.py
new file mode 100644
index 0000000..85e021d
--- /dev/null
+++ b/user_voice_collect.py
@@ -0,0 +1,118 @@
+"""Gradio tool for recording a user's voice, one prompt at a time.
+
+Prompts are read from ./user_voice/user_voice.txt and each recording is saved
+as ./user_voice/<prompt index>.wav at 22050 Hz.
+"""
+import numpy as np
+import torch
+import torchaudio
+import gradio as gr
+
+ANNOTATION_PATH = "./user_voice/user_voice.txt"
+LANG_TAG = "[ZH]"
+TARGET_SR = 22050
+
+# One prompt per line: "<file>.wav|<speaker id>|[ZH]<text>[ZH]".
+# Blank lines are skipped so that every entry has all three fields.
+with open(ANNOTATION_PATH, 'r', encoding='utf-8') as f:
+    anno_lines = [line.strip("\n") for line in f if line.strip()]
+
+# Index of the prompt currently shown; save_audio() names its file after it.
+text_index = 0
+
+
+def _prompt_label(index):
+    """Return "<index>: <text>" for prompt *index*, without the language tags."""
+    text = anno_lines[index].split("|")[2]
+    # str.strip("[ZH]") removes any of the characters "[", "Z", "H", "]" from
+    # both ends, which also eats prompts that start or end with one of them,
+    # so remove the exact tag instead.
+    if text.startswith(LANG_TAG):
+        text = text[len(LANG_TAG):]
+    if text.endswith(LANG_TAG):
+        text = text[:-len(LANG_TAG)]
+    return f"{index}: {text}"
+
+
+def display_text(index):
+    """Select prompt *index* (an int or numeric string) and return its label."""
+    global text_index
+    index = int(index)
+    # Reject out-of-range values before touching the shared state; a negative
+    # index would otherwise wrap around and later be saved as e.g. "-1.wav".
+    if not 0 <= index < len(anno_lines):
+        raise IndexError(f"prompt index {index} is out of range")
+    text_index = index
+    return _prompt_label(text_index)
+
+
+def display_prev_text():
+    """Step back one prompt, stopping at the first, and return its label."""
+    global text_index
+    if text_index > 0:
+        text_index -= 1
+    return _prompt_label(text_index)
+
+
+def display_next_text():
+    """Step forward one prompt, stopping at the last, and return its label."""
+    global text_index
+    if text_index < len(anno_lines) - 1:
+        text_index += 1
+    return _prompt_label(text_index)
+
+
+def save_audio(audio):
+    """Peak-normalise the recording, resample it to 22050 Hz and save it.
+
+    *audio* is the value of the gr.Audio input: falsy when nothing was
+    recorded, otherwise a (sample_rate, samples) pair. The file is written to
+    ./user_voice/<text_index>.wav. Returns a status message for the UI.
+    """
+    if not audio:
+        return "Error: Please record your audio!"
+    sr, wav = audio
+    # Convert to float before measuring the peak: negating the int16 minimum
+    # (-32768) overflows in integer arithmetic.
+    wav = torch.tensor(wav, dtype=torch.float32)
+    if wav.numel() == 0:
+        return "Error: Please record your audio!"
+    peak = wav.abs().max()
+    if peak > 0:  # pure silence stays all-zero instead of becoming NaN
+        wav = wav / peak
+    # torchaudio.save(..., channels_first=True) expects (channels, samples).
+    # NOTE(review): multi-channel input is assumed to be (samples, channels),
+    # as Gradio documents for numpy audio -- confirm for the version in use.
+    wav = wav.unsqueeze(0) if wav.dim() == 1 else wav.t()
+    if sr != TARGET_SR:
+        wav = torchaudio.transforms.Resample(orig_freq=sr, new_freq=TARGET_SR)(wav)
+    out_path = f"./user_voice/{text_index}.wav"
+    torchaudio.save(out_path, wav, TARGET_SR, channels_first=True)
+    return f"Audio saved to {out_path} successfully!"
+
+
+if __name__ == "__main__":
+    app = gr.Blocks()
+    with app:
+        with gr.Row():
+            text = gr.Textbox(value=_prompt_label(0), label="Please read the text here")
+        with gr.Row():
+            audio_to_collect = gr.Audio(source="microphone")
+        with gr.Row():
+            with gr.Column():
+                prev_btn = gr.Button(value="Previous")
+            with gr.Column():
+                next_btn = gr.Button(value="Next")
+        with gr.Row():
+            index_dropdown = gr.Dropdown(choices=[str(i) for i in range(len(anno_lines))], value="0",
+                                         label="No. of text", interactive=True)
+        with gr.Row():
+            with gr.Column():
+                save_btn = gr.Button(value="Save Audio")
+            with gr.Column():
+                audio_save_message = gr.Textbox(label="Message")
+        index_dropdown.change(display_text, inputs=index_dropdown, outputs=text)
+        prev_btn.click(display_prev_text, inputs=None, outputs=text)
+        next_btn.click(display_next_text, inputs=None, outputs=text)
+        save_btn.click(save_audio, inputs=audio_to_collect, outputs=audio_save_message)
+    app.launch()
\ No newline at end of file