rearranged repo

2023-04-21 21:39:41 +08:00
parent 2612e5dbcc
commit 3d7e4220d4
7 changed files with 28 additions and 28 deletions
@@ -1,11 +1,11 @@
 import os
 import json
 import torchaudio
-raw_audio_dir = "../raw_audio/"
-denoise_audio_dir = "../denoised_audio/"
+raw_audio_dir = "./raw_audio/"
+denoise_audio_dir = "./denoised_audio/"
 filelist = list(os.walk(raw_audio_dir))[0][2]
 # 2023/4/21: Get the target sampling rate
-with open("../configs/finetune_speaker.json", 'r', encoding='utf-8') as f:
+with open("./configs/finetune_speaker.json", 'r', encoding='utf-8') as f:
    hps = json.load(f)
 target_sr = hps['data']['sampling_rate']
 for file in filelist:
@@ -6,7 +6,7 @@ import torchaudio
 import librosa
 import torch
 import argparse
-parent_dir = "../denoised_audio/"
+parent_dir = "./denoised_audio/"
 filelist = list(os.walk(parent_dir))[0][2]
 if __name__ == "__main__":
    parser = argparse.ArgumentParser()
@@ -29,7 +29,7 @@ if __name__ == "__main__":
            'zh': "[ZH]",
        }
    assert(torch.cuda.is_available()), "Please enable GPU in order to run Whisper!"
-    with open("../configs/finetune_speaker.json", 'r', encoding='utf-8') as f:
+    with open("./configs/finetune_speaker.json", 'r', encoding='utf-8') as f:
        hps = json.load(f)
    target_sr = hps['data']['sampling_rate']
    model = whisper.load_model(args.whisper_size)
@@ -70,6 +70,6 @@ if __name__ == "__main__":
    if len(speaker_annos) == 0:
        print("Warning: no long audios & videos found, this IS expected if you have only uploaded short audios")
        print("this IS NOT expected if you have uploaded any long audios, videos or video links. Please check your file structure or make sure your audio/video language is supported.")
-    with open("../long_character_anno.txt", 'w', encoding='utf-8') as f:
+    with open("./long_character_anno.txt", 'w', encoding='utf-8') as f:
        for line in speaker_annos:
            f.write(line)
@@ -19,8 +19,8 @@ if __name__ == "__main__":
            short_character_anno = f.readlines()
            new_annos += short_character_anno
    # Source 2: transcribed long audio segments
-    if os.path.exists("../long_character_anno.txt"):
-        with open("../long_character_anno.txt", 'r', encoding='utf-8') as f:
+    if os.path.exists("./long_character_anno.txt"):
+        with open("./long_character_anno.txt", 'r', encoding='utf-8') as f:
            long_character_anno = f.readlines()
            new_annos += long_character_anno

@@ -33,7 +33,7 @@ if __name__ == "__main__":
    assert (len(speakers) != 0), "No audio file found. Please check your uploaded file structure."
    # Source 3 (Optional): sampled audios as extra training helpers
    if args.add_auxiliary_data:
-        with open("../sampled_audio4ft.txt", 'r', encoding='utf-8') as f:
+        with open("./sampled_audio4ft.txt", 'r', encoding='utf-8') as f:
            old_annos = f.readlines()
        # filter old_annos according to supported languages
        filtered_old_annos = []
@@ -55,7 +55,7 @@ if __name__ == "__main__":


        # STEP 2: modify config file
-        with open("../configs/finetune_speaker.json", 'r', encoding='utf-8') as f:
+        with open("./configs/finetune_speaker.json", 'r', encoding='utf-8') as f:
            hps = json.load(f)

        # assign ids to new speakers
@@ -72,7 +72,7 @@ if __name__ == "__main__":
        hps['data']['training_files'] = "final_annotation_train.txt"
        hps['data']['validation_files'] = "final_annotation_val.txt"
        # save modified config
-        with open("../configs/modified_finetune_speaker.json", 'w', encoding='utf-8') as f:
+        with open("./configs/modified_finetune_speaker.json", 'w', encoding='utf-8') as f:
            json.dump(hps, f, indent=2)

        # STEP 3: clean annotations, replace speaker names with assigned speaker IDs
@@ -96,18 +96,18 @@ if __name__ == "__main__":
        # merge with old annotation
        final_annos = cleaned_old_annos + cc_duplicate * cleaned_new_annos
        # save annotation file
-        with open("../final_annotation_train.txt", 'w', encoding='utf-8') as f:
+        with open("./final_annotation_train.txt", 'w', encoding='utf-8') as f:
            for line in final_annos:
                f.write(line)
        # save annotation file for validation
-        with open("../final_annotation_val.txt", 'w', encoding='utf-8') as f:
+        with open("./final_annotation_val.txt", 'w', encoding='utf-8') as f:
            for line in cleaned_new_annos:
                f.write(line)
        print("finished")
    else:
        # Do not add extra helper data
        # STEP 1: modify config file
-        with open("../configs/finetune_speaker.json", 'r', encoding='utf-8') as f:
+        with open("./configs/finetune_speaker.json", 'r', encoding='utf-8') as f:
            hps = json.load(f)

        # assign ids to new speakers
@@ -124,7 +124,7 @@ if __name__ == "__main__":
        hps['data']['training_files'] = "final_annotation_train.txt"
        hps['data']['validation_files'] = "final_annotation_val.txt"
        # save modified config
-        with open("../configs/modified_finetune_speaker.json", 'w', encoding='utf-8') as f:
+        with open("./configs/modified_finetune_speaker.json", 'w', encoding='utf-8') as f:
            json.dump(hps, f, indent=2)

        # STEP 2: clean annotations, replace speaker names with assigned speaker IDs
@@ -141,11 +141,11 @@ if __name__ == "__main__":

        final_annos = cleaned_new_annos
        # save annotation file
-        with open("../final_annotation_train.txt", 'w', encoding='utf-8') as f:
+        with open("./final_annotation_train.txt", 'w', encoding='utf-8') as f:
            for line in final_annos:
                f.write(line)
        # save annotation file for validation
-        with open("../final_annotation_val.txt", 'w', encoding='utf-8') as f:
+        with open("./final_annotation_val.txt", 'w', encoding='utf-8') as f:
            for line in cleaned_new_annos:
                f.write(line)
        print("finished")
@@ -21,16 +21,16 @@ if __name__ == "__main__":
            hps['speakers'][speaker] = i
        hps['data']['n_speakers'] = len(valid_speakers)
        model_sd['model']['emb_g.weight'] = new_emb_g
-        with open("../finetune_speaker.json", 'w', encoding='utf-8') as f:
+        with open("./finetune_speaker.json", 'w', encoding='utf-8') as f:
            json.dump(hps, f, indent=2)
-        torch.save(model_sd, "../G_latest.pth")
+        torch.save(model_sd, "./G_latest.pth")
    else:
-        with open("../finetune_speaker.json", 'w', encoding='utf-8') as f:
+        with open("./finetune_speaker.json", 'w', encoding='utf-8') as f:
            json.dump(hps, f, indent=2)
-        torch.save(model_sd, "../G_latest.pth")
+        torch.save(model_sd, "./G_latest.pth")
    # save another config file copy in MoeGoe format
    hps['speakers'] = valid_speakers
-    with open("../moegoe_config.json", 'w', encoding='utf-8') as f:
+    with open("./moegoe_config.json", 'w', encoding='utf-8') as f:
        json.dump(hps, f, indent=2)


@@ -56,7 +56,7 @@ if __name__ == "__main__":
    speaker_annos = []
    # resample audios
    # 2023/4/21: Get the target sampling rate
-    with open("../configs/finetune_speaker.json", 'r', encoding='utf-8') as f:
+    with open("./configs/finetune_speaker.json", 'r', encoding='utf-8') as f:
        hps = json.load(f)
    target_sr = hps['data']['sampling_rate']
    for speaker in speaker_names:
@@ -3,8 +3,8 @@ from concurrent.futures import ThreadPoolExecutor

 from moviepy.editor import AudioFileClip

-video_dir = "../video_data/"
-audio_dir = "../raw_audio/"
+video_dir = "./video_data/"
+audio_dir = "./raw_audio/"
 filelist = list(os.walk(video_dir))[0][2]


@@ -12,17 +12,17 @@ if __name__ == "__main__":
    uploaded = files.upload() # 上传文件
    assert(file_type in ['zip', 'audio', 'video'])
    if file_type == "zip":
-        upload_path = "../custom_character_voice/"
+        upload_path = "./custom_character_voice/"
        for filename in uploaded.keys():
            #将上传的文件移动到指定的位置上
            shutil.move(os.path.join(basepath, filename), os.path.join(upload_path, "custom_character_voice.zip"))
    elif file_type == "audio":
-        upload_path = "../raw_audio/"
+        upload_path = "./raw_audio/"
        for filename in uploaded.keys():
            #将上传的文件移动到指定的位置上
            shutil.move(os.path.join(basepath, filename), os.path.join(upload_path, filename))
    elif file_type == "video":
-        upload_path = "../video_data/"
+        upload_path = "./video_data/"
        for filename in uploaded.keys():
            # 将上传的文件移动到指定的位置上
            shutil.move(os.path.join(basepath, filename), os.path.join(upload_path, filename))