From 2612e5dbcc63b848765052989f381f3835f7b59c Mon Sep 17 00:00:00 2001 From: Plachta Date: Fri, 21 Apr 2023 21:19:26 +0800 Subject: [PATCH] rearranged repo --- VC_inference.py => scripts/VC_inference.py | 0 scripts/denoise_audio.py | 6 +++--- scripts/long_audio_transcribe.py | 6 +++--- preprocess_v2.py => scripts/preprocess_v2.py | 22 ++++++++++---------- scripts/rearrange_speaker.py | 10 ++++----- scripts/short_audio_transcribe.py | 4 ++-- scripts/video2audio.py | 4 ++-- scripts/voice_upload.py | 6 +++--- 8 files changed, 29 insertions(+), 29 deletions(-) rename VC_inference.py => scripts/VC_inference.py (100%) rename preprocess_v2.py => scripts/preprocess_v2.py (87%) diff --git a/VC_inference.py b/scripts/VC_inference.py similarity index 100% rename from VC_inference.py rename to scripts/VC_inference.py diff --git a/scripts/denoise_audio.py b/scripts/denoise_audio.py index fc061c6..362ffef 100644 --- a/scripts/denoise_audio.py +++ b/scripts/denoise_audio.py @@ -1,11 +1,11 @@ import os import json import torchaudio -raw_audio_dir = "./raw_audio/" -denoise_audio_dir = "./denoised_audio/" +raw_audio_dir = "../raw_audio/" +denoise_audio_dir = "../denoised_audio/" filelist = list(os.walk(raw_audio_dir))[0][2] # 2023/4/21: Get the target sampling rate -with open("./configs/finetune_speaker.json", 'r', encoding='utf-8') as f: +with open("../configs/finetune_speaker.json", 'r', encoding='utf-8') as f: hps = json.load(f) target_sr = hps['data']['sampling_rate'] for file in filelist: diff --git a/scripts/long_audio_transcribe.py b/scripts/long_audio_transcribe.py index e839855..9653744 100644 --- a/scripts/long_audio_transcribe.py +++ b/scripts/long_audio_transcribe.py @@ -6,7 +6,7 @@ import torchaudio import librosa import torch import argparse -parent_dir = "./denoised_audio/" +parent_dir = "../denoised_audio/" filelist = list(os.walk(parent_dir))[0][2] if __name__ == "__main__": parser = argparse.ArgumentParser() @@ -29,7 +29,7 @@ if __name__ == "__main__": 'zh': "[ZH]", } assert(torch.cuda.is_available()), "Please enable GPU in order to run Whisper!" - with open("./configs/finetune_speaker.json", 'r', encoding='utf-8') as f: + with open("../configs/finetune_speaker.json", 'r', encoding='utf-8') as f: hps = json.load(f) target_sr = hps['data']['sampling_rate'] model = whisper.load_model(args.whisper_size) @@ -70,6 +70,6 @@ if __name__ == "__main__": if len(speaker_annos) == 0: print("Warning: no long audios & videos found, this IS expected if you have only uploaded short audios") print("this IS NOT expected if you have uploaded any long audios, videos or video links. Please check your file structure or make sure your audio/video language is supported.") - with open("long_character_anno.txt", 'w', encoding='utf-8') as f: + with open("../long_character_anno.txt", 'w', encoding='utf-8') as f: for line in speaker_annos: f.write(line) diff --git a/preprocess_v2.py b/scripts/preprocess_v2.py similarity index 87% rename from preprocess_v2.py rename to scripts/preprocess_v2.py index 38fa0e7..831c830 100644 --- a/preprocess_v2.py +++ b/scripts/preprocess_v2.py @@ -19,8 +19,8 @@ if __name__ == "__main__": short_character_anno = f.readlines() new_annos += short_character_anno # Source 2: transcribed long audio segments - if os.path.exists("long_character_anno.txt"): - with open("long_character_anno.txt", 'r', encoding='utf-8') as f: + if os.path.exists("../long_character_anno.txt"): + with open("../long_character_anno.txt", 'r', encoding='utf-8') as f: long_character_anno = f.readlines() new_annos += long_character_anno @@ -33,7 +33,7 @@ if __name__ == "__main__": assert (len(speakers) != 0), "No audio file found. Please check your uploaded file structure." # Source 3 (Optional): sampled audios as extra training helpers if args.add_auxiliary_data: - with open("sampled_audio4ft.txt", 'r', encoding='utf-8') as f: + with open("../sampled_audio4ft.txt", 'r', encoding='utf-8') as f: old_annos = f.readlines() # filter old_annos according to supported languages filtered_old_annos = [] @@ -55,7 +55,7 @@ if __name__ == "__main__": # STEP 2: modify config file - with open("./configs/finetune_speaker.json", 'r', encoding='utf-8') as f: + with open("../configs/finetune_speaker.json", 'r', encoding='utf-8') as f: hps = json.load(f) # assign ids to new speakers @@ -72,7 +72,7 @@ if __name__ == "__main__": hps['data']['training_files'] = "final_annotation_train.txt" hps['data']['validation_files'] = "final_annotation_val.txt" # save modified config - with open("./configs/modified_finetune_speaker.json", 'w', encoding='utf-8') as f: + with open("../configs/modified_finetune_speaker.json", 'w', encoding='utf-8') as f: json.dump(hps, f, indent=2) # STEP 3: clean annotations, replace speaker names with assigned speaker IDs @@ -96,18 +96,18 @@ if __name__ == "__main__": # merge with old annotation final_annos = cleaned_old_annos + cc_duplicate * cleaned_new_annos # save annotation file - with open("final_annotation_train.txt", 'w', encoding='utf-8') as f: + with open("../final_annotation_train.txt", 'w', encoding='utf-8') as f: for line in final_annos: f.write(line) # save annotation file for validation - with open("final_annotation_val.txt", 'w', encoding='utf-8') as f: + with open("../final_annotation_val.txt", 'w', encoding='utf-8') as f: for line in cleaned_new_annos: f.write(line) print("finished") else: # Do not add extra helper data # STEP 1: modify config file - with open("./configs/finetune_speaker.json", 'r', encoding='utf-8') as f: + with open("../configs/finetune_speaker.json", 'r', encoding='utf-8') as f: hps = json.load(f) # assign ids to new speakers @@ -124,7 +124,7 @@ if __name__ == "__main__": hps['data']['training_files'] = "final_annotation_train.txt" hps['data']['validation_files'] = "final_annotation_val.txt" # save modified config - with open("./configs/modified_finetune_speaker.json", 'w', encoding='utf-8') as f: + with open("../configs/modified_finetune_speaker.json", 'w', encoding='utf-8') as f: json.dump(hps, f, indent=2) # STEP 2: clean annotations, replace speaker names with assigned speaker IDs @@ -141,11 +141,11 @@ if __name__ == "__main__": final_annos = cleaned_new_annos # save annotation file - with open("final_annotation_train.txt", 'w', encoding='utf-8') as f: + with open("../final_annotation_train.txt", 'w', encoding='utf-8') as f: for line in final_annos: f.write(line) # save annotation file for validation - with open("final_annotation_val.txt", 'w', encoding='utf-8') as f: + with open("../final_annotation_val.txt", 'w', encoding='utf-8') as f: for line in cleaned_new_annos: f.write(line) print("finished") \ No newline at end of file diff --git a/scripts/rearrange_speaker.py b/scripts/rearrange_speaker.py index de0f754..40e41ee 100644 --- a/scripts/rearrange_speaker.py +++ b/scripts/rearrange_speaker.py @@ -21,16 +21,16 @@ if __name__ == "__main__": hps['speakers'][speaker] = i hps['data']['n_speakers'] = len(valid_speakers) model_sd['model']['emb_g.weight'] = new_emb_g - with open("./finetune_speaker.json", 'w', encoding='utf-8') as f: + with open("../finetune_speaker.json", 'w', encoding='utf-8') as f: json.dump(hps, f, indent=2) - torch.save(model_sd, "./G_latest.pth") + torch.save(model_sd, "../G_latest.pth") else: - with open("./finetune_speaker.json", 'w', encoding='utf-8') as f: + with open("../finetune_speaker.json", 'w', encoding='utf-8') as f: json.dump(hps, f, indent=2) - torch.save(model_sd, "./G_latest.pth") + torch.save(model_sd, "../G_latest.pth") # save another config file copy in MoeGoe format hps['speakers'] = valid_speakers - with open("./moegoe_config.json", 'w', encoding='utf-8') as f: + with open("../moegoe_config.json", 'w', encoding='utf-8') as f: json.dump(hps, f, indent=2) diff --git a/scripts/short_audio_transcribe.py b/scripts/short_audio_transcribe.py index 2cf3c69..110a5f1 100644 --- a/scripts/short_audio_transcribe.py +++ b/scripts/short_audio_transcribe.py @@ -51,12 +51,12 @@ if __name__ == "__main__": } assert (torch.cuda.is_available()), "Please enable GPU in order to run Whisper!" model = whisper.load_model(args.whisper_size) - parent_dir = "./custom_character_voice/" + parent_dir = "../custom_character_voice/" speaker_names = list(os.walk(parent_dir))[0][1] speaker_annos = [] # resample audios # 2023/4/21: Get the target sampling rate - with open("./configs/finetune_speaker.json", 'r', encoding='utf-8') as f: + with open("../configs/finetune_speaker.json", 'r', encoding='utf-8') as f: hps = json.load(f) target_sr = hps['data']['sampling_rate'] for speaker in speaker_names: diff --git a/scripts/video2audio.py b/scripts/video2audio.py index db50a5c..0094b02 100644 --- a/scripts/video2audio.py +++ b/scripts/video2audio.py @@ -3,8 +3,8 @@ from concurrent.futures import ThreadPoolExecutor from moviepy.editor import AudioFileClip -video_dir = "./video_data/" -audio_dir = "./raw_audio/" +video_dir = "../video_data/" +audio_dir = "../raw_audio/" filelist = list(os.walk(video_dir))[0][2] diff --git a/scripts/voice_upload.py b/scripts/voice_upload.py index 5c825a9..1f013d5 100644 --- a/scripts/voice_upload.py +++ b/scripts/voice_upload.py @@ -12,17 +12,17 @@ if __name__ == "__main__": uploaded = files.upload() # 上传文件 assert(file_type in ['zip', 'audio', 'video']) if file_type == "zip": - upload_path = "./custom_character_voice/" + upload_path = "../custom_character_voice/" for filename in uploaded.keys(): #将上传的文件移动到指定的位置上 shutil.move(os.path.join(basepath, filename), os.path.join(upload_path, "custom_character_voice.zip")) elif file_type == "audio": - upload_path = "./raw_audio/" + upload_path = "../raw_audio/" for filename in uploaded.keys(): #将上传的文件移动到指定的位置上 shutil.move(os.path.join(basepath, filename), os.path.join(upload_path, filename)) elif file_type == "video": - upload_path = "./video_data/" + upload_path = "../video_data/" for filename in uploaded.keys(): # 将上传的文件移动到指定的位置上 shutil.move(os.path.join(basepath, filename), os.path.join(upload_path, filename)) \ No newline at end of file