rearranged repo

This commit is contained in:
Plachta
2023-04-21 21:39:41 +08:00
parent 2612e5dbcc
commit 3d7e4220d4
7 changed files with 28 additions and 28 deletions
+3 -3
View File
@@ -1,11 +1,11 @@
import os
import json
import torchaudio
raw_audio_dir = "../raw_audio/"
denoise_audio_dir = "../denoised_audio/"
raw_audio_dir = "./raw_audio/"
denoise_audio_dir = "./denoised_audio/"
filelist = list(os.walk(raw_audio_dir))[0][2]
# 2023/4/21: Get the target sampling rate
with open("../configs/finetune_speaker.json", 'r', encoding='utf-8') as f:
with open("./configs/finetune_speaker.json", 'r', encoding='utf-8') as f:
hps = json.load(f)
target_sr = hps['data']['sampling_rate']
for file in filelist:
+3 -3
View File
@@ -6,7 +6,7 @@ import torchaudio
import librosa
import torch
import argparse
parent_dir = "../denoised_audio/"
parent_dir = "./denoised_audio/"
filelist = list(os.walk(parent_dir))[0][2]
if __name__ == "__main__":
parser = argparse.ArgumentParser()
@@ -29,7 +29,7 @@ if __name__ == "__main__":
'zh': "[ZH]",
}
assert(torch.cuda.is_available()), "Please enable GPU in order to run Whisper!"
with open("../configs/finetune_speaker.json", 'r', encoding='utf-8') as f:
with open("./configs/finetune_speaker.json", 'r', encoding='utf-8') as f:
hps = json.load(f)
target_sr = hps['data']['sampling_rate']
model = whisper.load_model(args.whisper_size)
@@ -70,6 +70,6 @@ if __name__ == "__main__":
if len(speaker_annos) == 0:
print("Warning: no long audios & videos found, this IS expected if you have only uploaded short audios")
print("this IS NOT expected if you have uploaded any long audios, videos or video links. Please check your file structure or make sure your audio/video language is supported.")
with open("../long_character_anno.txt", 'w', encoding='utf-8') as f:
with open("./long_character_anno.txt", 'w', encoding='utf-8') as f:
for line in speaker_annos:
f.write(line)
+11 -11
View File
@@ -19,8 +19,8 @@ if __name__ == "__main__":
short_character_anno = f.readlines()
new_annos += short_character_anno
# Source 2: transcribed long audio segments
if os.path.exists("../long_character_anno.txt"):
with open("../long_character_anno.txt", 'r', encoding='utf-8') as f:
if os.path.exists("./long_character_anno.txt"):
with open("./long_character_anno.txt", 'r', encoding='utf-8') as f:
long_character_anno = f.readlines()
new_annos += long_character_anno
@@ -33,7 +33,7 @@ if __name__ == "__main__":
assert (len(speakers) != 0), "No audio file found. Please check your uploaded file structure."
# Source 3 (Optional): sampled audios as extra training helpers
if args.add_auxiliary_data:
with open("../sampled_audio4ft.txt", 'r', encoding='utf-8') as f:
with open("./sampled_audio4ft.txt", 'r', encoding='utf-8') as f:
old_annos = f.readlines()
# filter old_annos according to supported languages
filtered_old_annos = []
@@ -55,7 +55,7 @@ if __name__ == "__main__":
# STEP 2: modify config file
with open("../configs/finetune_speaker.json", 'r', encoding='utf-8') as f:
with open("./configs/finetune_speaker.json", 'r', encoding='utf-8') as f:
hps = json.load(f)
# assign ids to new speakers
@@ -72,7 +72,7 @@ if __name__ == "__main__":
hps['data']['training_files'] = "final_annotation_train.txt"
hps['data']['validation_files'] = "final_annotation_val.txt"
# save modified config
with open("../configs/modified_finetune_speaker.json", 'w', encoding='utf-8') as f:
with open("./configs/modified_finetune_speaker.json", 'w', encoding='utf-8') as f:
json.dump(hps, f, indent=2)
# STEP 3: clean annotations, replace speaker names with assigned speaker IDs
@@ -96,18 +96,18 @@ if __name__ == "__main__":
# merge with old annotation
final_annos = cleaned_old_annos + cc_duplicate * cleaned_new_annos
# save annotation file
with open("../final_annotation_train.txt", 'w', encoding='utf-8') as f:
with open("./final_annotation_train.txt", 'w', encoding='utf-8') as f:
for line in final_annos:
f.write(line)
# save annotation file for validation
with open("../final_annotation_val.txt", 'w', encoding='utf-8') as f:
with open("./final_annotation_val.txt", 'w', encoding='utf-8') as f:
for line in cleaned_new_annos:
f.write(line)
print("finished")
else:
# Do not add extra helper data
# STEP 1: modify config file
with open("../configs/finetune_speaker.json", 'r', encoding='utf-8') as f:
with open("./configs/finetune_speaker.json", 'r', encoding='utf-8') as f:
hps = json.load(f)
# assign ids to new speakers
@@ -124,7 +124,7 @@ if __name__ == "__main__":
hps['data']['training_files'] = "final_annotation_train.txt"
hps['data']['validation_files'] = "final_annotation_val.txt"
# save modified config
with open("../configs/modified_finetune_speaker.json", 'w', encoding='utf-8') as f:
with open("./configs/modified_finetune_speaker.json", 'w', encoding='utf-8') as f:
json.dump(hps, f, indent=2)
# STEP 2: clean annotations, replace speaker names with assigned speaker IDs
@@ -141,11 +141,11 @@ if __name__ == "__main__":
final_annos = cleaned_new_annos
# save annotation file
with open("../final_annotation_train.txt", 'w', encoding='utf-8') as f:
with open("./final_annotation_train.txt", 'w', encoding='utf-8') as f:
for line in final_annos:
f.write(line)
# save annotation file for validation
with open("../final_annotation_val.txt", 'w', encoding='utf-8') as f:
with open("./final_annotation_val.txt", 'w', encoding='utf-8') as f:
for line in cleaned_new_annos:
f.write(line)
print("finished")
+5 -5
View File
@@ -21,16 +21,16 @@ if __name__ == "__main__":
hps['speakers'][speaker] = i
hps['data']['n_speakers'] = len(valid_speakers)
model_sd['model']['emb_g.weight'] = new_emb_g
with open("../finetune_speaker.json", 'w', encoding='utf-8') as f:
with open("./finetune_speaker.json", 'w', encoding='utf-8') as f:
json.dump(hps, f, indent=2)
torch.save(model_sd, "../G_latest.pth")
torch.save(model_sd, "./G_latest.pth")
else:
with open("../finetune_speaker.json", 'w', encoding='utf-8') as f:
with open("./finetune_speaker.json", 'w', encoding='utf-8') as f:
json.dump(hps, f, indent=2)
torch.save(model_sd, "../G_latest.pth")
torch.save(model_sd, "./G_latest.pth")
# save another config file copy in MoeGoe format
hps['speakers'] = valid_speakers
with open("../moegoe_config.json", 'w', encoding='utf-8') as f:
with open("./moegoe_config.json", 'w', encoding='utf-8') as f:
json.dump(hps, f, indent=2)
+1 -1
View File
@@ -56,7 +56,7 @@ if __name__ == "__main__":
speaker_annos = []
# resample audios
# 2023/4/21: Get the target sampling rate
with open("../configs/finetune_speaker.json", 'r', encoding='utf-8') as f:
with open("./configs/finetune_speaker.json", 'r', encoding='utf-8') as f:
hps = json.load(f)
target_sr = hps['data']['sampling_rate']
for speaker in speaker_names:
+2 -2
View File
@@ -3,8 +3,8 @@ from concurrent.futures import ThreadPoolExecutor
from moviepy.editor import AudioFileClip
video_dir = "../video_data/"
audio_dir = "../raw_audio/"
video_dir = "./video_data/"
audio_dir = "./raw_audio/"
filelist = list(os.walk(video_dir))[0][2]
+3 -3
View File
@@ -12,17 +12,17 @@ if __name__ == "__main__":
uploaded = files.upload() # 上传文件
assert(file_type in ['zip', 'audio', 'video'])
if file_type == "zip":
upload_path = "../custom_character_voice/"
upload_path = "./custom_character_voice/"
for filename in uploaded.keys():
#将上传的文件移动到指定的位置上
shutil.move(os.path.join(basepath, filename), os.path.join(upload_path, "custom_character_voice.zip"))
elif file_type == "audio":
upload_path = "../raw_audio/"
upload_path = "./raw_audio/"
for filename in uploaded.keys():
#将上传的文件移动到指定的位置上
shutil.move(os.path.join(basepath, filename), os.path.join(upload_path, filename))
elif file_type == "video":
upload_path = "../video_data/"
upload_path = "./video_data/"
for filename in uploaded.keys():
# 将上传的文件移动到指定的位置上
shutil.move(os.path.join(basepath, filename), os.path.join(upload_path, filename))