rearranged repo
This commit is contained in:
@@ -1,11 +1,11 @@
|
||||
import os
|
||||
import json
|
||||
import torchaudio
|
||||
raw_audio_dir = "../raw_audio/"
|
||||
denoise_audio_dir = "../denoised_audio/"
|
||||
raw_audio_dir = "./raw_audio/"
|
||||
denoise_audio_dir = "./denoised_audio/"
|
||||
filelist = list(os.walk(raw_audio_dir))[0][2]
|
||||
# 2023/4/21: Get the target sampling rate
|
||||
with open("../configs/finetune_speaker.json", 'r', encoding='utf-8') as f:
|
||||
with open("./configs/finetune_speaker.json", 'r', encoding='utf-8') as f:
|
||||
hps = json.load(f)
|
||||
target_sr = hps['data']['sampling_rate']
|
||||
for file in filelist:
|
||||
|
||||
@@ -6,7 +6,7 @@ import torchaudio
|
||||
import librosa
|
||||
import torch
|
||||
import argparse
|
||||
parent_dir = "../denoised_audio/"
|
||||
parent_dir = "./denoised_audio/"
|
||||
filelist = list(os.walk(parent_dir))[0][2]
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser()
|
||||
@@ -29,7 +29,7 @@ if __name__ == "__main__":
|
||||
'zh': "[ZH]",
|
||||
}
|
||||
assert(torch.cuda.is_available()), "Please enable GPU in order to run Whisper!"
|
||||
with open("../configs/finetune_speaker.json", 'r', encoding='utf-8') as f:
|
||||
with open("./configs/finetune_speaker.json", 'r', encoding='utf-8') as f:
|
||||
hps = json.load(f)
|
||||
target_sr = hps['data']['sampling_rate']
|
||||
model = whisper.load_model(args.whisper_size)
|
||||
@@ -70,6 +70,6 @@ if __name__ == "__main__":
|
||||
if len(speaker_annos) == 0:
|
||||
print("Warning: no long audios & videos found, this IS expected if you have only uploaded short audios")
|
||||
print("this IS NOT expected if you have uploaded any long audios, videos or video links. Please check your file structure or make sure your audio/video language is supported.")
|
||||
with open("../long_character_anno.txt", 'w', encoding='utf-8') as f:
|
||||
with open("./long_character_anno.txt", 'w', encoding='utf-8') as f:
|
||||
for line in speaker_annos:
|
||||
f.write(line)
|
||||
|
||||
+11
-11
@@ -19,8 +19,8 @@ if __name__ == "__main__":
|
||||
short_character_anno = f.readlines()
|
||||
new_annos += short_character_anno
|
||||
# Source 2: transcribed long audio segments
|
||||
if os.path.exists("../long_character_anno.txt"):
|
||||
with open("../long_character_anno.txt", 'r', encoding='utf-8') as f:
|
||||
if os.path.exists("./long_character_anno.txt"):
|
||||
with open("./long_character_anno.txt", 'r', encoding='utf-8') as f:
|
||||
long_character_anno = f.readlines()
|
||||
new_annos += long_character_anno
|
||||
|
||||
@@ -33,7 +33,7 @@ if __name__ == "__main__":
|
||||
assert (len(speakers) != 0), "No audio file found. Please check your uploaded file structure."
|
||||
# Source 3 (Optional): sampled audios as extra training helpers
|
||||
if args.add_auxiliary_data:
|
||||
with open("../sampled_audio4ft.txt", 'r', encoding='utf-8') as f:
|
||||
with open("./sampled_audio4ft.txt", 'r', encoding='utf-8') as f:
|
||||
old_annos = f.readlines()
|
||||
# filter old_annos according to supported languages
|
||||
filtered_old_annos = []
|
||||
@@ -55,7 +55,7 @@ if __name__ == "__main__":
|
||||
|
||||
|
||||
# STEP 2: modify config file
|
||||
with open("../configs/finetune_speaker.json", 'r', encoding='utf-8') as f:
|
||||
with open("./configs/finetune_speaker.json", 'r', encoding='utf-8') as f:
|
||||
hps = json.load(f)
|
||||
|
||||
# assign ids to new speakers
|
||||
@@ -72,7 +72,7 @@ if __name__ == "__main__":
|
||||
hps['data']['training_files'] = "final_annotation_train.txt"
|
||||
hps['data']['validation_files'] = "final_annotation_val.txt"
|
||||
# save modified config
|
||||
with open("../configs/modified_finetune_speaker.json", 'w', encoding='utf-8') as f:
|
||||
with open("./configs/modified_finetune_speaker.json", 'w', encoding='utf-8') as f:
|
||||
json.dump(hps, f, indent=2)
|
||||
|
||||
# STEP 3: clean annotations, replace speaker names with assigned speaker IDs
|
||||
@@ -96,18 +96,18 @@ if __name__ == "__main__":
|
||||
# merge with old annotation
|
||||
final_annos = cleaned_old_annos + cc_duplicate * cleaned_new_annos
|
||||
# save annotation file
|
||||
with open("../final_annotation_train.txt", 'w', encoding='utf-8') as f:
|
||||
with open("./final_annotation_train.txt", 'w', encoding='utf-8') as f:
|
||||
for line in final_annos:
|
||||
f.write(line)
|
||||
# save annotation file for validation
|
||||
with open("../final_annotation_val.txt", 'w', encoding='utf-8') as f:
|
||||
with open("./final_annotation_val.txt", 'w', encoding='utf-8') as f:
|
||||
for line in cleaned_new_annos:
|
||||
f.write(line)
|
||||
print("finished")
|
||||
else:
|
||||
# Do not add extra helper data
|
||||
# STEP 1: modify config file
|
||||
with open("../configs/finetune_speaker.json", 'r', encoding='utf-8') as f:
|
||||
with open("./configs/finetune_speaker.json", 'r', encoding='utf-8') as f:
|
||||
hps = json.load(f)
|
||||
|
||||
# assign ids to new speakers
|
||||
@@ -124,7 +124,7 @@ if __name__ == "__main__":
|
||||
hps['data']['training_files'] = "final_annotation_train.txt"
|
||||
hps['data']['validation_files'] = "final_annotation_val.txt"
|
||||
# save modified config
|
||||
with open("../configs/modified_finetune_speaker.json", 'w', encoding='utf-8') as f:
|
||||
with open("./configs/modified_finetune_speaker.json", 'w', encoding='utf-8') as f:
|
||||
json.dump(hps, f, indent=2)
|
||||
|
||||
# STEP 2: clean annotations, replace speaker names with assigned speaker IDs
|
||||
@@ -141,11 +141,11 @@ if __name__ == "__main__":
|
||||
|
||||
final_annos = cleaned_new_annos
|
||||
# save annotation file
|
||||
with open("../final_annotation_train.txt", 'w', encoding='utf-8') as f:
|
||||
with open("./final_annotation_train.txt", 'w', encoding='utf-8') as f:
|
||||
for line in final_annos:
|
||||
f.write(line)
|
||||
# save annotation file for validation
|
||||
with open("../final_annotation_val.txt", 'w', encoding='utf-8') as f:
|
||||
with open("./final_annotation_val.txt", 'w', encoding='utf-8') as f:
|
||||
for line in cleaned_new_annos:
|
||||
f.write(line)
|
||||
print("finished")
|
||||
@@ -21,16 +21,16 @@ if __name__ == "__main__":
|
||||
hps['speakers'][speaker] = i
|
||||
hps['data']['n_speakers'] = len(valid_speakers)
|
||||
model_sd['model']['emb_g.weight'] = new_emb_g
|
||||
with open("../finetune_speaker.json", 'w', encoding='utf-8') as f:
|
||||
with open("./finetune_speaker.json", 'w', encoding='utf-8') as f:
|
||||
json.dump(hps, f, indent=2)
|
||||
torch.save(model_sd, "../G_latest.pth")
|
||||
torch.save(model_sd, "./G_latest.pth")
|
||||
else:
|
||||
with open("../finetune_speaker.json", 'w', encoding='utf-8') as f:
|
||||
with open("./finetune_speaker.json", 'w', encoding='utf-8') as f:
|
||||
json.dump(hps, f, indent=2)
|
||||
torch.save(model_sd, "../G_latest.pth")
|
||||
torch.save(model_sd, "./G_latest.pth")
|
||||
# save another config file copy in MoeGoe format
|
||||
hps['speakers'] = valid_speakers
|
||||
with open("../moegoe_config.json", 'w', encoding='utf-8') as f:
|
||||
with open("./moegoe_config.json", 'w', encoding='utf-8') as f:
|
||||
json.dump(hps, f, indent=2)
|
||||
|
||||
|
||||
|
||||
@@ -56,7 +56,7 @@ if __name__ == "__main__":
|
||||
speaker_annos = []
|
||||
# resample audios
|
||||
# 2023/4/21: Get the target sampling rate
|
||||
with open("../configs/finetune_speaker.json", 'r', encoding='utf-8') as f:
|
||||
with open("./configs/finetune_speaker.json", 'r', encoding='utf-8') as f:
|
||||
hps = json.load(f)
|
||||
target_sr = hps['data']['sampling_rate']
|
||||
for speaker in speaker_names:
|
||||
|
||||
@@ -3,8 +3,8 @@ from concurrent.futures import ThreadPoolExecutor
|
||||
|
||||
from moviepy.editor import AudioFileClip
|
||||
|
||||
video_dir = "../video_data/"
|
||||
audio_dir = "../raw_audio/"
|
||||
video_dir = "./video_data/"
|
||||
audio_dir = "./raw_audio/"
|
||||
filelist = list(os.walk(video_dir))[0][2]
|
||||
|
||||
|
||||
|
||||
@@ -12,17 +12,17 @@ if __name__ == "__main__":
|
||||
uploaded = files.upload() # 上传文件
|
||||
assert(file_type in ['zip', 'audio', 'video'])
|
||||
if file_type == "zip":
|
||||
upload_path = "../custom_character_voice/"
|
||||
upload_path = "./custom_character_voice/"
|
||||
for filename in uploaded.keys():
|
||||
#将上传的文件移动到指定的位置上
|
||||
shutil.move(os.path.join(basepath, filename), os.path.join(upload_path, "custom_character_voice.zip"))
|
||||
elif file_type == "audio":
|
||||
upload_path = "../raw_audio/"
|
||||
upload_path = "./raw_audio/"
|
||||
for filename in uploaded.keys():
|
||||
#将上传的文件移动到指定的位置上
|
||||
shutil.move(os.path.join(basepath, filename), os.path.join(upload_path, filename))
|
||||
elif file_type == "video":
|
||||
upload_path = "../video_data/"
|
||||
upload_path = "./video_data/"
|
||||
for filename in uploaded.keys():
|
||||
# 将上传的文件移动到指定的位置上
|
||||
shutil.move(os.path.join(basepath, filename), os.path.join(upload_path, filename))
|
||||
Reference in New Issue
Block a user