[+] Create validation script for CN-Celeb

This commit is contained in:
Hykilpikonna
2021-12-23 15:07:42 -05:00
parent 4c8c43b8ef
commit 963e50dfbc
2 changed files with 78 additions and 3 deletions
+7 -3
View File
@@ -138,12 +138,12 @@ def draw_result(file: str, result: Result):
return to_wav(file, wav_callback)
def get_result_percentages(result: Result) -> tuple[float, float, float]:
def get_result_percentages(result: Result) -> tuple[float, float, float, float]:
"""
Get percentages
:param result: Result
:return: %female, %male, %other
:return: %female, %male, %other, %female-vs-female+male
"""
# Count total and categorical durations
total_dur = 0
@@ -160,7 +160,11 @@ def get_result_percentages(result: Result) -> tuple[float, float, float]:
# Return results
f = durations.get('female', 0)
m = durations.get('male', 0)
return f, m, 1 - f - m
fm_total = f + m
pf = 0 if fm_total == 0 else f / fm_total
return f, m, 1 - f - m, pf
def test():
+71
View File
@@ -0,0 +1,71 @@
import json
import os
import warnings
from pathlib import Path
import numpy as np
from inaSpeechSegmenter import Segmenter
from ina_main import process, get_result_percentages
def segment_all():
    """
    Segment every not-yet-processed utterance and store the results as JSON

    For each entry of the module-level ``data_dir`` whose name starts with
    ``id``, every ``.flac`` recording that does not start with ``singing`` and
    has no sibling ``.json`` file yet is passed to ``process``. For each result
    a ``<utterance>.json`` file is written containing the frames (``f``) and
    the ratios rounded to three decimals (``r``). A ``total.json`` holding the
    per-type and overall ratio averages is then written next to them.

    NOTE(review): ``total.json`` is computed only from the utterances handled
    in the current run. When the script is resumed after a partial run, the
    already-segmented utterances are skipped and therefore not included in the
    averages that overwrite the previous ``total.json`` - confirm whether that
    is intended.
    """
    # get_result_percentages returns four ratios:
    # %female, %male, %other, %female-vs-female+male
    n_ratios = 4

    # Create segmenter
    seg = Segmenter()
    # Do not report NumPy "invalid value" floating-point errors
    np.seterr(invalid='ignore')

    # Loop through all celebrities
    speaker_ids = [name for name in os.listdir(data_dir) if name.startswith('id')]
    for speaker_id in speaker_ids:
        id_dir = data_dir.joinpath(speaker_id)

        # Loop through all recordings (Exclude singing for now)
        # Exclude existing: a sibling .json marks an already-analyzed utterance
        utters = [
            u for u in id_dir.iterdir()
            if u.name.endswith('.flac')
            and not u.name.startswith('singing')
            and not u.with_suffix('.json').exists()
        ]
        if not utters:
            continue

        # Analyze
        results = process(seg, [str(u) for u in utters], verbose=True)

        # Write results, accumulating ratio sums and counts per utterance type
        total_sums = [0] * n_ratios
        total_count = 0
        type_sums = {}
        type_counts = {}
        for result in results.results:
            file = Path(result.file).with_suffix('.json')

            # Get results
            # f: Frames, r: Ratios
            ratios = [round(r, 3) for r in get_result_percentages(result)]
            stored = {'f': result.frames, 'r': ratios}

            # The utterance type is the file name up to the first '-'.
            # partition() falls back to the whole stem instead of raising
            # ValueError when the name contains no hyphen.
            utter_type = file.stem.partition('-')[0]
            sums = type_sums.setdefault(utter_type, [0] * n_ratios)
            for i in range(n_ratios):
                sums[i] += ratios[i]
                total_sums[i] += ratios[i]
            type_counts[utter_type] = type_counts.get(utter_type, 0) + 1
            total_count += 1

            # Write result
            file.write_text(json.dumps(stored))

        # Nothing was analyzed: there is no average to write (avoids a
        # division by zero below)
        if total_count == 0:
            continue

        # Write type averages
        type_averages = {
            t: [s / type_counts[t] for s in sums]
            for t, sums in type_sums.items()
        }
        total_average = [s / total_count for s in total_sums]
        obj = {'type_averages': type_averages, 'total_averages': total_average}
        id_dir.joinpath('total.json').write_text(json.dumps(obj))
if __name__ == '__main__':
    # Root folder of the local CN-Celeb copy (machine-specific path)
    cn_celeb_root = Path('C:/Users/me/Workspace/Data/CN-Celeb_flac')
    # Must stay a module-level name: segment_all() reads data_dir as a global
    data_dir = cn_celeb_root / 'data'

    segment_all()