[+] Graph tilt statistics

This commit is contained in:
Hykilpikonna
2022-03-21 01:25:36 -04:00
parent c81cbc85bc
commit 400afd7413
+69 -29
View File
@@ -4,6 +4,7 @@ import csv
import json
import os
from dataclasses import dataclass
from json import JSONDecodeError
from multiprocessing import Pool
from os import PathLike
from pathlib import Path
@@ -184,7 +185,7 @@ def calc_col_stats(col: np.ndarray) -> Statistics:
)
def calculate_statistics(arr: np.ndarray) -> FrequencyStats:
def calculate_freq_statistics(arr: np.ndarray) -> FrequencyStats:
"""
Calculate frequency data array statistics
@@ -197,25 +198,43 @@ def calculate_statistics(arr: np.ndarray) -> FrequencyStats:
return FrequencyStats(*result)
def vox_celeb_statistics_helper(id_dir: Path):
def vox_celeb_statistics_freq(id_dir: Path):
# Load all files
cumulative: np.ndarray = np.concatenate([np.load(f) for f in get_audio_paths(id_dir, 'npy')])
# Remove out NaN values
cumulative = cumulative[~np.isnan(cumulative).any(axis=1), :]
result = calculate_statistics(cumulative)
result = calculate_freq_statistics(cumulative)
# Write results
with open(id_dir.joinpath('stats.json'), 'w') as jsonfile:
jsonfile.write(jsonpickle.encode(result, jsonfile, indent=1))
def vox_celeb_statistics_tilt(id_dir: Path):
# Load all calculated files
cumulative = []
for f in get_audio_paths(id_dir, 'json'):
try:
cumulative.append(json.loads(Path(f).read_text('utf-8'))['tilt'])
except JSONDecodeError:
print(f'Error in {f}')
# Remove out NaN values
cumulative = [c for c in cumulative if c is not None]
result = calc_col_stats(np.array(cumulative))
# Write results
with open(id_dir.joinpath('tilt.json'), 'w') as jsonfile:
jsonfile.write(jsonpickle.encode(result, jsonfile, indent=1))
def vox_celeb_statistics():
id_dirs = [id_dir for id, id_dir in loop_id_dirs()]
# Loop through all ids
with Pool(CPU_CORES) as pool:
for _ in tqdm.tqdm(pool.imap(vox_celeb_statistics_helper, id_dirs), total=len(id_dirs)):
for _ in tqdm.tqdm(pool.imap(vox_celeb_statistics_tilt, id_dirs), total=len(id_dirs)):
pass
@@ -242,27 +261,9 @@ def collect_statistics():
m_means = np.array([[t.mean for t in [s.pitch, s.f1, s.f2, s.f3, s.f1ratio, s.f2ratio, s.f3ratio]]
for s, ag in stats_list if ag == 'm'])
# Plot histograms
# for i in range(len(headers)):
# fig, ax = subplots()
#
# ax.set_title(f'Statistical Differences of {headers[i]}')
# if 'Ratio' in headers[i]:
# ax.set_xlabel('Multiplier from Pitch')
# else:
# ax.set_xlabel('Frequency (hz)')
#
# ax.hist(f_means[:, i], bins=40, color='#F5A9B8', alpha=0.5)
# ax.twinx().hist(m_means[:, i], bins=40, color='#5BCEFA', alpha=0.5)
#
# plt.show()
# plt.close()
# Plot bar chart
sns.set_theme(style="ticks")
fig, ax = subplots(figsize=(10, 5))
# ax.set_xscale('log')
#print(sns.load_dataset('tips'))
print("Pitch")
print(calc_col_stats(f_means[:, 0]))
@@ -279,12 +280,7 @@ def collect_statistics():
df = pd.DataFrame({headers[i]: f_means[:, i] for i in range(4)})
dm = pd.DataFrame({headers[i]: m_means[:, i] for i in range(4)})
# data.boxplot()
# sns.boxplot(data=df, orient='h', color='#F5A9B8', linewidth=0.5)
# sns.boxplot(data=dm, orient='h', color='#5BCEFA', linewidth=0.5)
# sns.stripplot(x="distance", y="method", data=data, size=4, color=".3", linewidth=0)
args = dict(orient='h', scale='width', inner='quartile', linewidth=0.5)
#dt=pd.DataFrame({"Female":df, "Male":dm})
sns.violinplot(data=df, color=COLOR_PINK, **args)
sns.violinplot(data=dm, color=COLOR_BLUE, **args)
[c.set_alpha(0.7) for c in ax.collections]
@@ -304,12 +300,56 @@ def collect_statistics():
plt.show()
def collect_tilt():
"""
Collect statistics and draw interesting visualizations from its results
"""
# Read stats
stats_list: list[tuple[Statistics, ASAB]] = []
for id, id_dir in loop_id_dirs():
stats_dir = id_dir.joinpath('tilt.json')
if not stats_dir.is_file():
continue
stats_list.append((jsonpickle.decode(stats_dir.read_text()), agab[id]))
# Get AFAB and AMAB means
f_means = np.array([s.mean for s, ag in stats_list if ag == 'f'])
m_means = np.array([s.mean for s, ag in stats_list if ag == 'm'])
# Plot bar chart
sns.set_theme(style="ticks")
fig, ax = subplots(figsize=(10, 5))
df = pd.DataFrame({"Tilt": f_means})
dm = pd.DataFrame({"Tilt": m_means})
args = dict(orient='h', scale='width', inner='quartile', linewidth=0.5)
sns.violinplot(data=df, color=COLOR_PINK, **args)
sns.violinplot(data=dm, color=COLOR_BLUE, **args)
[c.set_alpha(0.7) for c in ax.collections]
# Create legend
legend_elements = [
Patch(facecolor=COLOR_PINK, edgecolor='r', label='Feminine'),
Patch(facecolor=COLOR_BLUE, edgecolor='b', label='Masculine'),
]
plt.legend(handles=legend_elements)
ax.set_title("Distribution of Spectral Tilt on Gender")
ax.xaxis.grid(True)
ax.set_ylabel('')
ax.set_xlabel('Tilt Value')
sns.despine(fig, ax)
plt.show()
if __name__ == '__main__':
vox_celeb_dir = Path('C:/Workspace/EECS 6414/Datasets/VoxCeleb1/wav')
vox_celeb_dir = Path('C:/Datasets/VoxCeleb1/wav')
agab = load_vox_celeb_asab_dict(vox_celeb_dir.joinpath('../vox1_meta.csv'))
# print(calculate_freq_info(parselmouth.Sound('../00001.wav')))
# print(calculate_freq_info(parselmouth.Sound('D:/Downloads/Vowels-Extract-Z-44kHz.flac')))
# print(calculate_freq_info(parselmouth.Sound('D:/Downloads/Vowels-Azalea.flac')))
compute_vox_celeb()
# compute_vox_celeb()
# vox_celeb_statistics()
# collect_statistics()
collect_tilt()