Compare commits
21 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 565542996d | |||
| d322444f03 | |||
| 689e7d5e83 | |||
| 1bfd4c4346 | |||
| e6c4e7c612 | |||
| 1b20c97d84 | |||
| 91437f30f5 | |||
| 1aee72ace2 | |||
| da1667e241 | |||
| 826f5bc486 | |||
| ca26e3f058 | |||
| 906d649a39 | |||
| 568ed8db71 | |||
| b9353a8aed | |||
| bad9773996 | |||
| 2750ab4320 | |||
| dffdcf1f92 | |||
| 5c5110ee83 | |||
| 0080bd83e5 | |||
| d0f717949f | |||
| 26d73583ef |
+10
@@ -0,0 +1,10 @@
|
||||
__pycache__/
|
||||
|
||||
data/*.*
|
||||
!data/examples/
|
||||
output/*.*
|
||||
tracking_model.dat
|
||||
tracking_model.dat.gz
|
||||
temp/*.*
|
||||
|
||||
.idea
|
||||
@@ -1,9 +1,7 @@
|
||||
DeepFormants
|
||||
============
|
||||
# DeepFormants - PyTorch
|
||||
|
||||
Shua Dissen (shua.dissen@gmail.com)
|
||||
Joseph Keshet (joseph.keshet@biu.ac.il)
|
||||
|
||||
Joseph Keshet (joseph.keshet@biu.ac.il)
|
||||
|
||||
DeepFormants is a software package for formant tracking and estimation, using two algorithms based on deep networks. It works as follows:
|
||||
* The user provides a wav file with an initial stop consonant.
|
||||
@@ -14,8 +12,6 @@ DeepFormants is a software package for formant tracking and estimation, using tw
|
||||
|
||||
This is a beta version of DeepFormants. Any reports of bugs, comments on how to improve the software or documentation, or questions are greatly appreciated, and should be sent to the authors at the addresses given above.
|
||||
|
||||
---
|
||||
|
||||
|
||||
## Installation instructions
|
||||
|
||||
|
||||
@@ -1 +0,0 @@
|
||||
__author__ = 'jkeshet'
|
||||
Binary file not shown.
@@ -1,2 +0,0 @@
|
||||
NAME,F1,F2,F3,F4
|
||||
Example.wav,0.44537545612545,1.6422395494712,2.4786549639365,3.7613720611662
|
||||
|
@@ -0,0 +1,2 @@
|
||||
NAME,F1,F2,F3,F4
|
||||
data/Example.wav,445.3781247138977,1642.2462463378906,2478.6531925201416,3761.37638092041
|
||||
|
@@ -0,0 +1,232 @@
|
||||
NAME,F1,F2,F3,F4
|
||||
data/Example.wav0,597.52205346157,1613.615825947,2511.8207285284,3755.1438757858
|
||||
data/Example.wav1,695.08480385345,1702.1706191558,2596.4426788053,3798.3721538423
|
||||
data/Example.wav2,726.79075772661,1730.6156241641,2529.5339933464,3807.384140635
|
||||
data/Example.wav3,671.77748170461,1823.1959179265,2642.5386767146,3921.9318337055
|
||||
data/Example.wav4,577.26908479146,1658.9374345583,2507.1805413575,3614.0162748738
|
||||
data/Example.wav5,550.50722496918,1354.6169786772,2112.3333772251,2942.3889707506
|
||||
data/Example.wav6,670.04808527628,1721.7457206609,2504.707938971,3639.9432310079
|
||||
data/Example.wav7,684.92226190631,1890.1166920604,2607.1436989061,3881.6625485381
|
||||
data/Example.wav8,686.580095755,2006.1529795143,2669.1368249138,3930.0540770768
|
||||
data/Example.wav9,704.616588865,2082.8735492951,2725.6508141261,3979.9081525148
|
||||
data/Example.wav10,718.34596795837,2129.5719518295,2778.9886243771,3911.4009284072
|
||||
data/Example.wav11,738.12104125024,2196.3921107039,2830.6141101685,3945.1600780317
|
||||
data/Example.wav12,747.04236867145,2237.8571572583,2865.1145994638,3983.2438101659
|
||||
data/Example.wav13,757.52185658947,2262.8835036017,2894.0268377733,4032.0698122443
|
||||
data/Example.wav14,794.56202137723,2292.9250146918,2904.0583816242,4064.956479487
|
||||
data/Example.wav15,790.79377531938,2305.3923552752,2914.8010797784,4102.8657742226
|
||||
data/Example.wav16,754.2014691902,2296.2740822086,2918.6506819728,4093.5949673686
|
||||
data/Example.wav17,753.61338439514,2305.5901354885,2941.4827120608,4114.9736720268
|
||||
data/Example.wav18,751.33215593808,2303.9278843335,2963.7034688043,4080.8958987697
|
||||
data/Example.wav19,751.74861908763,2294.1665486638,2969.1708133022,4052.0389260712
|
||||
data/Example.wav20,752.20384863157,2263.3609611672,2960.7171653614,4032.3822222124
|
||||
data/Example.wav21,770.17882943915,2258.3641355602,2947.7116753323,4020.992568346
|
||||
data/Example.wav22,781.72306969874,2275.7714060748,2968.5430522874,4073.3884117019
|
||||
data/Example.wav23,767.10101225833,2285.7353288563,2991.478364211,4105.5821065336
|
||||
data/Example.wav24,713.05002492772,2258.4466922682,2990.0470727972,4115.3480684532
|
||||
data/Example.wav25,611.7969034471,2167.9040942168,2922.8273892721,3995.5696686444
|
||||
data/Example.wav26,464.79210160717,2216.9364941434,2902.6787165574,3787.1029546751
|
||||
data/Example.wav27,392.65101221768,2241.0843289313,2882.2946023061,3729.1100713992
|
||||
data/Example.wav28,356.80962181559,2247.3252960538,2868.0795793285,3702.8591416446
|
||||
data/Example.wav29,336.9676772927,2249.0793891123,2856.1652102349,3688.9097495654
|
||||
data/Example.wav30,326.31158629661,2247.0007064468,2847.416021249,3678.3969558777
|
||||
data/Example.wav31,317.02540438115,2246.1378049692,2837.0713463389,3668.4663509487
|
||||
data/Example.wav32,316.79609159736,2238.2372739626,2824.6751485046,3658.6574288876
|
||||
data/Example.wav33,311.81580759928,2233.9672603427,2811.5325892632,3650.0889295259
|
||||
data/Example.wav34,309.75648639653,2222.1951339059,2790.4140506337,3637.495284395
|
||||
data/Example.wav35,303.11808105036,2197.8377902068,2759.3459658566,3621.4651632683
|
||||
data/Example.wav36,300.07752292403,2139.6717590425,2716.2862832378,3597.1403776907
|
||||
data/Example.wav37,297.80619082987,2029.1436415698,2666.5071300739,3593.0232729185
|
||||
data/Example.wav38,302.76576287517,1883.6037448571,2632.7944209903,3566.5764758258
|
||||
data/Example.wav39,310.58715857994,1695.7093762879,2601.2359905618,3578.9921031022
|
||||
data/Example.wav40,329.36107592013,1506.8001665802,2566.4777731297,3574.0241337336
|
||||
data/Example.wav41,333.62712502459,1334.9304246222,2537.2661992439,3615.8905250384
|
||||
data/Example.wav42,343.92517617242,1257.8469712718,2548.6360602514,3608.753460545
|
||||
data/Example.wav43,355.72406454493,1266.9462591767,2555.762409923,3563.6341640243
|
||||
data/Example.wav44,353.35995469911,1295.7360334708,2541.8108288522,3537.0785332541
|
||||
data/Example.wav45,360.10719906727,1333.1447032372,2550.7381205563,3524.8234949992
|
||||
data/Example.wav46,358.56568716995,1421.9066811358,2569.8299617768,3551.4837671167
|
||||
data/Example.wav47,357.76589312374,1508.5752248245,2581.5208384623,3571.1153114675
|
||||
data/Example.wav48,361.02168734969,1578.8816475878,2595.1474893025,3581.7115897268
|
||||
data/Example.wav49,359.07561862133,1644.8613659186,2608.8160358409,3585.0460760813
|
||||
data/Example.wav50,352.62221542712,1695.4771441192,2607.0448130886,3594.1609116241
|
||||
data/Example.wav51,363.93994626501,1711.5051886296,2624.5562096012,3569.9529344004
|
||||
data/Example.wav52,282.14832173631,1635.5238860263,2662.5388733105,3673.039938999
|
||||
data/Example.wav53,238.01956608046,1685.143014879,2681.7911930688,3747.4362196863
|
||||
data/Example.wav54,270.38958334904,1674.099569196,2620.586370391,3813.3469193242
|
||||
data/Example.wav55,287.42519287375,1656.0382437507,2548.1687198886,3885.8811197819
|
||||
data/Example.wav56,297.50982220042,1646.5536123116,2490.0838905831,4001.8684884366
|
||||
data/Example.wav57,269.0833478587,1655.0781624051,2533.8535493795,4098.626102004
|
||||
data/Example.wav58,291.68328316815,1563.9679575743,2514.3394114724,4068.4713671901
|
||||
data/Example.wav59,373.26591074927,1470.0576108226,2536.1465738964,3887.3007630968
|
||||
data/Example.wav60,464.84084023832,1476.0970324231,2596.3413545437,3802.4460913542
|
||||
data/Example.wav61,532.78835221886,1503.9007128017,2676.9189270645,3773.1451371845
|
||||
data/Example.wav62,576.16842542913,1521.9442378943,2715.5872800012,3795.4597172608
|
||||
data/Example.wav63,612.9538586962,1510.2095985565,2711.3466980751,3787.8266496721
|
||||
data/Example.wav64,629.38634961618,1492.0035227392,2758.1039316015,3813.3692044249
|
||||
data/Example.wav65,632.21786673436,1503.6049091858,2728.8614765733,3829.6745931041
|
||||
data/Example.wav66,628.76514164972,1399.37905218,2727.178823681,3732.1010399703
|
||||
data/Example.wav67,636.82812175669,1382.5526600019,2700.6454369069,3741.2410738449
|
||||
data/Example.wav68,644.06202835304,1402.8833288655,2678.6612844051,3763.5773931668
|
||||
data/Example.wav69,665.04075110941,1455.4049891716,2678.1758807676,3749.7199742507
|
||||
data/Example.wav70,663.02738666806,1495.5861490809,2605.1125468193,3906.3056119887
|
||||
data/Example.wav71,666.87736727536,1492.3669814759,2548.126781982,3984.947976164
|
||||
data/Example.wav72,565.20706464525,1576.2364598879,2632.6207344777,4071.1717865148
|
||||
data/Example.wav73,515.69661558703,1637.7513729536,2663.9521213724,4089.587512229
|
||||
data/Example.wav74,503.97065378186,1665.6196537584,2674.2600076661,4094.0318435706
|
||||
data/Example.wav75,425.4988884807,1766.4652022781,2696.6257683491,4260.4826318254
|
||||
data/Example.wav76,373.34254527995,1823.0121878332,2680.0339335153,4323.5695332979
|
||||
data/Example.wav77,352.98177798978,1867.7964949578,2662.6618621315,4338.3873722444
|
||||
data/Example.wav78,394.96498686686,1871.3700048344,2652.4816853078,4438.0938852379
|
||||
data/Example.wav79,357.15002055549,1775.3943852451,2597.8223938733,4298.8671017868
|
||||
data/Example.wav80,329.50023533097,1705.4079753771,2546.4962395447,4159.7082892855
|
||||
data/Example.wav81,307.70045447983,1823.3596586369,2600.5280126313,4126.8640001054
|
||||
data/Example.wav82,331.85469389276,1858.1179130077,2641.2121635204,4074.3609971156
|
||||
data/Example.wav83,362.95343041272,1890.9928289307,2682.2890612514,4041.5172878108
|
||||
data/Example.wav84,395.24816807451,1914.7150588898,2723.9607510773,3988.2281709193
|
||||
data/Example.wav85,411.2056449606,1929.6787431283,2747.8093509512,3937.973080989
|
||||
data/Example.wav86,421.20895459838,1931.8285764322,2762.7062554099,3864.7288960853
|
||||
data/Example.wav87,421.43928593408,1931.0866933353,2757.3828694265,3843.4370860218
|
||||
data/Example.wav88,438.84117644341,1972.4723949097,2762.0587333307,3821.7816269103
|
||||
data/Example.wav89,460.58079366567,2025.948628033,2754.6313403087,3805.124095497
|
||||
data/Example.wav90,421.62905597676,2044.0912153488,2711.1269892068,3769.3898804996
|
||||
data/Example.wav91,399.71741002399,2058.9260658802,2705.7870502926,3758.4098629294
|
||||
data/Example.wav92,416.14766913342,2037.6172281561,2677.054342556,3750.8528066943
|
||||
data/Example.wav93,430.8781057625,2009.8629319817,2639.5492023239,3682.7942428493
|
||||
data/Example.wav94,452.25319778136,1961.5882276272,2559.1694229194,3584.249952029
|
||||
data/Example.wav95,462.44628437735,1909.516047408,2455.4124318309,3545.9309872252
|
||||
data/Example.wav96,482.17313946191,1895.5672946205,2435.9499675054,3540.2498092093
|
||||
data/Example.wav97,490.35065220484,1866.8441069923,2390.5755659871,3495.2652643518
|
||||
data/Example.wav98,493.89642586283,1817.7929698775,2283.9382392473,3432.3026537516
|
||||
data/Example.wav99,499.32606963064,1768.2433574417,2182.6628391829,3404.4990461484
|
||||
data/Example.wav100,501.54606354894,1715.5962165142,2124.6538157508,3438.0197890818
|
||||
data/Example.wav101,496.8346053791,1661.3587948124,2034.4715092381,3429.0955445083
|
||||
data/Example.wav102,493.33019060232,1608.1278006391,1954.9967859222,3486.9523677942
|
||||
data/Example.wav103,490.26973081437,1584.3782120341,1903.4815374306,3501.8928142151
|
||||
data/Example.wav104,483.28591710344,1580.6280556335,1912.0731497185,3490.8593842755
|
||||
data/Example.wav105,474.44279661302,1577.0982086823,1890.4039266946,3458.2198078865
|
||||
data/Example.wav106,475.36082747879,1533.860163092,1899.7547954454,3478.3153656802
|
||||
data/Example.wav107,464.1285437072,1536.3873917434,1882.1706426847,3515.360763387
|
||||
data/Example.wav108,453.77042808842,1544.4154360464,1942.6263553674,3436.1058536443
|
||||
data/Example.wav109,450.13048837516,1594.8789733705,2047.5336956964,3498.6641428002
|
||||
data/Example.wav110,432.11177790474,1639.5515650555,2101.4367396583,3428.2132652911
|
||||
data/Example.wav111,418.74825272536,1681.9736788497,2135.8666050376,3417.2586872462
|
||||
data/Example.wav112,405.3719533601,1731.9672364294,2183.0647484563,3431.8668085449
|
||||
data/Example.wav113,392.88948911473,1786.3264020268,2210.5835715195,3438.776773868
|
||||
data/Example.wav114,373.474737724,1887.6661375527,2309.1027506154,3502.7933820456
|
||||
data/Example.wav115,347.58051484696,1989.5414702858,2441.3326833827,3581.8816356432
|
||||
data/Example.wav116,334.20452402519,2026.0437562222,2537.9952168654,3597.5159762089
|
||||
data/Example.wav117,322.56279270492,2086.2363407419,2655.2152492099,3641.7497208844
|
||||
data/Example.wav118,339.3131062837,2121.955494922,2714.5995462561,3700.8412808396
|
||||
data/Example.wav119,341.31843679468,2125.8452516914,2726.6475819345,3698.1913478513
|
||||
data/Example.wav120,360.47365940763,2083.9305855041,2714.2131982139,3698.8186274682
|
||||
data/Example.wav121,363.11877436488,2061.8058595753,2710.3114520987,3697.0097019804
|
||||
data/Example.wav122,383.06871293946,1994.4962807334,2660.309502954,3714.4305725283
|
||||
data/Example.wav123,404.08621222125,1937.6203617943,2620.6225064107,3662.4969942625
|
||||
data/Example.wav124,413.17243288101,1867.9566432618,2559.8090959119,3645.7829716702
|
||||
data/Example.wav125,420.48916436109,1745.8194458989,2462.295725465,3623.2649125885
|
||||
data/Example.wav126,419.24176322359,1661.3616989629,2402.0839806123,3609.1423656623
|
||||
data/Example.wav127,419.92208177621,1553.3690139745,2212.1644536132,3806.0747605306
|
||||
data/Example.wav128,325.96640217642,1418.2143563765,2164.0253765053,3716.7086388812
|
||||
data/Example.wav129,285.5361624526,1291.4409476962,2178.5623379032,3789.8762049936
|
||||
data/Example.wav130,257.65403995027,1218.5145593312,2178.3165376511,3860.0558330691
|
||||
data/Example.wav131,256.48646950186,1246.2088069493,2187.8836016044,3912.8975753174
|
||||
data/Example.wav132,237.48487454102,1293.1092609666,2224.7222196052,3872.7596483396
|
||||
data/Example.wav133,231.3371136992,1331.6743570151,2252.9991078919,3857.0984512447
|
||||
data/Example.wav134,268.03000592681,1261.3075436434,2215.5175249951,3863.9755245014
|
||||
data/Example.wav135,282.80289770956,1146.358518138,2148.7017687456,3863.410314359
|
||||
data/Example.wav136,296.67440492938,1112.116343058,1999.1259408069,3869.4578382495
|
||||
data/Example.wav137,311.49556238198,1135.8070917503,1863.2277852469,3755.2022809818
|
||||
data/Example.wav138,333.62361376473,1213.1358577695,1879.936459201,3689.2143210604
|
||||
data/Example.wav139,345.56211347062,1318.7162380459,1889.7137934816,3540.8924917934
|
||||
data/Example.wav140,357.40395964001,1414.5240348431,1930.3329697306,3495.1231454417
|
||||
data/Example.wav141,362.92921932599,1520.0324016194,2003.7298816535,3567.9516256789
|
||||
data/Example.wav142,366.73898023917,1601.6325991009,2059.5198495076,3526.7265538346
|
||||
data/Example.wav143,366.69584214898,1685.0005123251,2122.2527064945,3509.322238216
|
||||
data/Example.wav144,350.59280100236,1769.2580008882,2177.2287375601,3518.7642065058
|
||||
data/Example.wav145,334.56543052476,1840.6293059936,2232.970066598,3521.9529347493
|
||||
data/Example.wav146,313.7897966543,1914.7014303275,2311.8075931725,3516.1139605998
|
||||
data/Example.wav147,310.19156328487,1969.8351420049,2425.2288267891,3518.8472234063
|
||||
data/Example.wav148,302.80988516026,2047.9466571048,2553.1701031146,3568.2264549211
|
||||
data/Example.wav149,297.39580786399,2096.6441730092,2654.2971016098,3615.8641189258
|
||||
data/Example.wav150,294.27351217035,2053.2819507505,2657.4310077457,3636.8164191387
|
||||
data/Example.wav151,302.19460420595,2000.4395154592,2667.5072643267,3723.9191493342
|
||||
data/Example.wav152,347.69087739847,1942.306463958,2651.1799637746,3775.0142372833
|
||||
data/Example.wav153,357.69736615595,1854.4442801434,2623.1786215667,3789.0248299735
|
||||
data/Example.wav154,382.29771027292,1760.0695958603,2583.7109294292,3817.8953390063
|
||||
data/Example.wav155,407.54373254369,1697.0890039167,2546.0031580126,3846.0282427007
|
||||
data/Example.wav156,432.90375875753,1668.9418422266,2516.1130250608,3853.6242937275
|
||||
data/Example.wav157,455.80132156975,1673.3618988922,2481.2311712326,3829.6650658339
|
||||
data/Example.wav158,513.15710656811,1728.4942135631,2484.5784016949,3835.8401018631
|
||||
data/Example.wav159,520.4708133912,1794.997626997,2514.1493186531,3892.1599280087
|
||||
data/Example.wav160,494.31146190259,1854.4323305575,2652.8624138335,3917.9945778114
|
||||
data/Example.wav161,451.78729185753,1891.213870271,2709.0307202319,3930.0457537516
|
||||
data/Example.wav162,447.71841803382,1926.0130757949,2769.6267089388,3945.9099584482
|
||||
data/Example.wav163,443.68309101769,1934.2676213287,2832.1539144242,3994.3128903197
|
||||
data/Example.wav164,459.41504232669,1983.5730885475,2868.7306516137,3998.5916884127
|
||||
data/Example.wav165,498.50688019077,2016.6491079952,2909.549290289,3991.1430082239
|
||||
data/Example.wav166,524.0709846093,2090.4154310312,2942.0014272027,3988.8117921514
|
||||
data/Example.wav167,545.81814079133,2156.1304911801,2966.5932667563,3994.0621857005
|
||||
data/Example.wav168,521.59948795582,2190.1831364237,2951.4625773832,3996.2775347309
|
||||
data/Example.wav169,484.75209778826,2175.6299572139,2876.5191747788,3917.8445743522
|
||||
data/Example.wav170,432.82956733879,2169.8999981256,2831.9149239351,3949.9130576946
|
||||
data/Example.wav171,420.21706806093,2174.7133307308,2808.9469815551,3943.579289652
|
||||
data/Example.wav172,427.18100176143,2160.6961216577,2788.9460236025,3954.9059175778
|
||||
data/Example.wav173,452.57482114579,2114.1025676742,2743.4204477576,3905.0830217336
|
||||
data/Example.wav174,467.90226079696,2073.7511504114,2697.4529992115,3887.3859821227
|
||||
data/Example.wav175,475.92000414754,2067.6324802285,2690.4017505884,3857.96586513
|
||||
data/Example.wav176,481.68391133646,2057.0658971149,2681.265502174,3857.239891604
|
||||
data/Example.wav177,478.87145793794,2062.2789451758,2687.1571941166,3870.5494834439
|
||||
data/Example.wav178,477.20648934019,2071.4285466542,2692.923932098,3888.9648361388
|
||||
data/Example.wav179,473.8527188474,2064.982939429,2702.7435699883,3902.5763828741
|
||||
data/Example.wav180,471.57348597334,2058.0083919703,2701.6452198596,3923.3000496937
|
||||
data/Example.wav181,463.39299925515,2062.4157668808,2710.7310472846,3950.0874390583
|
||||
data/Example.wav182,436.63218811152,2108.8274305101,2740.8847893121,3974.5136217463
|
||||
data/Example.wav183,421.93668550809,2124.1204206929,2752.6751275984,4003.0863194578
|
||||
data/Example.wav184,407.25856028376,2116.5799637727,2738.019557875,4117.2982534636
|
||||
data/Example.wav185,393.64185335216,2113.1989480573,2726.6963823161,4193.5175677943
|
||||
data/Example.wav186,364.38846020433,2120.2702394571,2709.5686148431,4305.4762639002
|
||||
data/Example.wav187,349.05101567169,2126.4418179177,2703.4160071699,4336.3331639418
|
||||
data/Example.wav188,382.1411944146,2101.8596066989,2680.9964054382,4291.6578616102
|
||||
data/Example.wav189,433.02739940077,2071.5117295782,2676.9681651134,4266.5163553167
|
||||
data/Example.wav190,464.18026267061,2042.020614906,2682.063520666,4219.539518887
|
||||
data/Example.wav191,469.6691192069,2020.4696403087,2699.8016147476,4214.385167188
|
||||
data/Example.wav192,467.99189099137,1997.9256497791,2704.6732819819,4205.0484095747
|
||||
data/Example.wav193,476.35608207218,1983.4636109557,2700.874753974,4168.4855783136
|
||||
data/Example.wav194,496.53408658262,1973.9974908694,2701.934619156,4137.0760858252
|
||||
data/Example.wav195,511.19249331937,1973.7508004822,2708.4244945865,4154.2537971852
|
||||
data/Example.wav196,522.4695481406,1972.1068175467,2716.813289339,4176.2050581299
|
||||
data/Example.wav197,528.05142584229,1966.0232153203,2725.6564356728,4186.216684233
|
||||
data/Example.wav198,521.3589523206,1937.6779250972,2733.1678407162,4172.0998584209
|
||||
data/Example.wav199,516.48565828683,1912.1493631225,2741.815104888,4174.6646815625
|
||||
data/Example.wav200,508.53898698691,1889.9233802827,2758.8286360602,4180.6414309284
|
||||
data/Example.wav201,511.76952280585,1873.7655572631,2754.979982981,4170.2118052041
|
||||
data/Example.wav202,520.22408989767,1860.5267884894,2751.4742143224,4160.854769779
|
||||
data/Example.wav203,522.12352013622,1869.3167586956,2748.540173029,4163.1638460118
|
||||
data/Example.wav204,522.00266727624,1863.9117553503,2743.4282874248,4165.1485437118
|
||||
data/Example.wav205,514.22151055065,1852.8469855083,2737.6839401259,4186.2289065411
|
||||
data/Example.wav206,512.34645596931,1838.5445814886,2732.2566724166,4211.0681334926
|
||||
data/Example.wav207,513.33207524931,1826.0364732311,2731.4608610801,4236.1350611562
|
||||
data/Example.wav208,526.64008466085,1823.7476761467,2743.7288021458,4233.3588707581
|
||||
data/Example.wav209,535.46958282296,1823.5055596894,2744.8773162777,4247.2223916493
|
||||
data/Example.wav210,554.18085627576,1830.3957764248,2740.5412955579,4265.2772595636
|
||||
data/Example.wav211,570.92141936336,1841.4509122446,2722.4194699268,4300.0972838303
|
||||
data/Example.wav212,617.22315723711,1849.248172076,2718.4887666488,4344.1425718346
|
||||
data/Example.wav213,665.43736400041,1836.0769721094,2729.6398049903,4367.8183999718
|
||||
data/Example.wav214,703.70393143902,1821.333238966,2741.4009387203,4382.2141359022
|
||||
data/Example.wav215,699.44283063031,1783.2536559451,2744.8245925127,4385.9946253066
|
||||
data/Example.wav216,697.3039588797,1749.6664300252,2780.8912480565,4404.0649574703
|
||||
data/Example.wav217,693.61282467771,1728.2942367977,2825.8152590749,4416.2022041456
|
||||
data/Example.wav218,671.054268248,1706.9954675441,2861.3821068079,4409.2646596234
|
||||
data/Example.wav219,658.78280251903,1686.7204976193,2861.0405145442,4386.7229762032
|
||||
data/Example.wav220,653.45315577687,1686.2393832538,2867.4387653491,4377.3885777179
|
||||
data/Example.wav221,648.37556989138,1697.6533091722,2881.3735124079,4377.8057861327
|
||||
data/Example.wav222,665.62073336552,1703.610865382,2880.5355121903,4376.3085303946
|
||||
data/Example.wav223,641.32694371434,1692.8188376314,2888.2339355312,4378.9495329249
|
||||
data/Example.wav224,614.12719575363,1643.3612544808,2838.1975081999,4369.2224175283
|
||||
data/Example.wav225,538.90808502501,1618.465641695,2819.6834265203,4352.2339391976
|
||||
data/Example.wav226,499.70213312514,1625.594124235,2818.3129727707,4345.3237451616
|
||||
data/Example.wav227,513.68333841898,1623.2294798462,2779.8807012173,4319.464067883
|
||||
data/Example.wav228,536.53889205058,1572.8807732309,2677.3376944659,4239.7824252382
|
||||
data/Example.wav229,543.49661586518,1541.25751732,2560.5591721276,4156.2211017785
|
||||
data/Example.wav230,521.92437138737,1566.6446461785,2605.9947517995,4179.1581606505
|
||||
|
+77
-93
@@ -1,41 +1,46 @@
|
||||
__author__ = 'shua'
|
||||
|
||||
import argparse
|
||||
import numpy as np
|
||||
import wave
|
||||
import os
|
||||
from os import listdir
|
||||
from os.path import isfile, join
|
||||
import math
|
||||
from typing import Optional
|
||||
|
||||
import numpy as np
|
||||
import tensorflow as tf
|
||||
from inaSpeechSegmenter import tf_mfcc
|
||||
from inaSpeechSegmenter.features import to_wav
|
||||
from inaSpeechSegmenter.sidekit_mfcc import read_wav
|
||||
from numba import int16, njit
|
||||
from scipy.fftpack import fft
|
||||
from scipy.fftpack.realtransforms import dct
|
||||
from scipy.signal import lfilter, hamming
|
||||
from scipy.fftpack import fft, ifft
|
||||
#from scikits.talkbox.linpred import lpc # obsolete
|
||||
from scipy.signal import lfilter
|
||||
|
||||
from helpers.conch_lpc import lpc
|
||||
import shutil
|
||||
from helpers.utilities import *
|
||||
|
||||
epsilon = 0.0000000001
|
||||
prefac = .97
|
||||
|
||||
|
||||
def build_data(wav,begin=None,end=None):
|
||||
wav_in_file = wave.Wave_read(wav)
|
||||
wav_in_num_samples = wav_in_file.getnframes()
|
||||
def build_data_new(wav_path: str, begin: Optional[float], end: Optional[float]):
    """Read a WAV file and return one time-bounded excerpt or its analysis frames.

    Parameters
    ----------
    wav_path : str
        Path of the WAV file; it is loaded through ``read_wav`` with
        ``dtype=np.int16``.
    begin, end : float or None
        Excerpt boundaries in seconds. They are scaled by the sample rate
        reported by ``read_wav``.

    Returns
    -------
    When both ``begin`` and ``end`` are given: the samples between the two
    times. Otherwise: a list of 480-sample windows taken every 160 samples,
    the same framing ``build_data`` applies, so that callers which iterate
    the result frame by frame keep working.
    """
    y, sr, _ = read_wav(wav_path, dtype=np.int16)
    if begin is not None and end is not None:
        # The times may be fractional seconds (the command line parses them as
        # floats); slice bounds must be integers, so truncate after scaling.
        return y[int(begin * sr):int(end * sr)]

    # Whole-file mode. Previously this path fell off the end and returned None.
    # NOTE(review): the 480/160 framing mirrors build_data, which hard-codes
    # 16 kHz audio, and assumes y is a 1-D sample array - confirm for read_wav.
    return [y[start:start + 480] for start in range(0, len(y) - 100, 160)]
|
||||
|
||||
|
||||
def build_data(wav, begin=None, end=None):
    """Load PCM samples from a WAV file as one excerpt or as overlapping frames.

    Parameters
    ----------
    wav : str or path-like
        WAV file to read (converted with ``str`` before opening).
    begin, end : float or None
        Excerpt boundaries in seconds. The conversion to sample indices uses a
        fixed rate of 16000 samples per second.

    Returns
    -------
    numpy.ndarray
        When both ``begin`` and ``end`` are given: the samples between them.
    list of numpy.ndarray
        Otherwise: windows of up to 480 samples taken every 160 samples; the
        last windows are shorter when they run past the end of the signal.

    Raises
    ------
    ValueError
        If the file's sample width is not 1, 2 or 4 bytes.
    """
    # The context manager closes the file handle even if reading fails.
    with wave.open(str(wav), "rb") as wav_in_file:
        sample_width = wav_in_file.getsampwidth()
        raw = wav_in_file.readframes(wav_in_file.getnframes())

    # The wave module yields integer PCM, so decode by sample width instead of
    # reinterpreting the bytes as float32.
    pcm_dtypes = {1: np.uint8, 2: np.int16, 4: np.int32}
    if sample_width not in pcm_dtypes:
        raise ValueError("unsupported WAV sample width: %d bytes" % sample_width)

    # frombuffer replaces the deprecated binary mode of np.fromstring; the copy
    # keeps the array writable, as the fromstring result was.
    data = np.frombuffer(raw, dtype=pcm_dtypes[sample_width]).copy()

    if begin is not None and end is not None:
        # Built-in int replaces the np.int alias removed from recent NumPy.
        return data[int(begin * 16000):int(end * 16000)]

    return [data[start:start + 480] for start in range(0, len(data) - 100, 160)]
|
||||
|
||||
|
||||
def periodogram(x, nfft=None, fs=1):
|
||||
def periodogram(x, nfft: int, fs=1):
|
||||
"""Compute the periodogram of the given signal, with the given fft size.
|
||||
|
||||
Parameters
|
||||
@@ -56,15 +61,6 @@ def periodogram(x, nfft=None, fs=1):
|
||||
fgrid : array-like
|
||||
Frequency grid over which the periodogram was estimated.
|
||||
|
||||
Examples
|
||||
--------
|
||||
Generate a signal with two sinusoids, and compute its periodogram:
|
||||
|
||||
>>> fs = 1000
|
||||
>>> x = np.sin(2 * np.pi * 0.1 * fs * np.linspace(0, 0.5, 0.5*fs))
|
||||
>>> x += np.sin(2 * np.pi * 0.2 * fs * np.linspace(0, 0.5, 0.5*fs))
|
||||
>>> px, fx = periodogram(x, 512, fs)
|
||||
|
||||
Notes
|
||||
-----
|
||||
Only real signals supported for now.
|
||||
@@ -86,7 +82,7 @@ def periodogram(x, nfft=None, fs=1):
|
||||
if nfft < n:
|
||||
raise ValueError("nfft < signal size not supported yet")
|
||||
|
||||
pxx = np.abs(fft(x, nfft)) ** 2
|
||||
pxx = np.abs(np.fft.fft(x, nfft)) ** 2
|
||||
if nfft % 2 == 0:
|
||||
pn = nfft // 2 + 1
|
||||
else:
|
||||
@@ -184,13 +180,13 @@ def atal(x, order, num_coefs):
|
||||
a, e, kk = lpc(x, order)
|
||||
c = np.zeros(num_coefs)
|
||||
c[0] = a[0]
|
||||
for m in range(1, order+1):
|
||||
for m in range(1, order + 1):
|
||||
c[m] = - a[m]
|
||||
for k in range(1, m):
|
||||
c[m] += (float(k)/float(m)-1)*a[k]*c[m-k]
|
||||
for m in range(order+1, num_coefs):
|
||||
for k in range(1, order+1):
|
||||
c[m] += (float(k)/float(m)-1)*a[k]*c[m-k]
|
||||
c[m] += (float(k) / float(m) - 1) * a[k] * c[m - k]
|
||||
for m in range(order + 1, num_coefs):
|
||||
for k in range(1, order + 1):
|
||||
c[m] += (float(k) / float(m) - 1) * a[k] * c[m - k]
|
||||
return c
|
||||
|
||||
|
||||
@@ -199,7 +195,7 @@ def preemp(input, p):
|
||||
return lfilter([1., -p], 1, input)
|
||||
|
||||
|
||||
def arspecs(input_wav,order,Atal=False):
|
||||
def arspecs(input_wav, order, Atal=False):
|
||||
data = input_wav
|
||||
if Atal:
|
||||
ar = atal(data, order, 30)
|
||||
@@ -208,57 +204,59 @@ def arspecs(input_wav,order,Atal=False):
|
||||
ar = []
|
||||
ars = arspec(data, order, 4096)
|
||||
for k, l in zip(ars[0], ars[1]):
|
||||
ar.append(math.log(math.sqrt((k**2)+(l**2))))
|
||||
for val in range(0,len(ar)):
|
||||
ar.append(math.log(math.sqrt((k ** 2) + (l ** 2))))
|
||||
for val in range(0, len(ar)):
|
||||
if ar[val] < 0.0:
|
||||
ar[val] = np.nan
|
||||
elif ar[val] == 0.0:
|
||||
ar[val] = epsilon
|
||||
ar[val] = 0.0000000001
|
||||
mspec1 = np.log10(ar)
|
||||
# Use the DCT to 'compress' the coefficients (spectrum -> cepstrum domain)
|
||||
ar = dct(mspec1, type=2, norm='ortho', axis=-1)
|
||||
return ar[:30]
|
||||
|
||||
|
||||
def specPS(input_wav, pitch):
    """Return the first 50 cepstral coefficients of an averaged periodogram.

    The signal is cut into ``len(input_wav) // pitch`` equal segments (at
    least one); a 4096-point periodogram is computed for each segment and the
    first component of those results is averaged. Each averaged value is
    combined with the matching entry of the second periodogram component,
    log-compressed twice and passed through an orthonormal type-2 DCT.
    """
    total = len(input_wav)
    segment_count = (total // pitch) or 1
    segment_len = total // segment_count

    specs = periodogram(input_wav[0:segment_len], nfft=4096)
    for index in range(1, int(segment_count)):
        chunk = input_wav[segment_len * index:segment_len * (index + 1)]
        segment_spec = periodogram(chunk, nfft=4096)
        # Accumulate element by element into the first segment's result.
        for pos, value in enumerate(segment_spec[0]):
            specs[0][pos] += value

    divisor = float(segment_count)
    for pos in range(len(specs[0])):
        specs[0][pos] /= divisor

    log_magnitudes = []
    for first, second in zip(specs[0], specs[1]):
        magnitude = math.sqrt((first ** 2) + (second ** 2))
        if magnitude > 0:
            magnitude = math.log(magnitude)
        # Checked after the log on purpose: an exact zero becomes epsilon and
        # a negative logarithm is marked as NaN.
        if magnitude == 0:
            magnitude = epsilon
        elif magnitude < 0:
            magnitude = np.nan
        log_magnitudes.append(magnitude)

    # Second log compression, then DCT (spectrum -> cepstrum domain).
    log_spectrum = np.log10(log_magnitudes)
    cepstrum = dct(log_spectrum, type=2, norm='ortho', axis=-1)
    return cepstrum[:50]
|
||||
def mfcc(sig: int16[:], pitch):
    """Return the first 50 cepstral coefficients of an averaged periodogram.

    Parameters
    ----------
    sig : sequence of samples
        Signal to analyse.
    pitch : int
        Divisor that sets the number of segments: ``len(sig) // pitch``
        segments (at least one) of equal length are analysed.

    Returns
    -------
    The first 50 coefficients of the orthonormal type-2 DCT of the
    log-compressed averaged spectrum. Entries whose inner logarithm is
    negative become NaN in the ``log10`` step.
    """
    n_samples = len(sig)
    samps = n_samples // pitch
    if samps == 0:
        samps = 1
    frames = n_samples // samps

    # Copy the two components of the periodogram result into locals and
    # accumulate there. Augmented assignment on an element of the result
    # (``specs[0] += ...``) raises TypeError when the result is a tuple.
    # NOTE(review): periodogram's return type is not visible here - confirm.
    first = periodogram(sig[0:frames], nfft=4096)
    power = first[0]
    freqs = first[1]
    for i in range(1, int(samps)):
        segment = sig[frames * i:frames * (i + 1)]
        power = power + periodogram(segment, nfft=4096)[0]

    power = power / samps

    # log(0) yields -inf; silence the warning and replace those entries with
    # a tiny positive constant.
    with np.errstate(divide='ignore'):
        peri = np.log(np.sqrt(power ** 2 + freqs ** 2))
    peri[np.isneginf(peri)] = 0.0000000001

    # Second log compression of the spectrum (no filterbank is applied here).
    mspec = np.log10(peri)

    # Use the DCT to 'compress' the coefficients (spectrum -> cepstrum domain)
    ceps = dct(mspec, type=2, norm='ortho', axis=-1)

    return ceps[:50]
|
||||
|
||||
|
||||
def build_single_feature_row(data, Atal):
|
||||
lpcs = [8, 9, 10, 11, 12, 13, 14, 15, 16, 17]
|
||||
def mfcc_new(sig: int16[:], pitch):
    """Return the first 50 DCT entries of the ``tf_mfcc.mel_spect`` output.

    ``pitch`` is accepted but not used by this implementation.
    """
    # mel_spect returns two values; only the second one is used.
    _, mel_spectrum = tf_mfcc.mel_spect(sig, nwin=0.256)
    cepstrum = dct(mel_spectrum, type=2, norm='ortho', axis=-1)
    # NOTE(review): if mel_spectrum is 2-D this slice limits the first axis,
    # not the coefficient axis - confirm the shape mel_spect returns.
    return cepstrum[:50]
|
||||
|
||||
|
||||
def build_single_feature_row(data: int16[:], atal: bool = False):
|
||||
lpc_orders = np.array([8, 9, 10, 11, 12, 13, 14, 15, 16, 17])
|
||||
arr = []
|
||||
periodo = specPS(data, 50)
|
||||
periodo = mfcc(data, 50)
|
||||
arr.extend(periodo)
|
||||
for j in lpcs:
|
||||
if Atal:
|
||||
ars = arspecs(data, j, Atal=True)
|
||||
else:
|
||||
ars = arspecs(data, j)
|
||||
for j in lpc_orders:
|
||||
ars = arspecs(data, j, Atal=atal)
|
||||
arr.extend(ars)
|
||||
for i in range(len(arr)):
|
||||
if np.isnan(np.float(arr[i])):
|
||||
@@ -267,13 +265,13 @@ def build_single_feature_row(data, Atal):
|
||||
|
||||
|
||||
def create_features(input_wav_filename, feature_filename, begin=None, end=None, Atal=False):
|
||||
tmp_wav16_filename = generate_tmp_filename("wav")
|
||||
easy_call("sox " + input_wav_filename + " -c 1 -r 16000 " + tmp_wav16_filename)
|
||||
X = build_data(tmp_wav16_filename, begin, end)
|
||||
wav = to_wav(input_wav_filename)
|
||||
X = build_data_new(wav, begin, end)
|
||||
if begin is not None and end is not None:
|
||||
arr = [input_wav_filename]
|
||||
arr.extend(build_single_feature_row(X, Atal))
|
||||
np.savetxt(feature_filename, np.asarray([arr]), delimiter=",", fmt="%s")
|
||||
os.remove(wav)
|
||||
return arr
|
||||
arcep_mat = []
|
||||
for i in range(len(X)):
|
||||
@@ -281,21 +279,7 @@ def create_features(input_wav_filename, feature_filename, begin=None, end=None,
|
||||
arr.extend(build_single_feature_row(X[i], Atal))
|
||||
arcep_mat.append(arr)
|
||||
np.savetxt(feature_filename, np.asarray(arcep_mat), delimiter=",", fmt="%s")
|
||||
|
||||
os.remove(wav)
|
||||
|
||||
return arcep_mat
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Command-line entry point: extract features from one WAV file, optionally
    # restricted to the interval given by --begin / --end (in seconds).
    parser = argparse.ArgumentParser(description='Extract features for formants estimation.')
    parser.add_argument('wav_file', default='', help="WAV audio filename (single vowel or an whole utternace)")
    parser.add_argument('feature_file', default='', help="output feature text file")
    parser.add_argument('--begin', help="beginning time in the WAV file", default=0.0, type=float)
    parser.add_argument('--end', help="end time in the WAV file", default=-1.0, type=float)
    args = parser.parse_args()

    # The interval is forwarded only when at least one bound is positive;
    # otherwise create_features runs with its own defaults.
    interval = (args.begin, args.end) if (args.begin > 0.0 or args.end > 0.0) else ()
    create_features(args.wav_file, args.feature_file, *interval)
|
||||
|
||||
|
||||
|
||||
+37
-30
@@ -1,26 +1,30 @@
|
||||
|
||||
import extract_features as features
|
||||
import argparse
|
||||
from helpers.textgrid import *
|
||||
from helpers.utilities import *
|
||||
import shutil
|
||||
from load_estimation_model import load_estimation_model
|
||||
|
||||
|
||||
def predict_from_times(wav_filename, preds_filename, begin, end, csv_export=True):
|
||||
tmp_features_filename = generate_tmp_filename("txt")
|
||||
#tmp_features_filename = "temp/" + next(tempfile._get_candidate_names()) + ".txt"
|
||||
print("Input Array Path: " + tmp_features_filename)
|
||||
|
||||
def predict_from_times(wav_filename, preds_filename, begin, end):
|
||||
tmp_features_filename = tempfile._get_default_tempdir() + "/" + next(tempfile._get_candidate_names()) + ".txt"
|
||||
print(tmp_features_filename)
|
||||
|
||||
if begin > 0.0 or end > 0.0:
|
||||
features.create_features(wav_filename, tmp_features_filename, begin, end)
|
||||
easy_call("luajit load_estimation_model.lua " + tmp_features_filename + ' ' + preds_filename)
|
||||
else:
|
||||
features.create_features(wav_filename, tmp_features_filename)
|
||||
easy_call("luajit load_tracking_model.lua " + tmp_features_filename + ' ' + preds_filename)
|
||||
predictions = None
|
||||
# if begin > 0.0 or end > 0.0:
|
||||
print(wav_filename + " interval " + str(begin) + "-" + str(end) + ":")
|
||||
features.create_features(wav_filename, tmp_features_filename, begin, end)
|
||||
predictions = load_estimation_model(tmp_features_filename, preds_filename, begin, end, csv_export=csv_export)
|
||||
#easy_call("luajit load_estimation_model.lua " + tmp_features_filename + ' ' + preds_filename)
|
||||
# else:
|
||||
# features.create_features(wav_filename, tmp_features_filename)
|
||||
# easy_call("luajit load_tracking_model.lua " + tmp_features_filename + ' ' + preds_filename)
|
||||
|
||||
delete_temp_files()
|
||||
return predictions
|
||||
|
||||
|
||||
def predict_from_textgrid(wav_filename, preds_filename, textgrid_filename, textgrid_tier):
|
||||
|
||||
print(wav_filename)
|
||||
|
||||
if os.path.exists(preds_filename):
|
||||
@@ -34,24 +38,27 @@ def predict_from_textgrid(wav_filename, preds_filename, textgrid_filename, textg
|
||||
# extract tier names
|
||||
tier_names = textgrid.tierNames()
|
||||
|
||||
if textgrid_tier in tier_names:
|
||||
|
||||
if textgrid_tier in tier_names: # run over all intervals in the tier
|
||||
tier_index = tier_names.index(textgrid_tier)
|
||||
# run over all intervals in the tier
|
||||
for interval in textgrid[tier_index]:
|
||||
if re.search(r'\S', interval.mark()):
|
||||
tmp_features_filename = generate_tmp_filename("features")
|
||||
tmp_preds = generate_tmp_filename("preds")
|
||||
features.create_features(wav_filename, tmp_features_filename, interval.xmin(), interval.xmax())
|
||||
easy_call("th load_estimation_model.lua " + tmp_features_filename + ' ' + tmp_preds)
|
||||
csv_append_row(tmp_preds, preds_filename)
|
||||
else: # process first tier
|
||||
for interval in textgrid[0]:
|
||||
if re.search(r'\S', interval.mark()):
|
||||
tmp_features_filename = generate_tmp_filename("features")
|
||||
tmp_preds = generate_tmp_filename("preds")
|
||||
features.create_features(wav_filename, tmp_features_filename, interval.xmin(), interval.xmax())
|
||||
easy_call("th load_estimation_model.lua " + tmp_features_filename + ' ' + tmp_preds)
|
||||
csv_append_row(tmp_preds, preds_filename)
|
||||
textgrid_tier = textgrid[tier_index]
|
||||
else: # process first tier
|
||||
textgrid_tier = textgrid[0]
|
||||
|
||||
for interval in textgrid_tier:
|
||||
if re.search(r'\S', interval.mark()):
|
||||
tmp_features_filename = generate_tmp_filename("features")
|
||||
tmp_preds = generate_tmp_filename("preds")
|
||||
begin = interval.xmin()
|
||||
end = interval.xmax()
|
||||
features.create_features(wav_filename, tmp_features_filename, begin, end)
|
||||
load_estimation_model(tmp_features_filename, tmp_preds, begin, end)
|
||||
#easy_call("th load_estimation_model.lua " + tmp_features_filename + ' ' + tmp_preds)
|
||||
csv_append_row(tmp_preds, preds_filename)
|
||||
delete_temp_files()
|
||||
|
||||
delete_temp_files()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# parse arguments
|
||||
|
||||
@@ -27,21 +27,22 @@
|
||||
# THE SOFTWARE.
|
||||
|
||||
#import librosa
|
||||
import librosa
|
||||
import numpy as np
|
||||
import scipy as sp
|
||||
from numba import njit
|
||||
from scipy.signal import lfilter
|
||||
|
||||
from scipy.fftpack import fft, ifft
|
||||
from scipy.signal import gaussian
|
||||
from scipy.signal.windows import gaussian
|
||||
|
||||
#from ..helper import nextpow2
|
||||
#from ..functions import BaseAnalysisFunction
|
||||
|
||||
# Source: https://github.com/mmcauliffe/Conch-sounds/blob/master/conch/analysis/helper.py
|
||||
def nextpow2(x):
|
||||
@njit
|
||||
def next_pow_2(x: float) -> int:
|
||||
"""Return the first integer N such that 2**N >= abs(x)"""
|
||||
return np.ceil(np.log2(np.abs(x)))
|
||||
|
||||
|
||||
def lpc_ref(signal, order):
|
||||
"""Compute the Linear Prediction Coefficients.
|
||||
|
||||
@@ -175,7 +176,7 @@ def acorr_lpc(x, axis=-1):
|
||||
raise ValueError("Complex input not supported yet")
|
||||
|
||||
maxlag = x.shape[axis]
|
||||
nfft = int(2 ** nextpow2(2 * maxlag - 1))
|
||||
nfft = int(2 ** next_pow_2(2 * maxlag - 1))
|
||||
|
||||
if axis != -1:
|
||||
x = np.swapaxes(x, -1, axis)
|
||||
|
||||
+10
-1
@@ -25,6 +25,8 @@ import wave
|
||||
import tempfile
|
||||
import os
|
||||
|
||||
from isort import file
|
||||
|
||||
|
||||
def csv_append_row(tmp_preds, preds_filename, with_headers=True):
|
||||
|
||||
@@ -55,7 +57,9 @@ def csv_append_row(tmp_preds, preds_filename, with_headers=True):
|
||||
|
||||
|
||||
def generate_tmp_filename(extension):
    """Return a fresh, randomly named path inside the local ``temp`` directory.

    The ``temp`` directory (relative to the current working directory) is
    created on demand. Only a name is produced; the file itself is not created.

    :param extension: file extension without the leading dot, e.g. ``"wav"``.
    :return: a string of the form ``temp/<random>.<extension>``.
    """
    # exist_ok avoids the check-then-create race of isdir() followed by mkdir().
    os.makedirs("temp", exist_ok=True)
    # NOTE(review): _get_candidate_names is a private tempfile helper; it is
    # kept because the produced names must stay in the project-local folder.
    return "temp/" + next(tempfile._get_candidate_names()) + "." + extension
|
||||
|
||||
|
||||
def logging_defaults(logging_level="INFO"):
|
||||
@@ -169,3 +173,8 @@ def is_valid_wav(filename):
|
||||
or wav_file.getcomptype() != 'NONE':
|
||||
return False
|
||||
return True
|
||||
|
||||
def delete_temp_files():
    """Delete every regular file found directly inside the local ``temp`` directory.

    Does nothing (beyond the progress message) when ``temp`` does not exist,
    and leaves sub-directories in place instead of failing on them.
    """
    print("Clearing temp files...")
    if not os.path.isdir("temp"):
        # Nothing has been generated yet, so there is nothing to clean up.
        return
    for filename in os.listdir("temp"):
        path = os.path.join("temp", filename)
        # os.remove cannot delete directories; skip anything that is not a file.
        if os.path.isfile(path):
            os.remove(path)
|
||||
|
||||
@@ -0,0 +1,73 @@
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
from functools import reduce
|
||||
|
||||
|
||||
class LambdaBase(nn.Sequential):
    """``nn.Sequential`` container that additionally stores a callable.

    Subclasses decide how ``lambda_func`` is combined with the outputs of
    the child modules passed in ``*args``.
    """

    def __init__(self, fn, *args):
        super().__init__(*args)
        self.lambda_func = fn

    def forward_prepare(self, input):
        """Feed ``input`` to every child module independently.

        Returns the list of child outputs, or ``input`` itself when the
        container has no children (the list would be empty).
        """
        results = [child(input) for child in self._modules.values()]
        return results or input
|
||||
|
||||
|
||||
class Lambda(LambdaBase):
    """Apply ``lambda_func`` once to whatever ``forward_prepare`` returns."""

    def forward(self, input):
        prepared = self.forward_prepare(input)
        return self.lambda_func(prepared)
|
||||
|
||||
|
||||
class LambdaMap(LambdaBase):
    """Apply ``lambda_func`` to each element produced by ``forward_prepare``."""

    def forward(self, input):
        prepared = self.forward_prepare(input)
        return [self.lambda_func(item) for item in prepared]
|
||||
|
||||
|
||||
class LambdaReduce(LambdaBase):
    """Fold the result of ``forward_prepare`` left-to-right with ``lambda_func``."""

    def forward(self, input):
        prepared = self.forward_prepare(input)
        return reduce(self.lambda_func, prepared)
|
||||
|
||||
|
||||
def load_estimation_model(inputfilename, outputfilename, begin, end, csv_export=True):
    """Run the formant-estimation network on one pre-computed feature row.

    The input file is read as a single comma-separated record: the first
    field is a name, every following field is a numeric feature value.
    Weights are loaded from ``em.pth`` in the current working directory.

    :param inputfilename: path of the comma-separated feature file.
    :param outputfilename: path of the CSV written when ``csv_export`` is true.
    :param begin: value copied into the ``begin`` column of the CSV row.
    :param end: value copied into the ``end`` column of the CSV row.
    :param csv_export: when true, write a header plus one result row to
        ``outputfilename`` (overwriting it).
    :return: dict with keys ``F1``..``F4``, each network output multiplied
        by 1000 (presumably kHz to Hz -- confirm against the training setup).
    :raises IndexError: if the file holds more feature values than the
        network input size.
    :raises ValueError: if a feature field cannot be parsed as a float.
    """
    n_features = 350

    with open(inputfilename, "r") as rf:
        fields = [field.strip() for field in rf.read().split(",")]
    name = fields[0]
    values = [float(field) for field in fields[1:]]
    if len(values) > n_features:
        raise IndexError(
            "expected at most " + str(n_features) + " feature values, got " + str(len(values))
        )

    # torch.Tensor(1, 350) would hand back uninitialised memory, so a short
    # row would feed garbage to the network; start from zeros instead.
    data = torch.zeros(1, n_features)
    if values:
        data[0, :len(values)] = torch.tensor(values, dtype=data.dtype)

    def as_batch(x):
        # Promote a 1-D vector to a (1, N) batch; leave batched input alone.
        return x.view(1, -1) if 1 == len(x.size()) else x

    # The nesting (Sequential of [Lambda, Linear]) must stay as is: it fixes
    # the parameter names that load_state_dict matches against the checkpoint.
    model = nn.Sequential(
        nn.Sequential(Lambda(as_batch), nn.Linear(n_features, 1024)),
        nn.Sigmoid(),
        nn.Sequential(Lambda(as_batch), nn.Linear(1024, 512)),
        nn.Sigmoid(),
        nn.Sequential(Lambda(as_batch), nn.Linear(512, 256)),
        nn.Sigmoid(),
        nn.Sequential(Lambda(as_batch), nn.Linear(256, 4)),
    )

    # map_location lets a checkpoint saved on a GPU load on a CPU-only host.
    model.load_state_dict(torch.load("em.pth", map_location="cpu"))
    model.eval()

    # Pure inference: no autograd graph is needed.
    with torch.no_grad():
        my_prediction = model(data)

    formant_names = ("F1", "F2", "F3", "F4")
    prediction_dict = {}
    for index, formant in enumerate(formant_names):
        prediction_dict[formant] = 1000 * float(my_prediction[0][index])

    if csv_export:
        row = [name, begin, end] + [prediction_dict[formant] for formant in formant_names]
        with open(outputfilename, "w") as wf:
            wf.write("NAME,begin,end,F1,F2,F3,F4\n")
            wf.write(",".join(str(item) for item in row) + "\n")

    return prediction_dict
|
||||
@@ -0,0 +1,13 @@
|
||||
import os
|
||||
|
||||
import numpy as np
|
||||
from inaSpeechSegmenter import tf_mfcc
|
||||
|
||||
from formants import predict_from_times
|
||||
|
||||
if __name__ == '__main__':
    # Manual smoke run: estimate formants for the first second of a sample file.
    os.environ['XLA_FLAGS'] = '--xla_gpu_cuda_data_dir=/opt/cuda'

    # Alternative invocations kept for reference:
    # predict_from_times('data/VT 150hz baseline example.mp3', 'data/VT Predictions.csv', 0, 1)
    # tf_mfcc.power_spectrum(np.zeros(1024, dtype=np.int16), 1024, 512)
    # predict_from_times('data/Example.wav', 'data/Example-Predictions.csv', 0, 1)

    wav_path = 'data/Example-f32le.wav'
    csv_path = 'data/Example-F32-Predictions.csv'
    predict_from_times(wav_path, csv_path, 0, 1)
|
||||
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user