# Poetry project definition for the msclap package.
[tool.poetry]
name = "msclap"
version = "1.3.0"
# Folded with line-ending backslashes; the parsed value is a single line.
description = """\
CLAP (Contrastive Language-Audio Pretraining) is a model that learns \
acoustic concepts from natural language supervision and enables \
“Zero-Shot” inference. The model has been extensively evaluated in 26 \
audio downstream tasks achieving SoTA in several of them including \
classification, retrieval, and captioning.\
"""
authors = ["Benjamin Elizalde and Soham Deshmukh and Huaming Wang"]
license = "MIT"
readme = "README.md"
packages = [
    { include = "msclap" },
]

[tool.poetry.dependencies]
# The numpy range below only has releases that require Python >= 3.9, so a
# 3.8 floor cannot be resolved; keep this floor in sync with the numpy pin.
python = "^3.9"
librosa = "^0.10.1"
# numpy, numba and pandas are held to a single minor series each.
# NOTE(review): the numpy and numba ranges look chosen to match each other;
# confirm numba's supported numpy range before widening either one.
numpy = ">=1.25.0,<1.26.0"
numba = ">=0.58.0,<0.59.0"
pandas = ">=2.0.0,<2.1.0"
torch = "^2.1.0"
torchaudio = "^2.1.0"
torchlibrosa = "^0.1.0"
torchvision = "^0.16.0"
tqdm = "^4.66.1"
transformers = "^4.34.0"
pyyaml = "^6.0.1"
scikit-learn = "^1.3.1"

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"