# Distribution metadata for the `msclap` package, read by Poetry.
[tool.poetry]
name = "msclap"
version = "1.3.0"
# Folded with line-ending backslashes purely for readability; the parsed
# value is a single line with no embedded newlines.
description = """\
CLAP (Contrastive Language-Audio Pretraining) is a model that learns \
acoustic concepts from natural language supervision and enables \
“Zero-Shot” inference. The model has been extensively evaluated in 26 \
audio downstream tasks achieving SoTA in several of them including \
classification, retrieval, and captioning.\
"""
# NOTE(review): all three authors share one list entry; Poetry's docs describe
# one "name <email>" entry per person — confirm before splitting, since that
# changes the generated author metadata.
authors = ["Benjamin Elizalde and Soham Deshmukh and Huaming Wang"]
license = "MIT"
readme = "README.md"
# Only the `msclap` package is listed for inclusion in the distribution.
packages = [{ include = "msclap" }]

# Runtime dependencies. Entries after `python` are kept in alphabetical order.
[tool.poetry.dependencies]
# The interpreter range must be a subset of what every pinned dependency
# supports, otherwise Poetry cannot resolve the project. NumPy 1.25.x requires
# Python >=3.9, and NumPy 1.25.x / Numba 0.58.x support nothing newer than
# Python 3.11, so the former "^3.8" (>=3.8,<4.0) was unsatisfiable.
python = ">=3.9,<3.12"
librosa = "^0.10.1"
# numba, numpy and pandas are held to a single minor release line each.
numba = ">=0.58.0,<0.59.0"
numpy = ">=1.25.0,<1.26.0"
pandas = ">=2.0.0,<2.1.0"
pyyaml = "^6.0.1"
scikit-learn = "^1.3.1"
# NOTE(review): torchvision 0.16.x releases pin a matching torch 2.1.x, so the
# resolver will effectively hold torch (and torchaudio) at 2.1.x even though
# their own constraints allow any 2.x — confirm this is intended.
torch = "^2.1.0"
torchaudio = "^2.1.0"
torchlibrosa = "^0.1.0"
torchvision = "^0.16.0"
tqdm = "^4.66.1"
transformers = "^4.34.0"


# PEP 517 build configuration: the package is built with Poetry's standalone
# build backend.
[build-system]
# Lower bound as recommended by the Poetry documentation, so build frontends
# never pick a pre-1.0 poetry-core release.
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"