[project]
name = "convolens"
version = "0.1.0"
description = "Pipeline to download, transcribe, search, and remix interview videos from YouTube channels"
readme = "README.md"
requires-python = ">=3.12"
# Core runtime deps only; heavy ML/DB/web stacks live in optional extras below.
# Keep this list sorted alphabetically.
dependencies = [
    "deal>=4.24",
    "numpy>=1.24",
    "pysbd>=0.3.0",
    "python-dotenv>=1.0",
    "rich>=13.0",
    "tqdm>=4.0.0",
    "typer>=0.12.0",
    "typing-extensions>=4.12",
]

[project.optional-dependencies]
ingest = [
    "yt-dlp>=2024.0.0",
]
transcribe = [
    # torch is provided by the cpu/gpu extras (mutually exclusive).
    # Use: uv sync --extra transcribe --extra cpu (or --extra gpu)
    # whisperx, torchcodec, and torch versions are tightly coupled:
    #   whisperx 3.8.x requires torch >=2.8,<2.9
    #   torchcodec 0.7 is the version matching torch 2.8 (see compatibility table
    #   at github.com/pytorch/torchcodec)
    #   whisperx >=3.8 requires ctranslate2 >=4.5, which links cuDNN 9 (older
    #   ctranslate2 links cuDNN 8, causing "libcudnn_ops_infer.so.8 not found"
    #   on modern GPU setups)
    # To upgrade torch, bump cpu/gpu extras and whisperx in lockstep.
    "faster-whisper>=1.1.1",
    "noisereduce>=3.0.0",
    "omegaconf>=2.0.0",
    "pyannote.audio>=4.0.0",
    "soundfile>=0.12.0",
    "whisperx>=3.8.0",
]
embed = [
    # torch is provided by the cpu/gpu extras (mutually exclusive).
    "nltk>=3.8",
    "plotly>=5.0.0",
    "scikit-learn>=1.4.0",
    "sentence-transformers>=3.0.0",
    "umap-learn>=0.5.7",
]
db = [
    "alembic>=1.13",
    "pgvector>=0.3",
    "psycopg[binary]>=3.1",
    "sqlalchemy[asyncio]>=2.0",
]
llm = [
    "google-genai>=1.0",
    "langchain-core>=0.3",
    "langchain-google-genai>=2.0",
    "langchain-openai>=0.3",
    "langgraph>=0.2",
    "openai>=1.0",
]
web = [
    "anyio>=4.12.1",
    "fastapi>=0.129.0",
    "httpx>=0.28.1",
    "sse-starlette>=2.0",
    "uvicorn>=0.41.0",
]
benchmark = [
    "datasets>=3.0.0",
    "jiwer>=3.0.0",
    "pyannote.metrics>=3.2.0",
    "whisper-normalizer>=0.0.5",
]
tracing = [
    "arize-phoenix-otel>=0.6",
    "openinference-instrumentation-google-genai>=0.1",
    "openinference-instrumentation-langchain>=0.1.12",
]
# CPU/GPU extras: mutually exclusive, controls which PyTorch index is used
# (see [tool.uv] conflicts and [tool.uv.sources] at the bottom of this file).
#   uv sync --extra transcribe --extra cpu   # CPU-only
#   uv sync --extra transcribe --extra gpu   # GPU with CUDA 12.8
#   uv sync --extra all --extra cpu          # Everything + CPU torch
cpu = [
    "torch>=2.8.0,<2.9",
    "torchaudio>=2.8.0,<2.9",
    "torchcodec>=0.6.0,<0.8.0",
]
gpu = [
    "torch>=2.8.0,<2.9",
    "torchaudio>=2.8.0,<2.9",
    "torchcodec>=0.6.0,<0.8.0",
]
# Meta-extra: everything except the torch-index selectors (cpu/gpu), which
# must still be chosen explicitly.
all = [
    "convolens[ingest,transcribe,embed,db,llm,web,benchmark,tracing]",
]

[project.scripts]
convolens = "convolens.cli:main"

# Dev-only tooling (PEP 735 dependency groups; installed by `uv sync` by
# default, never published as package metadata). Kept sorted alphabetically.
[dependency-groups]
dev = [
    "hypothesis>=6.0",
    "pytest",
    "pytest-asyncio>=1.3.0",
    "ruff",
    "ty",
]

[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[tool.ruff]
line-length = 120

[tool.ruff.lint]
# Sorted rule-group codes. "TC" is the current code for the flake8-type-checking
# rules — "TCH" was renamed to "TC" in Ruff 0.8 and is deprecated; ruff is
# unpinned in the dev group, so the non-deprecated code is used here.
select = ["B", "E", "F", "I", "N", "SIM", "TC", "UP"]

[tool.ty.src]
# Benchmark code pulls in optional heavy deps that ty cannot always resolve.
exclude = ["convolens/benchmark/"]

[tool.ty.environment]
python-version = "3.12"

[tool.uv]
# grpcio 1.78.1 was yanked due to gcloud serverless outage (github.com/grpc/grpc/issues/41725)
constraint-dependencies = ["grpcio!=1.78.1"]
# The cpu and gpu extras select conflicting torch builds; uv must refuse to
# resolve both into the same environment.
conflicts = [
    [{ extra = "cpu" }, { extra = "gpu" }],
]

# explicit = true: these indexes are only used for packages pinned to them
# via [tool.uv.sources]; everything else resolves from the default index.
[[tool.uv.index]]
name = "pytorch-cpu"
url = "https://download.pytorch.org/whl/cpu"
explicit = true

[[tool.uv.index]]
name = "pytorch-cu128"
url = "https://download.pytorch.org/whl/cu128"
explicit = true

# Route the torch stack to the PyTorch index matching the selected extra.
[tool.uv.sources]
torch = [
    { index = "pytorch-cpu", extra = "cpu" },
    { index = "pytorch-cu128", extra = "gpu" },
]
torchaudio = [
    { index = "pytorch-cpu", extra = "cpu" },
    { index = "pytorch-cu128", extra = "gpu" },
]
torchcodec = [
    { index = "pytorch-cpu", extra = "cpu" },
    { index = "pytorch-cu128", extra = "gpu" },
]