Skip to content
This repository was archived by the owner on Jun 19, 2025. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
*.pyc
*.swp
*.DS_Store
*.egg-info
.pit*
/.run
/werlog.js
Expand Down
13 changes: 8 additions & 5 deletions bin/build_sdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from deepspeech_training.util.downloader import SIMPLE_BAR
from deepspeech_training.util.sample_collections import (
DirectSDBWriter,
samples_from_files,
samples_from_sources,
)

AUDIO_TYPE_LOOKUP = {"wav": AUDIO_TYPE_WAV, "opus": AUDIO_TYPE_OPUS}
Expand All @@ -26,12 +26,10 @@ def build_sdb():
with DirectSDBWriter(
CLI_ARGS.target, audio_type=audio_type, labeled=not CLI_ARGS.unlabeled
) as sdb_writer:
samples = samples_from_files(CLI_ARGS.sources, labeled=not CLI_ARGS.unlabeled)
samples = samples_from_sources(CLI_ARGS.sources, labeled=not CLI_ARGS.unlabeled)
bar = progressbar.ProgressBar(max_value=len(samples), widgets=SIMPLE_BAR)
for sample in bar(
change_audio_types(
samples, audio_type=audio_type, processes=CLI_ARGS.workers
)
change_audio_types(samples, audio_type=audio_type, bitrate=CLI_ARGS.bitrate, processes=CLI_ARGS.workers)
):
sdb_writer.add(sample)

Expand All @@ -55,6 +53,11 @@ def handle_args():
choices=AUDIO_TYPE_LOOKUP.keys(),
help="Audio representation inside target SDB",
)
parser.add_argument(
"--bitrate",
type=int,
help="Bitrate for lossy compressed SDB samples like in case of --audio-type opus",
)
parser.add_argument(
"--workers", type=int, default=None, help="Number of encoding SDB workers"
)
Expand Down
66 changes: 66 additions & 0 deletions bin/compare_samples.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
#!/usr/bin/env python
Comment thread
reuben marked this conversation as resolved.
"""
Tool for comparing two wav samples
"""
import sys
import argparse

from deepspeech_training.util.audio import AUDIO_TYPE_NP, mean_dbfs
from deepspeech_training.util.sample_collections import load_sample


def fail(message):
    """Write ``message`` to stderr and terminate the process with exit status 1.

    Args:
        message: Text describing why the comparison failed.

    Raises:
        SystemExit: Always, with exit code 1.
    """
    sys.stderr.write("{}\n".format(message))
    # Flush explicitly so the message is not lost when the interpreter exits.
    sys.stderr.flush()
    raise SystemExit(1)


def compare_samples():
    """Compare the two samples named on the command line.

    The samples must agree on audio format and duration; otherwise the
    process exits through ``fail``. Both are then converted to
    ``AUDIO_TYPE_NP`` and the ``mean_dbfs`` of their element-wise difference
    is compared against ``CLI_ARGS.threshold``.

    Without ``--if-differ`` a value above the threshold is a failure; with
    ``--if-differ`` the check is inverted and a value at or below the
    threshold is a failure. On success a message is written to stderr unless
    ``--no-success-output`` was given.
    """
    first = load_sample(CLI_ARGS.sample1)
    second = load_sample(CLI_ARGS.sample2)

    # Basic properties have to match regardless of --if-differ.
    if first.audio_format != second.audio_format:
        fail('Samples differ on: audio-format ({} and {})'.format(first.audio_format, second.audio_format))
    if first.duration != second.duration:
        fail('Samples differ on: duration ({} and {})'.format(first.duration, second.duration))

    first.change_audio_type(AUDIO_TYPE_NP)
    second.change_audio_type(AUDIO_TYPE_NP)
    diff_dbfs = mean_dbfs(first.audio - second.audio)

    differ_msg = 'Samples differ on: sample data ({:0.2f} dB difference) '.format(diff_dbfs)
    equal_msg = 'Samples are considered equal ({:0.2f} dB difference)'.format(diff_dbfs)

    # Select which outcome counts as the failure; the two comparisons are
    # kept separate (not negations of each other) on purpose.
    if CLI_ARGS.if_differ:
        check_failed = diff_dbfs <= CLI_ARGS.threshold
        failure_msg, success_msg = equal_msg, differ_msg
    else:
        check_failed = diff_dbfs > CLI_ARGS.threshold
        failure_msg, success_msg = differ_msg, equal_msg

    if check_failed:
        fail(failure_msg)
    if not CLI_ARGS.no_success_output:
        print(success_msg, file=sys.stderr, flush=True)


def handle_args():
    """Parse the command line of the sample comparison tool.

    Returns:
        argparse.Namespace: Parsed arguments with the attributes ``sample1``,
        ``sample2``, ``threshold`` (float, default -60.0), ``if_differ`` and
        ``no_success_output`` (both booleans, default False).
    """
    parser = argparse.ArgumentParser(
        description="Tool for checking similarity of two samples"
    )
    parser.add_argument("sample1", help="Filename of sample 1 to compare")
    parser.add_argument("sample2", help="Filename of sample 2 to compare")
    parser.add_argument(
        "--threshold",
        type=float,
        default=-60.0,
        help="dB of sample deltas above which they are considered different",
    )
    parser.add_argument(
        "--if-differ",
        action="store_true",
        # Adjacent literals are concatenated verbatim, so the separating
        # space has to be part of one of them.
        help="If to succeed and return status code 0 on different signals and fail on equal ones (inverse check). "
             "This will still fail on different formats or durations.",
    )
    parser.add_argument(
        "--no-success-output",
        action="store_true",
        help="Stay silent on success (if samples are equal or - with --if-differ - samples are not equal)",
    )
    return parser.parse_args()


if __name__ == "__main__":
    # CLI_ARGS is intentionally a module-level global: compare_samples()
    # reads the parsed arguments from it instead of taking parameters.
    CLI_ARGS = handle_args()
    compare_samples()
110 changes: 76 additions & 34 deletions bin/play.py
Original file line number Diff line number Diff line change
@@ -1,54 +1,72 @@
#!/usr/bin/env python
"""
Tool for playing samples from Sample Databases (SDB files) and DeepSpeech CSV files
Tool for playing (and augmenting) single samples or samples from Sample Databases (SDB files) and DeepSpeech CSV files
Use "python3 play.py -h" for help
"""

import argparse
import random
import os
import sys
import random
import argparse

from deepspeech_training.util.audio import AUDIO_TYPE_PCM
from deepspeech_training.util.sample_collections import LabeledSample, samples_from_file


def play_sample(samples, index):
    """Play a single sample of a collection and block until playback ends.

    Args:
        samples: Sample collection supporting ``len()`` and integer indexing.
        index: Position of the sample to play. Negative values count from the
            end of the collection. Ignored when ``CLI_ARGS.random`` is set.

    Prints the sample ID (and the transcript for labeled samples) before
    playing. Exits the process with status 1 if ``index`` does not address a
    sample of the collection.
    """
    count = len(samples)
    if index < 0:
        index += count
    if CLI_ARGS.random and count > 0:
        # randrange excludes its upper bound; randint(0, count) could return
        # ``count`` itself and index one past the last sample.
        index = random.randrange(count)
    elif not 0 <= index < count:
        # Also rejects negative indices that reach before the first sample.
        print("No sample with index {}".format(CLI_ARGS.start))
        sys.exit(1)
    sample = samples[index]
    print('Sample "{}"'.format(sample.sample_id))
    if isinstance(sample, LabeledSample):
        print(' "{}"'.format(sample.transcript))
    sample.change_audio_type(AUDIO_TYPE_PCM)
    rate, channels, width = sample.audio_format
    wave_obj = simpleaudio.WaveObject(sample.audio, channels, width, rate)
    play_obj = wave_obj.play()
    play_obj.wait_done()
from deepspeech_training.util.audio import LOADABLE_AUDIO_EXTENSIONS, AUDIO_TYPE_PCM, AUDIO_TYPE_WAV
from deepspeech_training.util.sample_collections import SampleList, LabeledSample, samples_from_source, augment_samples


def play_collection():
samples = samples_from_file(CLI_ARGS.collection, buffering=0)
def get_samples_in_play_order():
ext = os.path.splitext(CLI_ARGS.source)[1].lower()
if ext in LOADABLE_AUDIO_EXTENSIONS:
samples = SampleList([(CLI_ARGS.source, 0)], labeled=False)
else:
samples = samples_from_source(CLI_ARGS.source, buffering=0)
played = 0
index = CLI_ARGS.start
while True:
if 0 <= CLI_ARGS.number <= played:
return
play_sample(samples, index)
if CLI_ARGS.random:
yield samples[random.randint(0, len(samples) - 1)]
elif index < 0:
yield samples[len(samples) + index]
elif index >= len(samples):
print("No sample with index {}".format(CLI_ARGS.start))
sys.exit(1)
else:
yield samples[index]
played += 1
index = (index + 1) % len(samples)


def play_collection():
    """Augment the selected samples and play them, or pipe the first one out.

    Samples come from ``get_samples_in_play_order()`` and are passed through
    ``augment_samples`` with the ``--augment`` specs and the ``--clock``
    value. Unless ``--quiet`` is set, the sample ID (and the transcript of
    labeled samples) is logged to stderr.

    With ``--pipe`` the first sample is converted to ``AUDIO_TYPE_WAV``,
    written to stdout as raw bytes, and the function returns. Otherwise each
    sample is played through ``simpleaudio`` and waited on before the next.
    """
    augmented = augment_samples(
        get_samples_in_play_order(),
        audio_type=AUDIO_TYPE_PCM,
        augmentation_specs=CLI_ARGS.augment,
        process_ahead=0,
        fixed_clock=CLI_ARGS.clock,
    )
    for sample in augmented:
        if not CLI_ARGS.quiet:
            # Info goes to stderr so it never mixes with piped WAV data.
            print('Sample "{}"'.format(sample.sample_id), file=sys.stderr)
            if isinstance(sample, LabeledSample):
                print(' "{}"'.format(sample.transcript), file=sys.stderr)
        if CLI_ARGS.pipe:
            sample.change_audio_type(AUDIO_TYPE_WAV)
            sys.stdout.buffer.write(sample.audio.getvalue())
            # Only the first sample is piped.
            return
        playback = simpleaudio.WaveObject(
            sample.audio,
            sample.audio_format.channels,
            sample.audio_format.width,
            sample.audio_format.rate,
        ).play()
        playback.wait_done()


def handle_args():
parser = argparse.ArgumentParser(
description="Tool for playing samples from Sample Databases (SDB files) "
description="Tool for playing (and augmenting) single samples or samples from Sample Databases (SDB files) "
"and DeepSpeech CSV files"
)
parser.add_argument("collection", help="Sample DB or CSV file to play samples from")
parser.add_argument("source", help="Sample DB, CSV or WAV file to play samples from")
parser.add_argument(
"--start",
type=int,
Expand All @@ -66,16 +84,40 @@ def handle_args():
action="store_true",
help="If samples should be played in random order",
)
parser.add_argument(
"--augment",
action='append',
help="Add an augmentation operation",
)
parser.add_argument(
    "--clock",
    type=float,
    default=0.5,
    # Adjacent literals are joined verbatim; each one carries its own
    # trailing space so the help text does not run words together.
    help="Simulates clock value used for augmentations during training. "
         "Ranges from 0.0 (representing parameter start values) to "
         "1.0 (representing parameter end values)",
)
parser.add_argument(
"--pipe",
action="store_true",
help="Pipe first sample as wav file to stdout. Forces --number to 1.",
)
parser.add_argument(
"--quiet",
action="store_true",
help="No info logging to console",
)
return parser.parse_args()


if __name__ == "__main__":
try:
import simpleaudio
except ModuleNotFoundError:
print('play.py requires Python package "simpleaudio"')
sys.exit(1)
CLI_ARGS = handle_args()
if not CLI_ARGS.pipe:
try:
import simpleaudio
except ModuleNotFoundError:
print('Unless using the --pipe flag, play.py requires Python package "simpleaudio" for playing samples')
sys.exit(1)
try:
play_collection()
except KeyboardInterrupt:
Expand Down
66 changes: 66 additions & 0 deletions bin/run-tc-signal_augmentations.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
#!/bin/sh
# Smoke test for the signal augmentations offered by bin/play.py.
#
# Every augmentation is applied to the LDC93S1 sample and written to /tmp as
# a WAV file. bin/compare_samples.py --if-differ then has to confirm that the
# result differs from the original sample data while format and duration
# stay the same.

set -xe

# "&&" makes a failing cd abort the script (set -e) instead of silently
# continuing with the current working directory.
ldc93s1_dir=$(cd data/smoke_test && pwd)
ldc93s1_csv="${ldc93s1_dir}/LDC93S1.csv"
ldc93s1_wav="${ldc93s1_dir}/LDC93S1.wav"
ldc93s1_overlay_csv="${ldc93s1_dir}/LDC93S1_overlay.csv"
ldc93s1_overlay_wav="${ldc93s1_dir}/LDC93S1_reversed.wav"

# These two are expanded unquoted on purpose so they split into command words.
play="python bin/play.py --number 1 --quiet"
compare="python bin/compare_samples.py --no-success-output"

if [ ! -f "${ldc93s1_csv}" ]; then
    echo "Downloading and preprocessing LDC93S1 example data, saving in ${ldc93s1_dir}."
    python -u bin/import_ldc93s1.py "${ldc93s1_dir}"
fi

if [ ! -f "${ldc93s1_overlay_csv}" ]; then
    echo "Reversing ${ldc93s1_wav} to ${ldc93s1_overlay_wav}."
    sox "${ldc93s1_wav}" "${ldc93s1_overlay_wav}" reverse

    echo "Creating ${ldc93s1_overlay_csv}."
    # The path is passed as an argument, never as part of the format string,
    # so "%" or "\" in the path cannot corrupt the CSV.
    printf 'wav_filename\n%s\n' "${ldc93s1_overlay_wav}" > "${ldc93s1_overlay_csv}"
fi

# Sanity check of the comparison tool itself: the reversed sample must be
# reported as different from the original.
if ! $compare --if-differ "${ldc93s1_wav}" "${ldc93s1_overlay_wav}"; then
    echo "Sample comparison tool not working correctly"
    exit 1
fi

# check_augmentation FILE_PREFIX LABEL SPEC
#   Applies augmentation SPEC to the LDC93S1 sample, stores the result in
#   /tmp/FILE_PREFIX-test.wav and fails the script if the sample data did not
#   change or basic sample properties changed.
check_augmentation() {
    $play "${ldc93s1_wav}" --augment "$3" --pipe >"/tmp/$1-test.wav"
    if ! $compare --if-differ "${ldc93s1_wav}" "/tmp/$1-test.wav"; then
        echo "$2 augmentation had no effect or changed basic sample properties"
        exit 1
    fi
}

# Specs are quoted: "name[...]" is a bracket expression to the shell and
# would otherwise be subject to pathname expansion.
check_augmentation overlay Overlay "overlay[source=${ldc93s1_overlay_csv},snr=20]"
check_augmentation reverb Reverb "reverb[delay=50.0,decay=2.0]"
check_augmentation gaps Gaps "gaps[n=10,size=100.0]"
check_augmentation resample Resample "resample[rate=4000]"
check_augmentation codec Codec "codec[bitrate=4000]"
check_augmentation volume Volume "volume"
Loading