# SubScript
A short shell script for converting a text file into an audio file with synchronized subtitles.
Dependencies:
- bash shell
- flite for text to speech
- ffmpeg for audio conversion and concatenation
- sndfile-info to get duration of audio files
- bc for time stamp parsing and formatting
- standard utilities like grep, sed, tr, mktemp, cp, rm, ls, …
# 1 Source
#!/bin/bash
# subscript.sh 2023 Claude Heiland-Allen
# license: too short/trivial for licensing
# converts a text file to WAV audio + SRT subtitles
# text file should have paragraphs separated by blank lines
#
# usage: subscript.sh input.txt output.wav output.srt
#   input.txt   text to speak; each paragraph becomes one subtitle cue
#   output.wav  stereo 48000Hz audio, with a short silence around paragraphs
#   output.srt  subtitles timed to match the audio
# exit status: non-zero on a usage error or when a conversion step fails
#
# line breaks inside a paragraph are kept in the subtitles; every character
# of the text (including @ and backslashes) is passed through unchanged

# print an error message to stderr and abort the script
die() {
  printf '%s: %s\n' "$0" "$*" >&2
  exit 1
}

# format an integer number of milliseconds ($1) as an SRT timestamp
# (HH:MM:SS,mmm) on stdout
srt_timestamp() {
  local ms="$1"
  printf '%02d:%02d:%02d,%03d' \
    "$((ms / 3600000))" "$((ms / 60000 % 60))" "$((ms / 1000 % 60))" "$((ms % 1000))"
}

# check arguments
if [[ -z "$1" || -z "$2" || -z "$3" ]]
then
  echo "usage: $0 input.txt output.wav output.srt" >&2
  exit 1
fi
intxt="$1"
outwav="$2"
outsrt="$3"
[[ -r "$intxt" ]] || die "cannot read input file: ${intxt}"

# create a temporary directory and make sure it is removed on every exit path
tmp="$(mktemp -d -t "subscript-XXXXXXXX")" || die "cannot create temporary directory"
trap 'rm -rf -- "$tmp"' EXIT

# paragraph counter, also used as the subtitle cue number
index=0
# integer timestamp in milliseconds since start
now=0
# make a small silent pause before each paragraph
pause=500
pausesecs="$(echo "scale=3 ; $pause / 1000" | bc -s -q | sed 's/^[.]/0./')"
ffmpeg -hide_banner -loglevel error -ar 48000 -t "$pausesecs" -f s16le -acodec pcm_s16le -ac 2 -i /dev/zero -y "${tmp}/pause.wav" < /dev/null \
  || die "cannot create the silent pause"

# start with an empty subtitle file so the final copy also works when the
# input contains no paragraphs
: > "${tmp}/out.srt"

# for each paragraph in the first argument (text file)
# (-r keeps backslashes literal; the "|| -n" part keeps a final line that has
# no trailing newline)
while read -r text || [[ -n "$text" ]]
do
  # tolerate more than one blank line between paragraphs
  [[ -n "$text" ]] || continue
  index=$((index + 1))
  # add silent pause to playlist
  echo "file '${tmp}/pause.wav'" >> "${tmp}/out.concat"
  now=$((now + pause))
  # accumulate the rest of the paragraph twice: joined with spaces for the
  # speech synthesizer, and with its line breaks kept for the subtitle
  para="$text"
  while read -r line || [[ -n "$line" ]]
  do
    # a blank line ends the paragraph
    [[ -n "$line" ]] || break
    text="$text $line"
    para="$para"$'\n'"$line"
  done
  # convert paragraph into audio using text to speech
  # (stdin is the text file here, so keep the tools away from it)
  flite -t "$text" -o "${tmp}/tmp.wav" < /dev/null \
    || die "flite failed on paragraph ${index}"
  # convert to stereo 48000Hz labeled by index
  ffmpeg -hide_banner -loglevel error -i "${tmp}/tmp.wav" -ar 48000 -ac 2 -y "${tmp}/${index}.wav" < /dev/null \
    || die "ffmpeg failed on paragraph ${index}"
  # add paragraph to playlist
  echo "file '${tmp}/${index}.wav'" >> "${tmp}/out.concat"
  # compute length of the output in milliseconds from the "Duration" line of
  # sndfile-info, whose fields are split into hh mm ss ms; bc is used so that
  # fields with leading zeros are read as decimal
  length="$(sndfile-info "${tmp}/tmp.wav" | grep Duration | tr "\t:." " " | ( read -r _ hh mm ss ms ; echo "(($hh * 60 + $mm) * 60 + $ss) * 1000 + $ms" | bc -s -q ))"
  [[ "$length" =~ ^[0-9]+$ ]] || die "cannot determine the duration of paragraph ${index}"
  # the cue starts after the pause and lasts as long as the spoken audio
  start=$now
  now=$((now + length))
  # add paragraph to output subtitle file
  {
    printf '%s\n' "$index"
    printf '%s --> %s\n' "$(srt_timestamp "$start")" "$(srt_timestamp "$now")"
    printf '%s\n\n' "$para"
  } >> "${tmp}/out.srt"
done < "$intxt"

# final silent pause at the end
echo "file '${tmp}/pause.wav'" >> "${tmp}/out.concat"
# convert playlist to single file
# (no -y here: an existing output file is not overwritten silently)
ffmpeg -hide_banner -loglevel error -f concat -safe 0 -i "${tmp}/out.concat" -codec:a copy "${outwav}" \
  || die "audio was not written to ${outwav}"
cp -i -- "${tmp}/out.srt" "${outsrt}" \
  || die "subtitles were not written to ${outsrt}"
# the temporary directory is removed by the EXIT trap
# output report
ls -1sh -- "${outwav}" "${outsrt}"
# 2 Bugs
Beware if the character @ appears anywhere in your input text.