# SubScript

A short shell script for converting a text file into an audio file with synchronized subtitles.

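Dependencies: bash, bc, sed, ffmpeg, flite, and sndfile-info (from libsndfile), plus the usual core utilities (mktemp, grep, tr).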

# 1 Source

#!/bin/bash
# subscript.sh 2023 Claude Heiland-Allen
# license: too short/trivial for licensing
# converts a text file to WAV audio + SRT subtitles
# text file should have paragraphs separated by single blank lines
#
# check arguments: all three file names are required and must be non-empty
if [[ -z "${1:-}" || -z "${2:-}" || -z "${3:-}" ]]
then
  # a usage error is a failure: report it on stderr and exit non-zero
  echo "usage: $0 input.txt output.wav output.srt" >&2
  exit 1
fi
# input text file, output audio file, output subtitle file
intxt="$1"
outwav="$2"
outsrt="$3"
# fail early, before any temporary files or audio are generated
if [[ ! -r "${intxt}" ]]
then
  echo "$0: cannot read input file: ${intxt}" >&2
  exit 1
fi
# create a temporary directory; abort if that fails, otherwise every path
# below would be built from an empty prefix
tmp="$(mktemp -d -t "subscript-XXXXXXXX")" || {
  echo "$0: cannot create temporary directory" >&2
  exit 1
}
# remove the temporary directory when the script exits, including early exits
trap 'rm -rf -- "${tmp}"' EXIT
# paragraph counter (numbers the per-paragraph WAV files and the SRT entries)
index=0
# integer timestamp in milliseconds since start
now=0
# make a small silent pause before each paragraph (length in milliseconds)
pause=500
# same length in seconds with three decimals; sed restores the leading zero
# that bc omits for values below 1 (".500" -> "0.500")
pausesecs="$(echo "scale=3 ; $pause / 1000" | bc -s -q | sed 's/^[.]/0./')"
# render the pause: read $pausesecs seconds of raw 16-bit stereo 48000Hz
# samples from /dev/zero (i.e. silence) and write them out as a WAV file
ffmpeg -hide_banner -loglevel error -ar 48000 -t "$pausesecs" -f s16le -acodec pcm_s16le -ac 2 -i /dev/zero -y "${tmp}/pause.wav" < /dev/null || {
  echo "$0: cannot generate silent pause audio" >&2
  exit 1
}
# format an integer number of milliseconds ($1) as an SRT timestamp HH:MM:SS,mmm
srt_timestamp() {
  local ms=$1
  printf '%02d:%02d:%02d,%03d' \
    "$((ms / (1000 * 60 * 60)))" \
    "$((ms / (1000 * 60) % 60))" \
    "$((ms / 1000 % 60))" \
    "$((ms % 1000))"
}

# print the duration of the audio file $1 in whole milliseconds, taken from
# the "Duration" line printed by sndfile-info (parsed as HH:MM:SS.mmm);
# returns non-zero if no such line can be parsed
wav_length_ms() {
  local header hh mm ss ms field
  # turn tabs, colons and dots into spaces so the line splits into fields
  read -r header hh mm ss ms < <(sndfile-info "$1" < /dev/null | grep Duration | tr "\t:." "   ")
  for field in "$hh" "$mm" "$ss" "$ms"
  do
    [[ "$field" =~ ^[0-9]+$ ]] || return 1
  done
  # 10# forces base 10, so zero-padded fields such as "08" are not read as octal
  echo "$(( ((10#$hh * 60 + 10#$mm) * 60 + 10#$ss) * 1000 + 10#$ms ))"
}

# for each paragraph in the first argument (text file)
# (read -r keeps backslashes literal; the "|| -n" test keeps a final line
# that has no trailing newline)
while read -r text || [[ -n "$text" ]]
do
  # skip surplus blank lines, so a subtitle entry never starts with an empty line
  if [[ -z "$text" ]]
  then
    continue
  fi
  index=$((index + 1))
  # add silent pause to playlist
  echo "file '${tmp}/pause.wav'" >> "${tmp}/out.concat"
  now=$((now + pause))
  # accumulate paragraph twice: joined with spaces for the speech synthesizer,
  # and joined with real newlines for the subtitle (no placeholder character,
  # so '@' in the text is kept as-is)
  para="$text"
  while read -r line || [[ -n "$line" ]]
  do
    # a blank line ends the paragraph
    if [[ -z "$line" ]]
    then
      break
    fi
    text="$text $line"
    para="$para"$'\n'"$line"
  done
  # convert paragraph into audio using text to speech
  # (stdin is detached so the tools cannot consume the rest of the input text)
  flite -t "$text" -o "${tmp}/tmp.wav" < /dev/null || {
    echo "$0: text to speech failed for paragraph ${index}" >&2
    exit 1
  }
  # convert to stereo 48000Hz labeled by index
  ffmpeg -hide_banner -loglevel error -i "${tmp}/tmp.wav" -ar 48000 -ac 2 -y "${tmp}/${index}.wav" < /dev/null || {
    echo "$0: audio conversion failed for paragraph ${index}" >&2
    exit 1
  }
  # add paragraph to playlist
  echo "file '${tmp}/${index}.wav'" >> "${tmp}/out.concat"
  # compute length of the output in milliseconds
  length="$(wav_length_ms "${tmp}/tmp.wav")" || {
    echo "$0: cannot determine audio length of paragraph ${index}" >&2
    exit 1
  }
  # the subtitle is shown from the end of the pause until the speech ends
  start=$now
  end=$((now + length))
  now=$end
  # add paragraph to output subtitle file: entry number, time range, text,
  # then a blank line terminating the entry
  {
    printf '%s\n' "${index}"
    printf '%s --> %s\n' "$(srt_timestamp "$start")" "$(srt_timestamp "$end")"
    printf '%s\n\n' "${para}"
  } >> "${tmp}/out.srt"
done < "${intxt}"
# final silent pause at the end
echo "file '${tmp}/pause.wav'" >> "${tmp}/out.concat"
# make sure a subtitle file exists even when the input contained no paragraphs
# (appending nothing creates it if missing and leaves existing content alone)
: >> "${tmp}/out.srt"
# remember whether any output step failed, so the exit status reflects it
status=0
# convert playlist to single file
# (no -y here, so an existing output file is not silently overwritten)
ffmpeg -hide_banner -loglevel error -f concat -safe 0 -i "${tmp}/out.concat" -codec:a copy "${outwav}" || status=1
# copy the subtitles into place, asking before overwriting an existing file
cp -i -- "${tmp}/out.srt" "${outsrt}" || status=1
# cleanup temporary directory (:? refuses to run with an empty path)
rm -r -- "${tmp:?}"
# output report
ls -1sh -- "${outwav}" "${outsrt}" || status=1
exit "$status"

# 2 Bugs

Beware if the character @ appears anywhere in your input text.