音声データを作るのは簡単ですが、処理に時間がかかります。
そこで、高速化のために並列処理のコードを書きましたが、エラーが出ました。
・コード
def text_to_speech(segment, lang='ja'):
    """Convert one text segment to audio without touching the disk.

    Args:
        segment: Text to synthesize.
        lang: Language code handed to gTTS (defaults to ``'ja'``).

    Returns:
        The ``AudioSegment`` decoded from the MP3 data produced by gTTS.
    """
    tts = gTTS(text=segment, lang=lang)
    buffer = BytesIO()
    # gTTS.save() expects a file *path*; handing it a BytesIO leaves the
    # buffer empty, and ffmpeg then fails to decode the empty stream.
    # write_to_fp() is the gTTS call that writes into a file-like object.
    tts.write_to_fp(buffer)
    # Rewind: after writing, the stream position is at the end, so a reader
    # starting from the current position would see no data.
    buffer.seek(0)
    return AudioSegment.from_mp3(buffer)
def convert_long_text(text, output_path, lang='ja'):
    """Convert a long text to a single MP3 file, synthesizing chunks in parallel.

    The text is split on the Japanese full stop, each non-blank chunk is
    converted with ``text_to_speech`` on a thread pool, and the resulting
    audio segments are concatenated in the original order.

    Args:
        text: The full text to convert.
        output_path: Destination path of the combined MP3 file.
        lang: Language code forwarded to ``text_to_speech`` (default ``'ja'``).

    Returns:
        The combined ``AudioSegment`` that was exported to ``output_path``.
    """
    # Split the text into segments. Blank pieces (e.g. the empty remainder
    # after a trailing "。") are dropped because gTTS refuses empty text and
    # a single such chunk would abort the whole conversion.
    # NOTE(review): assumes split_text returns a sequence of str - confirm.
    chunks = [
        chunk
        for chunk in text_processing_utils.split_text(text, "。")
        if chunk.strip()
    ]

    # Run the conversions in parallel. Executor.map yields results in input
    # order, so the audio stays in the same order as the text.
    with concurrent.futures.ThreadPoolExecutor() as executor:
        audio_segments = list(
            executor.map(text_to_speech, chunks, [lang] * len(chunks))
        )

    # Concatenate the segments, starting from an empty segment.
    combined_audio = sum(audio_segments, AudioSegment.empty())
    combined_audio.export(output_path, format="mp3")
    return combined_audio
・エラー
Traceback (most recent call last):
File "<frozen runpy>", line 198, in _run_module_as_main
File "<frozen runpy>", line 88, in _run_code
File "/Users/user/.vscode/extensions/ms-python.python-2023.18.0/pythonFiles/lib/python/debugpy/adapter/../../debugpy/launcher/__main__.py", line 91, in <module>
main()
File "/Users/user/.vscode/extensions/ms-python.python-2023.18.0/pythonFiles/lib/python/debugpy/adapter/../../debugpy/launcher/__main__.py", line 47, in main
launcher.connect(host, port)
File "/Users/user/.vscode/extensions/ms-python.python-2023.18.0/pythonFiles/lib/python/debugpy/adapter/../../debugpy/launcher/../../debugpy/launcher/__init__.py", line 27, in connect
sock.connect((host, port))
ConnectionRefusedError: [Errno 61] Connection refused
・Visual Studio Codeのsettings.json
{
"python.condaPath": "/Users/user/anaconda3/bin/conda",
"python.defaultInterpreterPath": "/Users/user/anaconda3/bin/python",
"python.experiments.optInto": [
"pythonTerminalEnvVarActivation"
],
"python.pythonPath": "/Users/user/anaconda3/bin/python",
"python.terminal.activateEnvInCurrentTerminal": true,
"python.terminal.activateEnvironment": true,
"terminal.integrated.inheritEnv": false,
"window.zoomLevel": 1,
"python.autoComplete.extraPaths": [
]
}
ChatGPTに上記のログを渡しても原因がわからなかったので、Visual Studio Codeの「デバッグの開始」で実行しました。
すると、問題の箇所がハイライトされ、カーソルを合わせると次のような表示がされました。
例外が発生しました: CouldntDecodeError
Decoding failed. ffmpeg returned error code: 1
Output from ffmpeg/avlib:
ffmpeg version 5.1.2 Copyright (c) 2000-2022 the FFmpeg developers
built with Apple clang version 12.0.0 (clang-1200.0.32.29)
configuration: --prefix=/usr/local/Cellar/ffmpeg/5.1.2_3 --enable-shared --enable-pthreads --enable-version3 --cc=clang --host-cflags= --host-ldflags= --enable-ffplay --enable-gnutls --enable-gpl --enable-libaom --enable-libaribb24 --enable-libbluray --enable-libdav1d --enable-libmp3lame --enable-libopus --enable-librav1e --enable-librist --enable-librubberband --enable-libsnappy --enable-libsrt --enable-libsvtav1 --enable-libtesseract --enable-libtheora --enable-libvidstab --enable-libvmaf --enable-libvorbis --enable-libvpx --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxml2 --enable-libxvid --enable-lzma --enable-libfontconfig --enable-libfreetype --enable-frei0r --enable-libass --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-libspeex --enable-libsoxr --enable-libzmq --enable-libzimg --disable-libjack --disable-indev=jack --enable-videotoolbox
libavutil 57. 28.100 / 57. 28.100
libavcodec 59. 37.100 / 59. 37.100
libavformat 59. 27.100 / 59. 27.100
libavdevice 59. 7.100 / 59. 7.100
libavfilter 8. 44.100 / 8. 44.100
libswscale 6. 7.100 / 6. 7.100
libswresample 4. 7.100 / 4. 7.100
libpostproc 56. 6.100 / 56. 6.100
[cache @ 0x7fdf38b04880] Inner protocol failed to seekback end : -78
Last message repeated 1 times
[mp3 @ 0x7fdf38b04400] Failed to read frame size: Could not seek to 1026.
[cache @ 0x7fdf38b04880] Statistics, cache hits:0 cache misses:0
cache:pipe:0: Invalid argument
File "/Users/user/Dropbox/python/source_code/kindle2text/audio_processing_utils.py", line 35, in text_to_speech
return AudioSegment.from_mp3(buffer)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/user/Dropbox/python/source_code/kindle2text/audio_processing_utils.py", line 44, in convert_long_text
for audio in executor.map(text_to_speech, chunks, [lang]*len(chunks)):
File "/Users/user/Dropbox/python/source_code/kindle2text/audio_processing_utils.py", line 78, in <module>
convert_long_text(content,output_file)
pydub.exceptions.CouldntDecodeError: Decoding failed. ffmpeg returned error code: 1
Output from ffmpeg/avlib:
ffmpeg version 5.1.2 Copyright (c) 2000-2022 the FFmpeg developers
built with Apple clang version 12.0.0 (clang-1200.0.32.29)
configuration: --prefix=/usr/local/Cellar/ffmpeg/5.1.2_3 --enable-shared --enable-pthreads --enable-version3 --cc=clang --host-cflags= --host-ldflags= --enable-ffplay --enable-gnutls --enable-gpl --enable-libaom --enable-libaribb24 --enable-libbluray --enable-libdav1d --enable-libmp3lame --enable-libopus --enable-librav1e --enable-librist --enable-librubberband --enable-libsnappy --enable-libsrt --enable-libsvtav1 --enable-libtesseract --enable-libtheora --enable-libvidstab --enable-libvmaf --enable-libvorbis --enable-libvpx --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxml2 --enable-libxvid --enable-lzma --enable-libfontconfig --enable-libfreetype --enable-frei0r --enable-libass --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-libspeex --enable-libsoxr --enable-libzmq --enable-libzimg --disable-libjack --disable-indev=jack --enable-videotoolbox
libavutil 57. 28.100 / 57. 28.100
libavcodec 59. 37.100 / 59. 37.100
libavformat 59. 27.100 / 59. 27.100
libavdevice 59. 7.100 / 59. 7.100
libavfilter 8. 44.100 / 8. 44.100
libswscale 6. 7.100 / 6. 7.100
libswresample 4. 7.100 / 4. 7.100
libpostproc 56. 6.100 / 56. 6.100
[cache @ 0x7fdf38b04880] Inner protocol failed to seekback end : -78
Last message repeated 1 times
[mp3 @ 0x7fdf38b04400] Failed to read frame size: Could not seek to 1026.
[cache @ 0x7fdf38b04880] Statistics, cache hits:0 cache misses:0
cache:pipe:0: Invalid argument
「このエラーメッセージは、pydubがffmpegを使ってMP3データのデコードを試みた際に失敗したことを示しています。」
とあったので、あまり良いやり方ではありませんが、gTTSで作った音声をいったんMP3ファイルとして保存し、それを読み込み直すコードにしました。
def text_to_speech(segment, idx=0, lang='ja'):
    """Convert one text segment to audio via a temporary MP3 file.

    The speech is saved to ``"<idx, zero-padded to 3 digits>.mp3"`` in the
    current working directory, loaded back with pydub, and the file is then
    removed.

    Args:
        segment: Text to synthesize.
        idx: Number used to build the temporary file name. Calls that run
            concurrently must use distinct values, otherwise they write to
            the same file.
        lang: Language code handed to gTTS (defaults to ``'ja'``).

    Returns:
        The ``AudioSegment`` loaded from the temporary MP3 file.
    """
    filename = f"{idx:03}.mp3"
    tts = gTTS(text=segment, lang=lang)
    try:
        tts.save(filename)
        return AudioSegment.from_mp3(filename)
    finally:
        # Remove the temporary file even when synthesis or decoding fails,
        # so a failed run does not leave stray MP3 files behind.
        try:
            os.remove(filename)
        except FileNotFoundError:
            # save() failed before the file was created; nothing to clean up.
            pass
これにより、正常に動作させることができました。
この並列処理のコードは、逐次実行したときと比べて、およそ8倍高速でした。