diff --git a/samples/snippets/beta_snippets_test.py b/samples/snippets/beta_snippets_test.py index 4dcc486c..5ef51497 100644 --- a/samples/snippets/beta_snippets_test.py +++ b/samples/snippets/beta_snippets_test.py @@ -15,7 +15,6 @@ from beta_snippets import ( transcribe_file_with_auto_punctuation, - transcribe_file_with_diarization, transcribe_file_with_enhanced_model, transcribe_file_with_metadata, transcribe_file_with_multichannel, @@ -48,14 +47,6 @@ def test_transcribe_file_with_auto_punctuation(capsys): assert "First alternative of result " in out -def test_transcribe_diarization(capsys): - transcribe_file_with_diarization() - out, err = capsys.readouterr() - - assert "word:" in out - assert "speaker_tag:" in out - - def test_transcribe_multichannel_file(capsys): transcribe_file_with_multichannel() out, err = capsys.readouterr() diff --git a/samples/snippets/transcribe_diarization_beta.py b/samples/snippets/transcribe_diarization_beta.py new file mode 100644 index 00000000..2933b292 --- /dev/null +++ b/samples/snippets/transcribe_diarization_beta.py @@ -0,0 +1,55 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +def transcribe_file_with_diarization(speech_file): + """Transcribe the given audio file synchronously with diarization.""" + # [START speech_transcribe_diarization_beta] + from google.cloud import speech_v1p1beta1 as speech + + client = speech.SpeechClient() + + with open(speech_file, "rb") as audio_file: + content = audio_file.read() + + audio = speech.RecognitionAudio(content=content) + + config = speech.RecognitionConfig( + encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16, + sample_rate_hertz=8000, + language_code="en-US", + #enable_speaker_diarization=True, + diarization_config={ + "min_speaker_count": 2 + }, + ) + + print("Waiting for operation to complete...") + response = client.recognize(config=config, audio=audio) + + print(response) + + # The transcript within each result is separate and sequential per result. + # However, the words list within an alternative includes all the words + # from all the results thus far. Thus, to get all the words with speaker + # tags, you only have to take the words list from the last result: + result = response.results[-1] + + words_info = result.alternatives[0].words + + # Printing out the output: + for word_info in words_info: + print( + u"word: '{}', speaker_tag: {}".format(word_info.word, word_info.speaker_tag) + ) + # [END speech_transcribe_diarization_beta] \ No newline at end of file diff --git a/samples/snippets/transcribe_diarization_beta_test.py b/samples/snippets/transcribe_diarization_beta_test.py new file mode 100644 index 00000000..edbbc6aa --- /dev/null +++ b/samples/snippets/transcribe_diarization_beta_test.py @@ -0,0 +1,28 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +from transcribe_diarization_beta import transcribe_file_with_diarization + +def test_transcribe_diarization(capsys): + + test_file_path = os.path.join(os.path.dirname(__file__), + "resources/commercial_mono.wav") + + transcribe_file_with_diarization(test_file_path) + out, err = capsys.readouterr() + + assert "word:" in out + assert "speaker_tag:" in out \ No newline at end of file