diff --git a/docs/speech-usage.rst b/docs/speech-usage.rst index 5ba566432fdd..3846125a79f9 100644 --- a/docs/speech-usage.rst +++ b/docs/speech-usage.rst @@ -88,14 +88,14 @@ Great Britian. >>> sample = client.sample(source_uri='gs://my-bucket/recording.flac', ... encoding=speech.Encoding.FLAC, ... sample_rate=44100) - >>> results = sample.sync_recognize( + >>> alternatives = sample.sync_recognize( ... speech.Encoding.FLAC, 16000, ... source_uri='gs://my-bucket/recording.flac', language_code='en-GB', ... max_alternatives=2) - >>> for result in results: + >>> for alternative in alternatives: ... print('=' * 20) - ... print('transcript: ' + result.transcript) - ... print('confidence: ' + result.confidence) + ... print('transcript: ' + alternative.transcript) + ... print('confidence: ' + alternative.confidence) ==================== transcript: Hello, this is a test confidence: 0.81 @@ -112,12 +112,12 @@ Example of using the profanity filter. >>> sample = client.sample(source_uri='gs://my-bucket/recording.flac', ... encoding=speech.Encoding.FLAC, ... sample_rate=44100) - >>> results = sample.sync_recognize(max_alternatives=1, - ... profanity_filter=True) - >>> for result in results: + >>> alternatives = sample.sync_recognize(max_alternatives=1, + ... profanity_filter=True) + >>> for alternative in alternatives: ... print('=' * 20) - ... print('transcript: ' + result.transcript) - ... print('confidence: ' + result.confidence) + ... print('transcript: ' + alternative.transcript) + ... print('confidence: ' + alternative.confidence) ==================== transcript: Hello, this is a f****** test confidence: 0.81 @@ -134,12 +134,12 @@ words to the vocabulary of the recognizer. ... encoding=speech.Encoding.FLAC, ... sample_rate=44100) >>> hints = ['hi', 'good afternoon'] - >>> results = sample.sync_recognize(max_alternatives=2, - ... speech_context=hints) - >>> for result in results: + >>> alternatives = sample.sync_recognize(max_alternatives=2, + ... speech_context=hints) + >>> for alternative in alternatives: ... print('=' * 20) - ... print('transcript: ' + result.transcript) - ... print('confidence: ' + result.confidence) + ... print('transcript: ' + alternative.transcript) + ... print('confidence: ' + alternative.confidence) ==================== transcript: Hello, this is a test confidence: 0.81 diff --git a/speech/google/cloud/speech/_gax.py b/speech/google/cloud/speech/_gax.py index 2465ec0e50bd..3c343f2227b5 100644 --- a/speech/google/cloud/speech/_gax.py +++ b/speech/google/cloud/speech/_gax.py @@ -16,14 +16,12 @@ from google.cloud.gapic.speech.v1beta1.speech_client import SpeechClient -from google.cloud.proto.speech.v1beta1.cloud_speech_pb2 import RecognitionAudio -from google.cloud.proto.speech.v1beta1.cloud_speech_pb2 import ( - RecognitionConfig) -from google.cloud.proto.speech.v1beta1.cloud_speech_pb2 import ( - SpeechContext) -from google.cloud.proto.speech.v1beta1.cloud_speech_pb2 import ( +from google.cloud.grpc.speech.v1beta1.cloud_speech_pb2 import RecognitionAudio +from google.cloud.grpc.speech.v1beta1.cloud_speech_pb2 import RecognitionConfig +from google.cloud.grpc.speech.v1beta1.cloud_speech_pb2 import SpeechContext +from google.cloud.grpc.speech.v1beta1.cloud_speech_pb2 import ( StreamingRecognitionConfig) -from google.cloud.proto.speech.v1beta1.cloud_speech_pb2 import ( +from google.cloud.grpc.speech.v1beta1.cloud_speech_pb2 import ( StreamingRecognizeRequest) from google.longrunning import operations_grpc @@ -31,8 +29,8 @@ from google.cloud._helpers import make_secure_stub from google.cloud._http import DEFAULT_USER_AGENT +from google.cloud.speech.alternative import Alternative from google.cloud.speech.operation import Operation -from google.cloud.speech.result import Result OPERATIONS_API_HOST = 'speech.googleapis.com' @@ -237,9 +235,15 @@ def sync_recognize(self, sample, language_code=None, max_alternatives=None, words to the vocabulary of the recognizer. :rtype: list - :returns: List of :class:`google.cloud.speech.result.Result` objects. + :returns: A list of dictionaries. One dict for each alternative. Each + dictionary typically contains two keys (though not + all will be present in all cases) - :raises: ValueError if there are no results. + * ``transcript``: The detected text from the audio recording. + * ``confidence``: The confidence in language detection, float + between 0 and 1. + + :raises: ValueError if more than one result is returned or no results. """ config = RecognitionConfig( encoding=sample.encoding, sample_rate=sample.sample_rate, @@ -250,13 +254,13 @@ def sync_recognize(self, sample, language_code=None, max_alternatives=None, uri=sample.source_uri) api = self._gapic_api api_response = api.sync_recognize(config=config, audio=audio) - - # Sanity check: If we got no results back, raise an error. - if len(api_response.results) == 0: - raise ValueError('No results returned from the Speech API.') - - # Iterate over any results that came back. - return [Result.from_pb(result) for result in api_response.results] + if len(api_response.results) == 1: + results = api_response.results.pop() + alternatives = results.alternatives + return [Alternative.from_pb(alternative) + for alternative in alternatives] + else: + raise ValueError('More than one result or none returned from API.') def _stream_requests(sample, language_code=None, max_alternatives=None, diff --git a/speech/google/cloud/speech/client.py b/speech/google/cloud/speech/client.py index 0bf96c68e100..f6bcdf50b26a 100644 --- a/speech/google/cloud/speech/client.py +++ b/speech/google/cloud/speech/client.py @@ -60,11 +60,7 @@ class Client(BaseClient): def __init__(self, credentials=None, http=None, use_gax=None): super(Client, self).__init__(credentials=credentials, http=http) self._connection = Connection( - credentials=self._credentials, - http=self._http, - ) - - # Save on the actual client class whether we use GAX or not. + credentials=self._credentials, http=self._http) if use_gax is None: self._use_gax = _USE_GAX else: diff --git a/speech/google/cloud/speech/operation.py b/speech/google/cloud/speech/operation.py index 330328ce0b3b..f56f0602fa1c 100644 --- a/speech/google/cloud/speech/operation.py +++ b/speech/google/cloud/speech/operation.py @@ -14,10 +14,10 @@ """Long running operation representation for Google Speech API""" -from google.cloud.proto.speech.v1beta1 import cloud_speech_pb2 +from google.cloud.grpc.speech.v1beta1 import cloud_speech_pb2 from google.cloud import operation -from google.cloud.speech.result import Result +from google.cloud.speech.alternative import Alternative operation.register_type(cloud_speech_pb2.AsyncRecognizeMetadata) @@ -58,13 +58,11 @@ def _update_state(self, operation_pb): if result_type != 'response': return - # Retrieve the results. - # If there were no results at all, raise an exception. pb_results = self.response.results - if len(pb_results) == 0: - raise ValueError('Speech API returned no results.') + if len(pb_results) != 1: + raise ValueError('Expected exactly one result, found:', + pb_results) - # Save the results to the Operation object. - self.results = [] - for pb_result in pb_results: - self.results.append(Result.from_pb(pb_result)) + result = pb_results[0] + self.results = [Alternative.from_pb(alternative) + for alternative in result.alternatives] diff --git a/speech/google/cloud/speech/result.py b/speech/google/cloud/speech/result.py index bba01a047c5d..11efb93626b6 100644 --- a/speech/google/cloud/speech/result.py +++ b/speech/google/cloud/speech/result.py @@ -17,53 +17,6 @@ from google.cloud.speech.alternative import Alternative -class Result(object): - """Speech recognition result representation. - - This is the object that comes back on sync or async requests - (but not streaming requests). - - :type alternatives: list - :param alternatives: List of - :class:`~google.cloud.speech.alternative.Alternative`. - """ - def __init__(self, alternatives): - self.alternatives = alternatives - - @classmethod - def from_pb(cls, result): - """Factory: construct instance of ``SpeechRecognitionResult``. - - :type result: :class:`~google.cloud.grpc.speech.v1beta1\ - .cloud_speech_pb2.StreamingRecognizeResult` - :param result: Instance of ``StreamingRecognizeResult`` protobuf. - - :rtype: :class:`~google.cloud.speech.result.SpeechRecognitionResult` - :returns: Instance of ``SpeechRecognitionResult``. - """ - alternatives = [Alternative.from_pb(result) for result - in result.alternatives] - return cls(alternatives=alternatives) - - @property - def confidence(self): - """Return the confidence for the most probable alternative. - - :rtype: float - :returns: Confidence value, between 0 and 1. - """ - return self.alternatives[0].confidence - - @property - def transcript(self): - """Return the transcript for the most probable alternative. - - :rtype: str - :returns: Speech transcript. - """ - return self.alternatives[0].transcript - - class StreamingSpeechResult(object): """Streaming speech result representation. @@ -93,27 +46,9 @@ def from_pb(cls, response): :rtype: :class:`~google.cloud.speech.result.StreamingSpeechResult` :returns: Instance of ``StreamingSpeechResult``. """ - alternatives = [Alternative.from_pb(result) for result - in response.alternatives] + alternatives = [Alternative.from_pb(alternative) + for alternative in response.alternatives] is_final = response.is_final stability = response.stability return cls(alternatives=alternatives, is_final=is_final, stability=stability) - - @property - def confidence(self): - """Return the confidence for the most probable alternative. - - :rtype: float - :returns: Confidence value, between 0 and 1. - """ - return self.alternatives[0].confidence - - @property - def transcript(self): - """Return the transcript for the most probable alternative. - - :rtype: str - :returns: Speech transcript. - """ - return self.alternatives[0].transcript diff --git a/speech/setup.py b/speech/setup.py index aa8796d18b57..eb10b0e7dff2 100644 --- a/speech/setup.py +++ b/speech/setup.py @@ -51,7 +51,7 @@ REQUIREMENTS = [ 'google-cloud-core >= 0.22.1, < 0.23dev', 'grpcio >= 1.0.2, < 2.0dev', - 'gapic-google-cloud-speech-v1beta1 >= 0.15.0, < 0.16dev', + 'gapic-google-cloud-speech-v1beta1 >= 0.14.0, < 0.15dev', ] setup( diff --git a/speech/unit_tests/test__gax.py b/speech/unit_tests/test__gax.py index b3e40edc1797..7ee1b0945fa5 100644 --- a/speech/unit_tests/test__gax.py +++ b/speech/unit_tests/test__gax.py @@ -36,8 +36,13 @@ def _call_fut(self, sample, language_code, max_alternatives, def test_ctor(self): from google.cloud import speech from google.cloud.speech.sample import Sample - from google.cloud.proto.speech.v1beta1.cloud_speech_pb2 import ( - RecognitionConfig, SpeechContext, StreamingRecognitionConfig, + from google.cloud.grpc.speech.v1beta1.cloud_speech_pb2 import ( + SpeechContext) + from google.cloud.grpc.speech.v1beta1.cloud_speech_pb2 import ( + RecognitionConfig) + from google.cloud.grpc.speech.v1beta1.cloud_speech_pb2 import ( + StreamingRecognitionConfig) + from google.cloud.grpc.speech.v1beta1.cloud_speech_pb2 import ( StreamingRecognizeRequest) sample = Sample(content=self.AUDIO_CONTENT, @@ -98,8 +103,10 @@ def test_stream_requests(self): from io import BytesIO from google.cloud import speech from google.cloud.speech.sample import Sample - from google.cloud.proto.speech.v1beta1.cloud_speech_pb2 import ( - StreamingRecognitionConfig, StreamingRecognizeRequest) + from google.cloud.grpc.speech.v1beta1.cloud_speech_pb2 import ( + StreamingRecognitionConfig) + from google.cloud.grpc.speech.v1beta1.cloud_speech_pb2 import ( + StreamingRecognizeRequest) sample = Sample(stream=BytesIO(self.AUDIO_CONTENT), encoding=speech.Encoding.FLAC, diff --git a/speech/unit_tests/test_alternative.py b/speech/unit_tests/test_alternative.py index 7a79d37e43b8..950392ae517d 100644 --- a/speech/unit_tests/test_alternative.py +++ b/speech/unit_tests/test_alternative.py @@ -54,7 +54,7 @@ def test_from_api_repr_with_no_confidence(self): self.assertIsNone(alternative.confidence) def test_from_pb_with_no_confidence(self): - from google.cloud.proto.speech.v1beta1 import cloud_speech_pb2 + from google.cloud.grpc.speech.v1beta1 import cloud_speech_pb2 text = 'the double trouble' pb_value = cloud_speech_pb2.SpeechRecognitionAlternative( diff --git a/speech/unit_tests/test_client.py b/speech/unit_tests/test_client.py index 43d3527ec339..a5c37d9c3d0a 100644 --- a/speech/unit_tests/test_client.py +++ b/speech/unit_tests/test_client.py @@ -24,7 +24,7 @@ def _make_credentials(): def _make_result(alternatives=()): - from google.cloud.proto.speech.v1beta1 import cloud_speech_pb2 + from google.cloud.grpc.speech.v1beta1 import cloud_speech_pb2 return cloud_speech_pb2.SpeechRecognitionResult( alternatives=[ @@ -37,7 +37,7 @@ def _make_result(alternatives=()): def _make_streaming_result(alternatives=(), is_final=True, stability=1.0): - from google.cloud.proto.speech.v1beta1 import cloud_speech_pb2 + from google.cloud.grpc.speech.v1beta1 import cloud_speech_pb2 return cloud_speech_pb2.StreamingRecognitionResult( alternatives=[ @@ -52,7 +52,7 @@ def _make_streaming_result(alternatives=(), is_final=True, stability=1.0): def _make_streaming_response(*results): - from google.cloud.proto.speech.v1beta1 import cloud_speech_pb2 + from google.cloud.grpc.speech.v1beta1 import cloud_speech_pb2 response = cloud_speech_pb2.StreamingRecognizeResponse( results=results, @@ -61,7 +61,7 @@ def _make_streaming_response(*results): def _make_sync_response(*results): - from google.cloud.proto.speech.v1beta1 import cloud_speech_pb2 + from google.cloud.grpc.speech.v1beta1 import cloud_speech_pb2 response = cloud_speech_pb2.SyncRecognizeResponse( results=results, @@ -202,7 +202,7 @@ def test_sync_recognize_source_uri_without_optional_params_no_gax(self): sample = client.sample(source_uri=self.AUDIO_SOURCE_URI, encoding=encoding, sample_rate=self.SAMPLE_RATE) - response = [i for i in sample.sync_recognize()] + response = sample.sync_recognize() self.assertEqual(len(client._connection._requested), 1) req = client._connection._requested[0] @@ -231,7 +231,7 @@ def test_sync_recognize_with_empty_results_no_gax(self): sample_rate=self.SAMPLE_RATE) with self.assertRaises(ValueError): - next(sample.sync_recognize()) + sample.sync_recognize() def test_sync_recognize_with_empty_results_gax(self): from google.cloud._testing import _Monkey @@ -274,7 +274,7 @@ def speech_api(channel=None): sample_rate=self.SAMPLE_RATE) with self.assertRaises(ValueError): - next(sample.sync_recognize()) + sample.sync_recognize() def test_sync_recognize_with_gax(self): from google.cloud._testing import _Monkey @@ -326,19 +326,16 @@ def speech_api(channel=None): self.assertEqual( channel_args, [(creds, _gax.DEFAULT_USER_AGENT, host)]) - results = [i for i in sample.sync_recognize()] + results = sample.sync_recognize() - self.assertEqual(len(results), 1) - self.assertEqual(len(results[0].alternatives), 2) + self.assertEqual(len(results), 2) self.assertEqual(results[0].transcript, - results[0].alternatives[0].transcript, alternatives[0]['transcript']) self.assertEqual(results[0].confidence, - results[0].alternatives[0].confidence, alternatives[0]['confidence']) - self.assertEqual(results[0].alternatives[1].transcript, + self.assertEqual(results[1].transcript, alternatives[1]['transcript']) - self.assertEqual(results[0].alternatives[1].confidence, + self.assertEqual(results[1].confidence, alternatives[1]['confidence']) def test_async_supported_encodings(self): @@ -538,11 +535,9 @@ def speech_api(channel=None): self.assertEqual(results[0].stability, 0.122435) self.assertEqual(results[1].stability, 0.1432343) self.assertFalse(results[1].is_final) - self.assertEqual(results[1].transcript, - results[1].alternatives[0].transcript, + self.assertEqual(results[1].alternatives[0].transcript, alternatives[0]['transcript']) - self.assertEqual(results[1].confidence, - results[1].alternatives[0].confidence, + self.assertEqual(results[1].alternatives[0].confidence, alternatives[0]['confidence']) self.assertEqual(results[1].alternatives[1].transcript, alternatives[1]['transcript']) @@ -550,11 +545,9 @@ def speech_api(channel=None): alternatives[1]['confidence']) self.assertTrue(results[2].is_final) self.assertEqual(results[2].stability, 0.9834534) - self.assertEqual(results[2].transcript, - results[2].alternatives[0].transcript, + self.assertEqual(results[2].alternatives[0].transcript, alternatives[0]['transcript']) - self.assertEqual(results[2].confidence, - results[2].alternatives[0].confidence, + self.assertEqual(results[2].alternatives[0].confidence, alternatives[0]['confidence']) def test_stream_recognize(self): diff --git a/speech/unit_tests/test_operation.py b/speech/unit_tests/test_operation.py index cbcac1a363a7..8a890d134720 100644 --- a/speech/unit_tests/test_operation.py +++ b/speech/unit_tests/test_operation.py @@ -44,7 +44,7 @@ def test_constructor(self): @staticmethod def _make_result(transcript, confidence): - from google.cloud.proto.speech.v1beta1 import cloud_speech_pb2 + from google.cloud.grpc.speech.v1beta1 import cloud_speech_pb2 return cloud_speech_pb2.SpeechRecognitionResult( alternatives=[ @@ -56,7 +56,7 @@ def _make_result(transcript, confidence): ) def _make_operation_pb(self, *results): - from google.cloud.proto.speech.v1beta1 import cloud_speech_pb2 + from google.cloud.grpc.speech.v1beta1 import cloud_speech_pb2 from google.longrunning import operations_pb2 from google.protobuf.any_pb2 import Any @@ -86,48 +86,31 @@ def test__update_state_no_response(self): def test__update_state_with_response(self): from google.cloud.speech.alternative import Alternative - from google.cloud.speech.result import Result client = object() operation = self._make_one( self.OPERATION_NAME, client) - results = [ - self._make_result('hi mom', 0.75), - self._make_result('hi dad', 0.75), - ] - operation_pb = self._make_operation_pb(*results) + text = 'hi mom' + confidence = 0.75 + result = self._make_result(text, confidence) + operation_pb = self._make_operation_pb(result) operation._update_state(operation_pb) self.assertIsNotNone(operation.response) - self.assertEqual(len(operation.results), 2) - for result, text in zip(operation.results, ['hi mom', 'hi dad']): - self.assertIsInstance(result, Result) - self.assertEqual(result.transcript, text) - self.assertEqual(result.confidence, 0.75) - self.assertIsInstance(result.alternatives, list) - self.assertIsInstance(result.alternatives[0], Alternative) + self.assertEqual(len(operation.results), 1) + alternative = operation.results[0] + self.assertIsInstance(alternative, Alternative) + self.assertEqual(alternative.transcript, text) + self.assertEqual(alternative.confidence, confidence) - def test__update_state_with_empty_response(self): - from google.cloud.proto.speech.v1beta1 import cloud_speech_pb2 - from google.longrunning import operations_pb2 - from google.protobuf.any_pb2 import Any - - # Simulate an empty response (rather than no response yet, which - # is distinct). - response = cloud_speech_pb2.AsyncRecognizeResponse(results=[]) - type_url = 'type.googleapis.com/%s' % response.DESCRIPTOR.full_name - any_pb = Any( - type_url=type_url, - value=response.SerializeToString(), - ) - operation_pb = operations_pb2.Operation( - name=self.OPERATION_NAME, - response=any_pb, - ) - - # Establish that we raise ValueError at state update time. + def test__update_state_bad_response(self): client = object() - operation = self._make_one(self.OPERATION_NAME, client) + operation = self._make_one( + self.OPERATION_NAME, client) + + result1 = self._make_result('is this ok?', 0.625) + result2 = self._make_result('ease is ok', None) + operation_pb = self._make_operation_pb(result1, result2) with self.assertRaises(ValueError): operation._update_state(operation_pb)