From 16fff6a2e000c6dac81e59c10ab17e0719097e70 Mon Sep 17 00:00:00 2001 From: Mike Ganbold Date: Thu, 3 Dec 2020 15:24:38 -0800 Subject: [PATCH 1/3] samples: added test that covers the wrong file type case --- ...documents_sample_bad_input_v1beta3_test.py | 55 +++++++++++++++++++ .../batch_process_documents_sample_v1beta3.py | 4 +- 2 files changed, 58 insertions(+), 1 deletion(-) create mode 100644 samples/snippets/batch_process_documents_sample_bad_input_v1beta3_test.py diff --git a/samples/snippets/batch_process_documents_sample_bad_input_v1beta3_test.py b/samples/snippets/batch_process_documents_sample_bad_input_v1beta3_test.py new file mode 100644 index 00000000..da0014d2 --- /dev/null +++ b/samples/snippets/batch_process_documents_sample_bad_input_v1beta3_test.py @@ -0,0 +1,55 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import os +from uuid import uuid4 + +from google.cloud import storage +from google.cloud.exceptions import NotFound + +import pytest + +from samples.snippets import batch_process_documents_sample_v1beta3 + +location = "us" +project_id = os.getenv("GOOGLE_CLOUD_PROJECT") +processor_id = "90484cfdedb024f6" +gcs_input_uri = "gs://cloud-samples-data/documentai/invoice.pdf" +# The following bucket contains a .csv file, which will cause the sample to fail. 
+gcs_output_full_uri_with_wrong_type = "gs://documentai-beta-samples" +BUCKET_NAME = f"document-ai-python-{uuid4()}" + + +def test_batch_process_documents_with_bad_input(capsys): + try: + batch_process_documents_sample_v1beta3.batch_process_documents( + project_id=project_id, + location=location, + processor_id=processor_id, + gcs_input_uri=gcs_input_uri, + gcs_output_uri=gcs_output_full_uri_with_wrong_type, + gcs_output_uri_prefix='test', + timeout=450 + ) + out, _ = capsys.readouterr() + assert ( + "Failed to process" + in out + ) + except Exception as e: + assert ( + "Failed to process" + in e.message + ) diff --git a/samples/snippets/batch_process_documents_sample_v1beta3.py b/samples/snippets/batch_process_documents_sample_v1beta3.py index ea6c01e3..0b29b4ba 100644 --- a/samples/snippets/batch_process_documents_sample_v1beta3.py +++ b/samples/snippets/batch_process_documents_sample_v1beta3.py @@ -35,6 +35,7 @@ def batch_process_documents( gcs_input_uri, gcs_output_uri, gcs_output_uri_prefix, + timeout: int = 300, ): client = documentai.DocumentProcessorServiceClient() @@ -63,7 +64,7 @@ def batch_process_documents( operation = client.batch_process_documents(request) # Wait for the operation to finish - operation.result() + operation.result(timeout=timeout) # Results are written to GCS. Use a regex to find # output files @@ -79,6 +80,7 @@ def batch_process_documents( for i, blob in enumerate(blob_list): # Download the contents of this blob as a bytes object. 
if ".json" not in blob.name: + print(f"skipping non-supported file type {blob.name}") return # Only parses JSON files blob_as_bytes = blob.download_as_bytes() From f4e5b60cfdb3760111e2d30019aad276493e4dbf Mon Sep 17 00:00:00 2001 From: Mike Ganbold Date: Thu, 3 Dec 2020 15:31:28 -0800 Subject: [PATCH 2/3] fixed the lint --- ...cess_documents_sample_bad_input_v1beta3_test.py | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/samples/snippets/batch_process_documents_sample_bad_input_v1beta3_test.py b/samples/snippets/batch_process_documents_sample_bad_input_v1beta3_test.py index da0014d2..fc8ee82e 100644 --- a/samples/snippets/batch_process_documents_sample_bad_input_v1beta3_test.py +++ b/samples/snippets/batch_process_documents_sample_bad_input_v1beta3_test.py @@ -40,16 +40,10 @@ def test_batch_process_documents_with_bad_input(capsys): processor_id=processor_id, gcs_input_uri=gcs_input_uri, gcs_output_uri=gcs_output_full_uri_with_wrong_type, - gcs_output_uri_prefix='test', - timeout=450 + gcs_output_uri_prefix="test", + timeout=450, ) out, _ = capsys.readouterr() - assert ( - "Failed to process" - in out - ) + assert "Failed to process" in out except Exception as e: - assert ( - "Failed to process" - in e.message - ) + assert "Failed to process" in e.message From 76ba8828362b74954919adcd9e250f08de5aba27 Mon Sep 17 00:00:00 2001 From: Mike Ganbold Date: Thu, 3 Dec 2020 15:39:28 -0800 Subject: [PATCH 3/3] lint --- .../batch_process_documents_sample_bad_input_v1beta3_test.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/samples/snippets/batch_process_documents_sample_bad_input_v1beta3_test.py b/samples/snippets/batch_process_documents_sample_bad_input_v1beta3_test.py index fc8ee82e..e0a7e468 100644 --- a/samples/snippets/batch_process_documents_sample_bad_input_v1beta3_test.py +++ b/samples/snippets/batch_process_documents_sample_bad_input_v1beta3_test.py @@ -16,11 +16,6 @@ import os from uuid import uuid4 -from google.cloud 
import storage -from google.cloud.exceptions import NotFound - -import pytest - from samples.snippets import batch_process_documents_sample_v1beta3 location = "us"