From da6bbade1f4c4cba8ffd18ab12c3c6947d5f3e4f Mon Sep 17 00:00:00 2001 From: Greg Darke Date: Tue, 14 Apr 2020 03:11:18 +1000 Subject: [PATCH 1/9] Allow tests to be run under python3.8. --- noxfile.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/noxfile.py b/noxfile.py index 1e43b20e..1bef5b65 100644 --- a/noxfile.py +++ b/noxfile.py @@ -30,7 +30,7 @@ BLACK_PATHS.append("samples") -@nox.session(python="3.7") +@nox.session(python=["3.7", "3.8"]) def lint(session): """Run linters. @@ -56,7 +56,7 @@ def blacken(session): session.run("black", *BLACK_PATHS) -@nox.session(python="3.7") +@nox.session(python=["3.7", "3.8"]) def lint_setup_py(session): """Verify that setup.py is valid (including RST check).""" session.install("docutils", "pygments") @@ -90,7 +90,7 @@ def unit(session): default(session) -@nox.session(python=["2.7", "3.7"]) +@nox.session(python=["2.7", "3.7", "3.8"]) def system(session): """Run the system test suite.""" system_test_path = os.path.join("tests", "system.py") @@ -120,7 +120,7 @@ def system(session): session.run("py.test", "--quiet", system_test_folder_path, *session.posargs) -@nox.session(python="3.7") +@nox.session(python=["3.7", "3.8"]) def cover(session): """Run the final coverage report. From 80409cdbe6c52176507f79df6c3def57f8a9213b Mon Sep 17 00:00:00 2001 From: Greg Darke Date: Tue, 14 Apr 2020 01:18:30 +1000 Subject: [PATCH 2/9] Use AnonymousCredentials directly. The google.cloud.auth package provides a Credentials class for use with emulators (and other systems that do not require oauth credentials). Use this rather than rolling out own. It seems a couple of people around the internet have copied this method for running code against the emulators. We should ensure that if people are going to copy these tests, that at least they do something less hacky. --- tests/system/test_system.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/system/test_system.py b/tests/system/test_system.py index 577bd748..8fcbceb5 100644 --- a/tests/system/test_system.py +++ b/tests/system/test_system.py @@ -19,13 +19,13 @@ import requests import six +import google.auth.credentials from google.cloud._helpers import UTC from google.cloud import datastore from google.cloud.datastore.helpers import GeoPoint from google.cloud.environment_vars import GCD_DATASET from google.cloud.exceptions import Conflict -from test_utils.system import EmulatorCreds from test_utils.system import unique_resource_id from tests.system.utils import clear_datastore @@ -59,7 +59,7 @@ def setUpModule(): if emulator_dataset is None: Config.CLIENT = datastore.Client(namespace=test_namespace) else: - credentials = EmulatorCreds() + credentials = google.auth.credentials.AnonymousCredentials() http = requests.Session() # Un-authorized. Config.CLIENT = datastore.Client( project=emulator_dataset, From aa25eaf19e092d088bb8e73ab6f83f1224a06643 Mon Sep 17 00:00:00 2001 From: Greg Darke Date: Tue, 14 Apr 2020 01:21:00 +1000 Subject: [PATCH 3/9] Don't require auth for emulator tests. We remove the requirement for the GOOGLE_APPLICATION_CREDENTIALS if we detect that we should be running against the local datastore emulator. --- noxfile.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/noxfile.py b/noxfile.py index 1bef5b65..d48ded2e 100644 --- a/noxfile.py +++ b/noxfile.py @@ -96,8 +96,10 @@ def system(session): system_test_path = os.path.join("tests", "system.py") system_test_folder_path = os.path.join("tests", "system") # Sanity check: Only run tests if the environment variable is set. - if not os.environ.get("GOOGLE_APPLICATION_CREDENTIALS", ""): - session.skip("Credentials must be set via environment variable") + is_emulator = 'DATASTORE_DATASET' in os.environ + has_credentials = bool(os.environ.get("GOOGLE_APPLICATION_CREDENTIALS", "")) + if not is_emulator and not has_credentials: + session.skip("Credentials must be set via environment variable for non-emulator tests") system_test_exists = os.path.exists(system_test_path) system_test_folder_exists = os.path.exists(system_test_folder_path) From f68fb131bd887da5967314dab716786527e670fb Mon Sep 17 00:00:00 2001 From: Greg Darke Date: Tue, 14 Apr 2020 01:14:26 +1000 Subject: [PATCH 4/9] Make empty_array_put test run against the emulator. This unit test was overwriting the datastore client being used to be a default constructed client. This was causing it to be constructed with the default credentials object, which requires that the test is either run in GCP or with the `GOOGLE_APPLICATION_CREDENTIALS` environment variable set. --- tests/system/test_system.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/system/test_system.py b/tests/system/test_system.py index 8fcbceb5..7f998d2e 100644 --- a/tests/system/test_system.py +++ b/tests/system/test_system.py @@ -601,7 +601,6 @@ def test_empty_array_put(self): local_client = clone_client(Config.CLIENT) key = local_client.key("EmptyArray", 1234) - local_client = datastore.Client() entity = datastore.Entity(key=key) entity["children"] = [] local_client.put(entity) From 788e6f70625a9f29e7d9434f46e5193c2f637173 Mon Sep 17 00:00:00 2001 From: Greg Darke Date: Tue, 14 Apr 2020 01:56:22 +1000 Subject: [PATCH 5/9] Improve test failure messages. Move away from assertTrue to a helper that outputs the actual value and the expected value when the test fails. --- tests/system/test_system.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/system/test_system.py b/tests/system/test_system.py index 7f998d2e..4da7ef4e 100644 --- a/tests/system/test_system.py +++ b/tests/system/test_system.py @@ -374,7 +374,7 @@ def test_query_paginate_simple_uuid_keys(self): self.assertNotIn(uuid_str, seen, uuid_str) seen.add(uuid_str) - self.assertTrue(page_count > 1) + self.assertGreater(page_count, 1) def test_query_paginate_simple_timestamp_keys(self): @@ -391,7 +391,7 @@ def test_query_paginate_simple_timestamp_keys(self): self.assertNotIn(timestamp, seen, timestamp) seen.add(timestamp) - self.assertTrue(page_count > 1) + self.assertGreater(page_count, 1) def test_query_offset_timestamp_keys(self): # See issue #4675 From 06bba63898f0438fbeee254cdd45e133d2670ad7 Mon Sep 17 00:00:00 2001 From: Greg Darke Date: Tue, 14 Apr 2020 01:51:35 +1000 Subject: [PATCH 6/9] remove_all_entities tries to remove too many entities at once. The last test run in TestDatastoreQuery would fail as it was trying to remove more than 500 entities at once. We break the deletes up into batches to work around this limitation of the datastore api. --- tests/system/utils/clear_datastore.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/system/utils/clear_datastore.py b/tests/system/utils/clear_datastore.py index 3438ff89..4733268d 100644 --- a/tests/system/utils/clear_datastore.py +++ b/tests/system/utils/clear_datastore.py @@ -84,7 +84,11 @@ def remove_all_entities(client): query = client.query() results = list(query.fetch()) keys = [entity.key for entity in results] - client.delete_multi(keys) + BATCH_SIZE = 500 # Datastore API only allows 500 mutations in a single call. + while keys: + batch = keys[:BATCH_SIZE] + keys = keys[BATCH_SIZE:] + client.delete_multi(batch) def main(): From a08adeca8e63cd55231a4906c7ccd0d6dbdb2bfd Mon Sep 17 00:00:00 2001 From: Greg Darke Date: Tue, 14 Apr 2020 01:54:02 +1000 Subject: [PATCH 7/9] Fix timestamp/uuid tests in the emulator. These tests assume that specific pieces of data exist within datastore, but nothing in the test created these pieces of data. Also, the TestDatastoreQuery.test_query_offset_timestamp_keys test assumes there is >= 10k entities in the datastore, but the create function would only add 1k entities. --- tests/system/test_system.py | 2 ++ tests/system/utils/populate_datastore.py | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/system/test_system.py b/tests/system/test_system.py index 4da7ef4e..e258cdc8 100644 --- a/tests/system/test_system.py +++ b/tests/system/test_system.py @@ -243,6 +243,8 @@ def setUpClass(cls): if os.getenv(GCD_DATASET) is not None: # Populate the datastore with the cloned client. populate_datastore.add_characters(client=cls.CLIENT) + populate_datastore.add_uid_keys(client=cls.CLIENT) + populate_datastore.add_timestamp_keys(client=cls.CLIENT) cls.CHARACTERS = populate_datastore.CHARACTERS # Use the client for this test instead of the global. diff --git a/tests/system/utils/populate_datastore.py b/tests/system/utils/populate_datastore.py index e8e1574a..108ff229 100644 --- a/tests/system/utils/populate_datastore.py +++ b/tests/system/utils/populate_datastore.py @@ -156,8 +156,9 @@ def add_timestamp_keys(client=None): # Get a client that uses the test dataset. client = datastore.Client() - num_batches = 2 + num_batches = 21 batch_size = 500 + assert num_batches * batch_size > 10000, 'test_query_offset_timestamp_keys requires at least 10k entries, otherwise it fails' timestamp_micros = set() for batch_num in range(num_batches): From 920e646701b48d2c0bec2812730de0456bd152c2 Mon Sep 17 00:00:00 2001 From: Greg Darke Date: Tue, 14 Apr 2020 03:04:42 +1000 Subject: [PATCH 8/9] Fix tests that use _verify in the emulator. The datastore emulator does not check the size of the response messages before trying to send them back to the client. For all of the tests that return large objects, this results in the response exceeding the ~4MiB grpc message limit (enforces in message_size_filter). The response size for these tests is ~10MiB. Since we do not need the actual contents of the entity, we can request just the keys, via a keys_only query. Technically this is changing they type of query, which may be considered testing a different thing. I will leave this up to the maintainers if they want to merge this change. --- tests/system/test_system.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/system/test_system.py b/tests/system/test_system.py index e258cdc8..a1a49304 100644 --- a/tests/system/test_system.py +++ b/tests/system/test_system.py @@ -497,6 +497,7 @@ def _base_query(self): def _verify(self, limit, offset, expected): # Query used for all tests page_query = self._base_query() + page_query.keys_only() # Work around bug in the datastore emulator where it fails to take into account the size of the response (and fails due to a grpc response too large error). page_query.add_filter("family", "=", "Stark") page_query.add_filter("alive", "=", False) From efa16a86e7f20175ff46dfe8aee64e54a6cda86a Mon Sep 17 00:00:00 2001 From: Greg Darke Date: Tue, 14 Apr 2020 03:09:09 +1000 Subject: [PATCH 9/9] Make the populate/clear methods stay under the size limit. There is both a 500 entities per mutation limit, and a ~4MiB request/response size limit. Update both the populate and clear libraries to stay below these limits. --- tests/system/utils/clear_datastore.py | 18 +++- tests/system/utils/populate_datastore.py | 101 +++++++++++++++-------- 2 files changed, 79 insertions(+), 40 deletions(-) diff --git a/tests/system/utils/clear_datastore.py b/tests/system/utils/clear_datastore.py index 4733268d..2ae94d92 100644 --- a/tests/system/utils/clear_datastore.py +++ b/tests/system/utils/clear_datastore.py @@ -81,14 +81,24 @@ def remove_kind(kind, client): def remove_all_entities(client): + BATCH_SIZE = 500 # Datastore API only allows 500 mutations in a single call. + KEY_BYTES_LIMIT = 3 << 20 # grpc limit is ~4MiB, so use a 3MiB limit (to work around any encoding issues) + query = client.query() + query.keys_only() results = list(query.fetch()) keys = [entity.key for entity in results] - BATCH_SIZE = 500 # Datastore API only allows 500 mutations in a single call. while keys: - batch = keys[:BATCH_SIZE] - keys = keys[BATCH_SIZE:] - client.delete_multi(batch) + key_bytes = 0 + batch = [] + # Grab keys to delete, while ensuring we stay within our bounds. + while len(batch) < BATCH_SIZE and key_bytes < KEY_BYTES_LIMIT and keys: + batch.append(keys.pop()) + if batch[-1].name is None: + key_bytes += 9 # It takes 9 bytes for the largest varint encoded number + else: + key_bytes += len(batch[-1].name) + client.delete_multi(batch) def main(): diff --git a/tests/system/utils/populate_datastore.py b/tests/system/utils/populate_datastore.py index 108ff229..d1921e43 100644 --- a/tests/system/utils/populate_datastore.py +++ b/tests/system/utils/populate_datastore.py @@ -63,55 +63,84 @@ def print_func(message): print(message) +def _estimate_entity_size(entity): + def _estimate_value_size(value): + if isinstance(value, six.integer_types): + return 9 # Max varint size + elif isinstance(value, float): + return 8 + elif isinstance(value, six.string_types) or isinstance(value, six.binary_type): + return len(value) + elif isinstance(value, (list, tuple)): + return sum(_estimate_entity_size(elem) for elem in value) + elif isinstance(value, dict): + return _estimate_entity_size(value) + result = 0 + for key, value in entity.items(): + result += len(key) # The number of runes is fine, no point forcing a utf-8 encoding here. + result += _estimate_value_size(value) + return result + def add_large_character_entities(client=None): TOTAL_OBJECTS = 2500 NAMESPACE = "LargeCharacterEntity" KIND = "LargeCharacter" MAX_STRING = (string.ascii_lowercase * 58)[:1500] + BATCH_SIZE = 500 # Datastore API only allows 500 mutations in a single call. + RPC_BYTES_LIMIT = 3 << 20 # grpc limit is ~4MiB, so use a 3MiB limit (to work around any encoding issues) + client.namespace = NAMESPACE # Query used for all tests page_query = client.query(kind=KIND, namespace=NAMESPACE) + page_query.keys_only() def put_objects(count): - current = 0 - - # Can only do 500 operations in a transaction with an overall - # size limit. - ENTITIES_TO_BATCH = 25 - while current < count: - start = current - end = min(current + ENTITIES_TO_BATCH, count) - with client.transaction() as xact: - # The name/ID for the new entity - for i in range(start, end): - name = "character{0:05d}".format(i) - # The Cloud Datastore key for the new entity - task_key = client.key(KIND, name) - - # Prepares the new entity - task = datastore.Entity(key=task_key) - task["name"] = "{0:05d}".format(i) - task["family"] = "Stark" - task["alive"] = False - - for i in string.ascii_lowercase: - task["space-{}".format(i)] = MAX_STRING - - # Saves the entity - xact.put(task) - current += ENTITIES_TO_BATCH - - # Ensure we have 1500 entities for tests. If not, clean up type and add + remaining = count + entities = [] + # The name/ID for the new entity + for i in range(count): + name = "character{0:05d}".format(i) + # The Cloud Datastore key for the new entity + task_key = client.key(KIND, name) + + # Prepares the new entity + task = datastore.Entity(key=task_key) + task["name"] = "{0:05d}".format(i) + task["family"] = "Stark" + task["alive"] = False + for i in string.ascii_lowercase: + task["space-{}".format(i)] = MAX_STRING + entities.append(task) + + # Now lets try to insert all of the entities, in batches. + while entities: + approx_rpc_bytes = 0 + batch = [] + while entities and len(batch) < BATCH_SIZE and approx_rpc_bytes < RPC_BYTES_LIMIT: + batch.append(entities.pop()) + approx_rpc_bytes += _estimate_entity_size(batch[-1]) + # These entities are all in different entity groups, so there is no + # benefit in placing them in a transaction. + client.put_multi(batch) + + # Ensure we have 2500 entities for tests. If not, clean up type and add # new entities equal to TOTAL_OBJECTS - all_entities = [e for e in page_query.fetch()] - if len(all_entities) != TOTAL_OBJECTS: - # Cleanup Collection if not an exact match - while all_entities: - entities = all_entities[:500] - all_entities = all_entities[500:] - client.delete_multi([e.key for e in entities]) + all_keys = [e.key for e in page_query.fetch()] + if len(all_keys) != TOTAL_OBJECTS: + # Remove all of the entites that exist of this kind in this namespace. + while all_keys: + key_bytes = 0 + batch = [] + # Grab keys to delete, while ensuring we stay within our bounds. + while len(batch) < BATCH_SIZE and key_bytes < RPC_BYTES_LIMIT and all_keys: + batch.append(all_keys.pop()) + if batch[-1].name is None: + key_bytes += 9 # It takes 9 bytes for the largest varint encoded number + else: + key_bytes += len(batch[-1].name) + client.delete_multi(batch) # Put objects put_objects(TOTAL_OBJECTS)