From 79337507065c37e226a782f2cdd3db533103fef8 Mon Sep 17 00:00:00 2001 From: Stan Ulbrych Date: Wed, 1 Apr 2026 18:21:53 +0200 Subject: [PATCH 1/4] Disallow crawling 3.9 docs --- templates/robots.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/templates/robots.txt b/templates/robots.txt index a7de4a6..291c068 100644 --- a/templates/robots.txt +++ b/templates/robots.txt @@ -24,3 +24,4 @@ Disallow: /3.5/ Disallow: /3.6/ Disallow: /3.7/ Disallow: /3.8/ +Disallow: /3.9/ From 3e54acdbaf2aa10f5ab7db3cc803c17f29164042 Mon Sep 17 00:00:00 2001 From: Stan Ulbrych Date: Wed, 1 Apr 2026 20:08:46 +0200 Subject: [PATCH 2/4] Automatically populate robots.txt --- build_docs.py | 16 ++++++++++------ templates/robots.txt | 24 +++++------------------- tests/test_build_docs.py | 23 ++++++++++++++++++++++- 3 files changed, 37 insertions(+), 26 deletions(-) diff --git a/build_docs.py b/build_docs.py index e44ffa0..1110335 100755 --- a/build_docs.py +++ b/build_docs.py @@ -1251,7 +1251,8 @@ def build_docs(args: argparse.Namespace) -> int: build_sitemap(versions, languages, args.www_root, args.group) build_404(args.www_root, args.group) - copy_robots_txt( + build_robots_txt( + versions, args.www_root, args.group, args.skip_cache_invalidation, @@ -1338,20 +1339,23 @@ def build_404(www_root: Path, group: str) -> None: chgrp(not_found_file, group=group) -def copy_robots_txt( +def build_robots_txt( + versions: Versions, www_root: Path, group: str, skip_cache_invalidation: bool, http: urllib3.PoolManager, ) -> None: - """Copy robots.txt to www_root.""" + """Render robots.txt from its template into www_root.""" if not www_root.exists(): - logging.info("Skipping copying robots.txt (www root does not even exist).") + logging.info("Skipping robots.txt generation (www root does not even exist).") return - logging.info("Copying robots.txt...") + logging.info("Starting robots.txt generation...") template_path = HERE / "templates" / "robots.txt" + template = 
jinja2.Template(template_path.read_text(encoding="UTF-8")) + rendered_template = template.render(versions=versions) robots_path = www_root / "robots.txt" - shutil.copyfile(template_path, robots_path) + robots_path.write_text(rendered_template, encoding="UTF-8") robots_path.chmod(0o775) chgrp(robots_path, group=group) if not skip_cache_invalidation: diff --git a/templates/robots.txt b/templates/robots.txt index 291c068..9fd0f3c 100644 --- a/templates/robots.txt +++ b/templates/robots.txt @@ -6,22 +6,8 @@ Disallow: /dev Disallow: /release # Disallow EOL versions -Disallow: /2/ -Disallow: /2.0/ -Disallow: /2.1/ -Disallow: /2.2/ -Disallow: /2.3/ -Disallow: /2.4/ -Disallow: /2.5/ -Disallow: /2.6/ -Disallow: /2.7/ -Disallow: /3.0/ -Disallow: /3.1/ -Disallow: /3.2/ -Disallow: /3.3/ -Disallow: /3.4/ -Disallow: /3.5/ -Disallow: /3.6/ -Disallow: /3.7/ -Disallow: /3.8/ -Disallow: /3.9/ +{% for version in versions -%} +{% if version.status == "EOL" -%} +Disallow: /{{ version.name }}/ +{% endif -%} +{% endfor %} diff --git a/tests/test_build_docs.py b/tests/test_build_docs.py index 028da90..4947ccf 100644 --- a/tests/test_build_docs.py +++ b/tests/test_build_docs.py @@ -1,6 +1,9 @@ +from pathlib import Path +from unittest.mock import patch + import pytest -from build_docs import format_seconds +from build_docs import Version, Versions, build_robots_txt, format_seconds @pytest.mark.parametrize( @@ -24,3 +27,21 @@ ) def test_format_seconds(seconds: float, expected: str) -> None: assert format_seconds(seconds) == expected + + +@patch("build_docs.chgrp") +def test_build_robots_txt(mock_chgrp, tmp_path) -> None: + versions = Versions([ + Version(name="3.14", status="EOL", branch_or_tag="3.14"), + Version(name="3.15", status="EOL", branch_or_tag="3.15"), + Version(name="3.16", status="security-fixes", branch_or_tag="3.16"), + Version(name="3.17", status="stable", branch_or_tag="3.17"), + ]) + + build_robots_txt(versions, tmp_path, group="", skip_cache_invalidation=True, http=None) + 
+ result = (tmp_path / "robots.txt").read_text() + assert "Disallow: /3.14/" in result + assert "Disallow: /3.15/" in result + assert "/3.16/" not in result + assert "/3.17/" not in result From f65ebe2f2b2d2a892673b9fd46d7bff0664a96d3 Mon Sep 17 00:00:00 2001 From: Stan Ulbrych Date: Wed, 1 Apr 2026 20:11:16 +0200 Subject: [PATCH 3/4] ruff wrap --- tests/test_build_docs.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/test_build_docs.py b/tests/test_build_docs.py index 4947ccf..8385ceb 100644 --- a/tests/test_build_docs.py +++ b/tests/test_build_docs.py @@ -1,4 +1,3 @@ -from pathlib import Path from unittest.mock import patch import pytest @@ -38,7 +37,9 @@ def test_build_robots_txt(mock_chgrp, tmp_path) -> None: Version(name="3.17", status="stable", branch_or_tag="3.17"), ]) - build_robots_txt(versions, tmp_path, group="", skip_cache_invalidation=True, http=None) + build_robots_txt( + versions, tmp_path, group="", skip_cache_invalidation=True, http=None + ) result = (tmp_path / "robots.txt").read_text() assert "Disallow: /3.14/" in result From b374803514c1b4211bcb6616bc7f7b1dfd9ad2f6 Mon Sep 17 00:00:00 2001 From: Stan Ulbrych Date: Wed, 1 Apr 2026 20:29:59 +0200 Subject: [PATCH 4/4] Hard code the ancient ones --- templates/robots.txt | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/templates/robots.txt b/templates/robots.txt index 9fd0f3c..deb2211 100644 --- a/templates/robots.txt +++ b/templates/robots.txt @@ -6,6 +6,13 @@ Disallow: /dev Disallow: /release # Disallow EOL versions +Disallow: /2/ +Disallow: /2.0/ +Disallow: /2.1/ +Disallow: /2.2/ +Disallow: /2.3/ +Disallow: /2.4/ +Disallow: /2.5/ {% for version in versions -%} {% if version.status == "EOL" -%} Disallow: /{{ version.name }}/