Add the gifn-apply test tool

Add a tool that checks out the repositories mentioned in
a git-if-needed patch series, runs git-if-needed, and performs
some checks on its operation. This tool will eventually be used in
a Zuul test job for this repository.

Change-Id: Id02fb7c21f5ab34d9639bf845fcc3961d929b13b
diff --git a/tools/git-if-needed/.editorconfig b/tools/git-if-needed/.editorconfig
index 26fbee5..b9f4c15 100644
--- a/tools/git-if-needed/.editorconfig
+++ b/tools/git-if-needed/.editorconfig
@@ -1,4 +1,7 @@
 # https://editorconfig.org/
+#
+# SPDX-FileCopyrightText: StorPool <support@storpool.com>
+# SPDX-License-Identifier: BSD-2-Clause
 
 root = true
 
@@ -20,6 +23,14 @@
 indent_style = tab
 tab_size = 8
 
+[*.py]
+indent_style = space
+indent_size = 4
+
+[*.toml]
+indent_style = space
+indent_size = 2
+
 [sh/git-if-needed]
 indent_style = tab
 tab_size = 8
diff --git a/tools/git-if-needed/.reuse/dep5 b/tools/git-if-needed/.reuse/dep5
new file mode 100644
index 0000000..15094c2
--- /dev/null
+++ b/tools/git-if-needed/.reuse/dep5
@@ -0,0 +1,9 @@
+Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
+Upstream-Name: gifn-apply
+Upstream-Contact: StorPool <support@storpool.com>
+Source: https://spfactory.storpool.com/r/plugins/gitiles/sp-osci
+
+Files:
+  tests/data/*
+Copyright: StorPool <support@storpool.com>
+License: BSD-2-Clause
diff --git a/tools/git-if-needed/LICENSES/BSD-2-Clause.txt b/tools/git-if-needed/LICENSES/BSD-2-Clause.txt
new file mode 100644
index 0000000..5f662b3
--- /dev/null
+++ b/tools/git-if-needed/LICENSES/BSD-2-Clause.txt
@@ -0,0 +1,9 @@
+Copyright (c) <year> <owner> 
+
+Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/tools/git-if-needed/nix/test.nix b/tools/git-if-needed/nix/test.nix
index 96af1d8..054bb33 100644
--- a/tools/git-if-needed/nix/test.nix
+++ b/tools/git-if-needed/nix/test.nix
@@ -1,3 +1,6 @@
+# SPDX-FileCopyrightText: StorPool <support@storpool.com>
+# SPDX-License-Identifier: BSD-2-Clause
+
 { pkgs ? import <nixpkgs> {}, perl-ver ? 536 }:
 let
   perl-tree-name = "perl${toString perl-ver}Packages";
diff --git a/tools/git-if-needed/pyproject.toml b/tools/git-if-needed/pyproject.toml
new file mode 100644
index 0000000..894660d
--- /dev/null
+++ b/tools/git-if-needed/pyproject.toml
@@ -0,0 +1,71 @@
+# SPDX-FileCopyrightText: StorPool <support@storpool.com>
+# SPDX-License-Identifier: BSD-2-Clause
+
+[build-system]
+requires = ["setuptools >= 61", "wheel"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "gifn_apply"
+version = "0.1.0"
+description = "Functional tests for the git-if-needed tool"
+readme = "python/README.md"
+requires-python = ">= 3.8"
+
+[[project.authors]]
+name = "StorPool"
+email = "support@storpool.com"
+
+[project.scripts]
+gifn_apply = "gifn_apply.__main__:main"
+
+[tool.setuptools]
+zip-safe = true
+package-dir = {"" = "python"}
+packages = ["gifn_apply"]
+
+[tool.black]
+line-length = 100
+
+[tool.mypy]
+strict = true
+python_version = "3.8"
+
+[tool.pylint]
+py-version = "3.8"
+load-plugins = [
+  "pylint.extensions.bad_builtin",
+  "pylint.extensions.broad_try_clause",
+  "pylint.extensions.check_elif",
+  "pylint.extensions.code_style",
+  "pylint.extensions.comparetozero",
+  "pylint.extensions.comparison_placement",
+  "pylint.extensions.confusing_elif",
+  "pylint.extensions.consider_refactoring_into_while_condition",
+  "pylint.extensions.consider_ternary_expression",
+  "pylint.extensions.dict_init_mutate",
+  "pylint.extensions.docparams",
+  "pylint.extensions.docstyle",
+  "pylint.extensions.dunder",
+  # "pylint.extensions.empty_comment",  # the license text triggers this
+  "pylint.extensions.emptystring",
+  "pylint.extensions.eq_without_hash",
+  "pylint.extensions.for_any_all",
+  "pylint.extensions.magic_value",
+  "pylint.extensions.mccabe",
+  "pylint.extensions.no_self_use",
+  "pylint.extensions.overlapping_exceptions",
+  "pylint.extensions.private_import",
+  "pylint.extensions.redefined_loop_name",
+  "pylint.extensions.redefined_variable_type",
+  "pylint.extensions.set_membership",
+  "pylint.extensions.typing",
+  "pylint.extensions.while_used",
+]
+disable = [
+  # Clarity
+  "consider-using-assignment-expr",
+]
+
+[tool.test-stages]
+stages = ["ruff and not @manual", "@check and not @manual", "@tests and not @manual"]
diff --git a/tools/git-if-needed/python/README.md b/tools/git-if-needed/python/README.md
new file mode 100644
index 0000000..5b68b7d
--- /dev/null
+++ b/tools/git-if-needed/python/README.md
@@ -0,0 +1,8 @@
+<!--
+SPDX-FileCopyrightText: StorPool <support@storpool.com>
+SPDX-License-Identifier: BSD-2-Clause
+-->
+
+# gifn-apply - run git-if-needed on the specified repositories
+
+Mostly used for tests. This file needs more words.
diff --git a/tools/git-if-needed/python/config/ruff-all/pyproject.toml b/tools/git-if-needed/python/config/ruff-all/pyproject.toml
new file mode 100644
index 0000000..4efbc47
--- /dev/null
+++ b/tools/git-if-needed/python/config/ruff-all/pyproject.toml
@@ -0,0 +1,6 @@
+# SPDX-FileCopyrightText: StorPool <support@storpool.com>
+# SPDX-License-Identifier: BSD-2-Clause
+
+[tool.ruff]
+extend = "../ruff-base/pyproject.toml"
+select = ["ALL"]
diff --git a/tools/git-if-needed/python/config/ruff-base/pyproject.toml b/tools/git-if-needed/python/config/ruff-base/pyproject.toml
new file mode 100644
index 0000000..f3dd544
--- /dev/null
+++ b/tools/git-if-needed/python/config/ruff-base/pyproject.toml
@@ -0,0 +1,34 @@
+# SPDX-FileCopyrightText: StorPool <support@storpool.com>
+# SPDX-License-Identifier: BSD-2-Clause
+
+[tool.ruff]
+target-version = "py38"
+line-length = 100
+select = []
+ignore = [
+  # We know what "self" is, we hope
+  "ANN101",
+
+  # We leave most of the formatting to the 'black' tool
+  "COM812",
+
+  # This is our style
+  "D203",
+  "D213",
+
+  # Much too restrictive
+  "EM",
+
+  # We have slightly different ideas about the `typing` imports
+  "I",
+
+  # Much too restrictive
+  "TRY003",
+
+  # This seems to be a bug in Ruff
+  "PLE1205",
+]
+
+[tool.ruff.per-file-ignores]
+# This is a test suite
+"*/unit_tests/*" = ["S101"]
diff --git a/tools/git-if-needed/python/config/ruff-most/pyproject.toml b/tools/git-if-needed/python/config/ruff-most/pyproject.toml
new file mode 100644
index 0000000..26587b8
--- /dev/null
+++ b/tools/git-if-needed/python/config/ruff-most/pyproject.toml
@@ -0,0 +1,54 @@
+# SPDX-FileCopyrightText: StorPool <support@storpool.com>
+# SPDX-License-Identifier: BSD-2-Clause
+
+[tool.ruff]
+extend = "../ruff-base/pyproject.toml"
+# The list of linters from Ruff 0.0.253
+select = [
+  "A",
+  "ANN",
+  "ARG",
+  "B",
+  "BLE",
+  "C4",
+  "C90",
+  "COM",
+  "D",
+  "DJ",
+  "DTZ",
+  "E",
+  "EM",
+  "ERA",
+  "EXE",
+  "F",
+  "FBT",
+  "G",
+  "I",
+  "ICN",
+  "INP",
+  "ISC",
+  "N",
+  "NPY",
+  "PD",
+  "PGH",
+  "PIE",
+  "PL",
+  "PT",
+  "PTH",
+  "PYI",
+  "Q",
+  "RET",
+  "RSE",
+  "RUF",
+  "S",
+  "SIM",
+  "SLF",
+  "T10",
+  "T20",
+  "TCH",
+  "TID",
+  "TRY",
+  "UP",
+  "W",
+  "YTT",
+]
diff --git a/tools/git-if-needed/python/gifn_apply/__init__.py b/tools/git-if-needed/python/gifn_apply/__init__.py
new file mode 100644
index 0000000..6090542
--- /dev/null
+++ b/tools/git-if-needed/python/gifn_apply/__init__.py
@@ -0,0 +1,3 @@
+# SPDX-FileCopyrightText: StorPool <support@storpool.com>
+# SPDX-License-Identifier: BSD-2-Clause
+"""Run git-if-needed on the specified repositories."""
diff --git a/tools/git-if-needed/python/gifn_apply/__main__.py b/tools/git-if-needed/python/gifn_apply/__main__.py
new file mode 100644
index 0000000..3a07a08
--- /dev/null
+++ b/tools/git-if-needed/python/gifn_apply/__main__.py
@@ -0,0 +1,300 @@
+# SPDX-FileCopyrightText: StorPool <support@storpool.com>
+# SPDX-License-Identifier: BSD-2-Clause
+"""Make sure the specified patches apply to the specified repositories."""
+
+from __future__ import annotations
+
+import argparse
+import dataclasses
+import logging
+import functools
+import pathlib
+import sys
+import tempfile
+
+from typing import Final
+
+from . import defs
+from . import gifn
+from . import git
+from . import quilt
+from . import repo_url
+
+
+@dataclasses.dataclass(frozen=True)
+class RepoChanges:
+    """A repo directory along with some metadata."""
+
+    repo: defs.Repo
+    repo_dir: pathlib.Path
+    changes: list[str]
+    changes_set: set[str]
+
+
+@dataclasses.dataclass(frozen=True)
+class Mode:
+    """The abstract-ish base class for the gifn-apply tool's mode of operation."""
+
+
+@dataclasses.dataclass(frozen=True)
+class ModeHandled(Mode):
+    """The parse_args() function handled everything, exit with the specified code."""
+
+    return_code: int
+
+
+@dataclasses.dataclass(frozen=True)
+class ModeRun(Mode):
+    """Run the tests with the specified config."""
+
+    cfg: defs.Config
+
+
+def _validate_series_filename(value: str) -> str:
+    """Make sure `--series-file` specifies a non-empty filename, no path components."""
+    path: Final = pathlib.Path(value)
+    if not path.parts:  # both "" and "." are normalized to a path with no components
+        raise ValueError("The series filename cannot be empty")
+    if len(path.parts) != 1:
+        raise ValueError("The series filename may not contain any path components")
+    return path.parts[0]
+
+
+def _show_version() -> None:
+    """Display program version information."""
+    print(f"gifn-apply {defs.VERSION}")  # noqa: T201
+
+
+def _show_features() -> None:
+    """Display program features information."""
+    print(f"Features: gifn-apply={defs.VERSION} repo-url=0.1 quilt=0.1")  # noqa: T201
+
+
+def _build_logger(*, verbose: bool) -> logging.Logger:
+    """Build a logger that outputs messages to the standard output stream."""
+    logger: Final = logging.getLogger()
+    logger.setLevel(logging.DEBUG)
+
+    stdout_handler: Final = logging.StreamHandler(stream=sys.stdout)
+    stdout_handler.addFilter(lambda record: record.levelno == logging.INFO)
+    stdout_handler.setLevel(logging.INFO)
+    logger.addHandler(stdout_handler)
+
+    stderr_handler: Final = logging.StreamHandler(stream=sys.stderr)
+    if verbose:
+        stderr_handler.addFilter(lambda record: record.levelno != logging.INFO)
+        stderr_handler.setLevel(logging.DEBUG)
+    else:
+        stderr_handler.setLevel(logging.WARNING)
+    logger.addHandler(stderr_handler)
+
+    return logger
+
+
+def _parse_args() -> Mode:
+    """Parse the command-line arguments; `--version` and `--features` work on their own."""
+    parser: Final = argparse.ArgumentParser(prog="gifn-apply")
+    parser.add_argument(
+        "--features",
+        action="store_true",
+        help="display information about supported program features and exit",
+    )
+    parser.add_argument(
+        "-P",
+        "--program",
+        type=pathlib.Path,  # mandatory, but checked by hand so that a bare --version works
+        help="the path to the git-if-needed program to test",
+    )
+    parser.add_argument(
+        "-p",
+        "--patches",
+        type=pathlib.Path,  # mandatory, checked by hand in the same way as --program
+        help="the path to the directory containing the patches and the series file",
+    )
+    parser.add_argument(
+        "-r",
+        "--repo-url",
+        type=repo_url.parse_base_url_pair,
+        action="append",
+        default=[],
+        help="'base=url' pairs, e.g. 'openstack=https://github.com/openstack/'",
+    )
+    parser.add_argument(
+        "-s",
+        "--series-file",
+        type=_validate_series_filename,
+        default="series",
+        help="the name of the series file in the patches directory (default: 'series')",
+    )
+    parser.add_argument(
+        "-v", "--verbose", action="store_true", help="verbose operation; display diagnostic output"
+    )
+    parser.add_argument(
+        "-V", "--version", action="store_true", help="display program version information and exit"
+    )
+
+    args: Final = parser.parse_args()
+
+    if args.version:
+        _show_version()
+    if args.features:
+        _show_features()
+    if args.version or args.features:
+        return ModeHandled(0)
+    if args.program is None or args.patches is None:
+        parser.error("the following arguments are required: -P/--program, -p/--patches")
+
+    program: Final = args.program.resolve()
+    if not program.is_file():
+        sys.exit(f"Not a regular file: {program}")
+
+    patches: Final = args.patches.resolve()
+    if not patches.is_dir():
+        sys.exit(f"Not a patches directory: {patches}")
+
+    series_file: Final = args.series_file
+    assert isinstance(series_file, str) and series_file  # noqa: S101,PT018
+
+    repo_urls: Final = repo_url.get_env_repo_urls()
+    for pair in args.repo_url:
+        repo_urls[pair.base] = repo_url.RepoURLOK(pair.url)
+
+    repo_urls_ok: Final = {}
+    for base, res in sorted(repo_urls.items()):
+        if isinstance(res, repo_url.RepoURLError):
+            sys.exit(f"Invalid base URL for {base!r} / {res.name!r}: {res.value!r}: {res.err}")
+        assert isinstance(res, repo_url.RepoURLOK)  # noqa: S101
+        repo_urls_ok[base] = res.url
+
+    return ModeRun(
+        defs.Config(
+            log=_build_logger(verbose=args.verbose),
+            program=program,
+            patches=patches,
+            series=patches / series_file,
+            repo_urls=repo_urls_ok,
+        )
+    )
+
+
+@functools.singledispatch
+def _do_it(mode: Mode) -> None:
+    """Do what the caller requested."""
+    sys.exit(f"gifn-apply internal error: _do_it(): unhandled mode {mode!r}")
+
+
+@_do_it.register
+def _do_it_handled(mode: ModeHandled) -> None:
+    """parse_args() did everything it wanted to, let's just go."""
+    sys.exit(mode.return_code)
+
+
+@_do_it.register
+def _do_it_run(mode: ModeRun) -> None:
+    """Clone the repositories, run git-if-needed, make sure all the patches got applied."""
+    cfg: Final = mode.cfg
+    with tempfile.TemporaryDirectory(prefix="gifn-apply.") as tempd_obj:
+        tempd: Final = pathlib.Path(tempd_obj)
+        cfg.log.debug("Using %(tempd)s as a temporary directory", {"tempd": tempd})
+
+        patches, repos_needed = quilt.parse_series(cfg)
+        cfg.log.debug(
+            "Need to check out %(repos)d repos, then apply %(patches)d patches",
+            {"repos": len(repos_needed), "patches": len(patches)},
+        )
+
+        def list_change_ids(
+            cfg: defs.Config, repo: defs.Repo, repo_dir: pathlib.Path
+        ) -> RepoChanges:
+            """List the changes in the cloned or manipulated repository."""
+            changes: Final = git.list_change_ids(cfg, repo_dir)
+            return RepoChanges(repo, repo_dir, changes, set(changes))
+
+        def clone_repo(repo: defs.Repo) -> RepoChanges:
+            """Clone a single repo, get the list of changes."""
+            cfg.log.info(
+                "Cloning the %(repo)s repo from %(origin)s",
+                {"repo": repo.repo, "origin": repo.origin},
+            )
+            repo_dir: Final = git.repo_clone(cfg, repo, tempd)
+            res: Final = list_change_ids(cfg, repo, repo_dir)
+            cfg.log.debug(
+                "- cloned into %(repo_dir)s, got %(changes)d changes",
+                {"repo_dir": res.repo_dir, "changes": len(res.changes)},
+            )
+            return res
+
+        def check_repo(repo: defs.Repo, rchanges: RepoChanges) -> RepoChanges:
+            """Make sure a repo contains all the changes applied before and maybe some more."""
+            cfg.log.info(
+                "Checking the %(repo)s repo from %(origin)s for new changes",
+                {"repo": repo.repo, "origin": repo.origin},
+            )
+            if rchanges.repo != repo:
+                sys.exit(
+                    f"Internal error: {repo=!r} not the same as {rchanges.repo=!r} for "
+                    f"{rchanges.repo_dir=!r} {len(rchanges.changes)} changes"
+                )
+
+            res: Final = list_change_ids(cfg, repo, rchanges.repo_dir)
+            len_before: Final = len(rchanges.changes)
+            len_after: Final = len(res.changes)
+            if len_after < len_before:
+                sys.exit(
+                    f"gifn weirdness: {len_after=!r} < {len_before=!r} for "
+                    f"{rchanges.repo=!r} at {rchanges.repo_dir=!r}"
+                )
+            if res.changes[:len_before] != rchanges.changes:
+                sys.exit(
+                    f"gifn weirdness: the first {len_before} changes are not the same for "
+                    f"{rchanges.repo=!r} at {rchanges.repo_dir=!r}"
+                )
+            if not rchanges.changes_set.issubset(res.changes_set):
+                sys.exit(
+                    f"gifn weirdness: the {len_before=!r} changes are not "
+                    f"contained within the {len_after=!r} ones for "
+                    f"{rchanges.repo=!r} at {rchanges.repo_dir=!r}"
+                )
+
+            return res
+
+        repos_before: Final = {repo: clone_repo(repo) for repo in repos_needed}
+
+        cfg.log.info("Making sure that at least one of the changes has not been applied yet")
+        unapplied_before_count: Final = len(
+            [
+                patch
+                for patch in patches
+                if all(
+                    patch.change_id not in rchanges.changes_set
+                    for rchanges in repos_before.values()
+                )
+            ]
+        )
+        if not unapplied_before_count:
+            sys.exit("All the patches have been applied already")
+
+        gifn.apply_series(cfg, tempd)
+        repos_after: Final = {
+            repo: check_repo(repo, rchanges) for repo, rchanges in repos_before.items()
+        }
+
+        cfg.log.info("Making sure that all of the changes have been applied")
+        unapplied_after: Final = [
+            str(patch.relpath)
+            for patch in patches
+            if all(patch.change_id not in rchanges.changes_set for rchanges in repos_after.values())
+        ]
+        if unapplied_after:
+            sys.exit(f"Some of the patches were not applied: {' '.join(unapplied_after)}")
+
+        cfg.log.info("Everything seems to be fine!")
+
+
+def main() -> None:
+    """Parse command-line options, run the test."""
+    _do_it(_parse_args())
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tools/git-if-needed/python/gifn_apply/defs.py b/tools/git-if-needed/python/gifn_apply/defs.py
new file mode 100644
index 0000000..32b13e8
--- /dev/null
+++ b/tools/git-if-needed/python/gifn_apply/defs.py
@@ -0,0 +1,52 @@
+# SPDX-FileCopyrightText: StorPool <support@storpool.com>
+# SPDX-License-Identifier: BSD-2-Clause
+"""Common definitions for the gifn-apply routines."""
+
+from __future__ import annotations
+
+import dataclasses
+
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    import logging
+    import pathlib
+    import urllib.parse as uparse
+
+
+VERSION = "0.1.0"
+
+
+class GApplyError(Exception):
+    """The base class for errors that occurred during the gifn-apply operation."""
+
+
+@dataclasses.dataclass(frozen=True, order=True)
+class Repo:
+    """A repository split into the origin fragment and the name/path within."""
+
+    origin: str
+    repo: str
+
+    @property
+    def path(self) -> str:
+        """Combine the origin and the repo path."""
+        return f"{self.origin}/{self.repo}"
+
+
+@dataclasses.dataclass(frozen=True)
+class RepoURL:
+    """A parsed URL for a repo base."""
+
+    url: uparse.ParseResult
+
+
+@dataclasses.dataclass(frozen=True)
+class Config:
+    """Runtime configuration for the gifn-apply tool."""
+
+    log: logging.Logger
+    program: pathlib.Path
+    patches: pathlib.Path
+    series: pathlib.Path
+    repo_urls: dict[str, RepoURL]
diff --git a/tools/git-if-needed/python/gifn_apply/gifn.py b/tools/git-if-needed/python/gifn_apply/gifn.py
new file mode 100644
index 0000000..3e313a8
--- /dev/null
+++ b/tools/git-if-needed/python/gifn_apply/gifn.py
@@ -0,0 +1,26 @@
+# SPDX-FileCopyrightText: Peter Pentchev <roam@ringlet.net>
+# SPDX-License-Identifier: BSD-2-Clause
+"""Run git-if-needed."""
+
+from __future__ import annotations
+
+import shlex
+import subprocess
+
+from typing import TYPE_CHECKING
+
+from . import defs
+
+if TYPE_CHECKING:
+    import pathlib
+
+
+def apply_series(cfg: defs.Config, tempd: pathlib.Path) -> None:
+    """Run git-if-needed to apply all the patches in a series file."""
+    cmd: list[str | pathlib.Path] = [cfg.program, "-s", cfg.series, "--", "am"]
+    cmdstr = shlex.join(str(arg) for arg in cmd)
+    cfg.log.debug("Running `%(cmdstr)s`", {"cmdstr": cmdstr})
+    try:
+        subprocess.run(cmd, check=True, cwd=tempd, shell=False)
+    except (OSError, subprocess.CalledProcessError) as err:
+        raise defs.GApplyError(f"Could not run `{cmdstr}` in {tempd}: {err}") from err
diff --git a/tools/git-if-needed/python/gifn_apply/git.py b/tools/git-if-needed/python/gifn_apply/git.py
new file mode 100644
index 0000000..038d167
--- /dev/null
+++ b/tools/git-if-needed/python/gifn_apply/git.py
@@ -0,0 +1,94 @@
+# SPDX-FileCopyrightText: StorPool <support@storpool.com>
+# SPDX-License-Identifier: BSD-2-Clause
+"""Clone a Git repository."""
+
+from __future__ import annotations
+
+import subprocess
+
+from typing import TYPE_CHECKING
+
+from . import defs
+
+if TYPE_CHECKING:
+    import pathlib
+
+    from typing import Final
+
+
+class GitError(defs.GApplyError):
+    """An error that occurred during a Git-related operation."""
+
+
+def repo_clone(cfg: defs.Config, repo: defs.Repo, tempd: pathlib.Path) -> pathlib.Path:
+    """Clone a Git repository."""
+    loc: Final = cfg.repo_urls.get(repo.origin.upper())
+    if loc is None:
+        raise GitError(f"Unknown repository origin {repo.origin!r}")
+    cfg.log.info(
+        "Cloning the %(origin)s %(repo)s repository from %(loc)s",
+        {"origin": repo.origin, "repo": repo.repo, "loc": loc.url.geturl()},
+    )
+
+    repo_url: Final = loc.url._replace(
+        path=loc.url.path + ("" if loc.url.path.endswith("/") else "/") + repo.repo
+    )
+    repo_dir: Final = tempd / repo.origin / repo.repo
+    if repo_dir.exists() or repo_dir.is_symlink():
+        raise GitError(f"Did not expect {repo_dir} to exist")
+    repo_dir.parent.mkdir(mode=0o755, exist_ok=True, parents=True)
+
+    cfg.log.debug(
+        "About to clone %(repo_url)s into %(repo_dir)s",
+        {"repo_url": repo_url.geturl(), "repo_dir": repo_dir},
+    )
+    try:
+        subprocess.run(
+            ["git", "clone", repo_url.geturl(), repo.repo, "-b", "master"],
+            check=True,
+            cwd=repo_dir.parent,
+        )
+    except (OSError, subprocess.CalledProcessError) as err:
+        raise GitError(
+            f"Could not run `git clone {repo_url.geturl()} {repo.repo}` in {tempd}: {err}"
+        ) from err
+    if not repo_dir.is_dir():
+        raise GitError(f"`git clone` did not create {repo_dir}")
+
+    return repo_dir
+
+
+def list_change_ids(cfg: defs.Config, repo_dir: pathlib.Path) -> list[str]:
+    """Get the Change-Id fields of all the commits reachable from the current head."""
+    cfg.log.info("Getting the change IDs in %(repo_dir)s", {"repo_dir": repo_dir})
+    try:
+        lines = [
+            line
+            for line in subprocess.check_output(
+                ["git", "log", "--pretty=%(trailers:key=Change-Id)", "--reverse"],
+                cwd=repo_dir,
+                encoding="UTF-8",
+                shell=False,
+            ).splitlines()
+            if line
+        ]
+    except (OSError, subprocess.CalledProcessError) as err:
+        raise GitError(f"Could not run `git log` for change IDs in {repo_dir}: {err}") from err
+    except ValueError as err:
+        raise GitError(
+            f"Could not decode the output of `git log` in {repo_dir} into UTF-8 change IDs: {err}"
+        ) from err
+
+    def parse_line(line: str) -> str:
+        """Parse a "Change-Id: Ixxx" line, return the "Ixxx" part only."""
+        fields: Final = line.split()
+        # The magic value will go away once we can use structural pattern matching
+        if (
+            len(fields) != 2  # noqa: PLR2004  # pylint: disable=magic-value-comparison
+            or fields[0] != "Change-Id:"  # pylint: disable=magic-value-comparison
+            or not fields[1].startswith("I")
+        ):
+            raise GitError(f"Unexpected `git log` output for change IDs: {line!r}")
+        return fields[1]
+
+    return [parse_line(line) for line in lines]
diff --git a/tools/git-if-needed/python/gifn_apply/quilt.py b/tools/git-if-needed/python/gifn_apply/quilt.py
new file mode 100644
index 0000000..3de9049
--- /dev/null
+++ b/tools/git-if-needed/python/gifn_apply/quilt.py
@@ -0,0 +1,88 @@
+# SPDX-FileCopyrightText: StorPool <support@storpool.com>
+# SPDX-License-Identifier: BSD-2-Clause
+"""Parse a quilt patches file, then parse the patches themselves."""
+
+from __future__ import annotations
+
+import dataclasses
+import pathlib
+import re
+
+from . import defs
+
+
+_REPO_PARTS = 2
+
+_RE_CHANGE_ID = re.compile(r"^ \s* Change-Id \s* : \s* (?P<value> I [0-9a-f]+ ) \s* $", re.X)
+
+_RE_DIFF_START = re.compile(r"^ --- [ ]", re.X)
+
+
+class QuiltError(defs.GApplyError):
+    """An error that occurred while parsing the quilt patch structure."""
+
+
+@dataclasses.dataclass(frozen=True)
+class Patch:
+    """A single patch read from the quilt series file."""
+
+    change_id: str
+    filename: str
+    path: pathlib.Path
+    relpath: pathlib.Path
+    repo: defs.Repo
+
+
+def _extract_change_id(patchfile: pathlib.Path) -> str:
+    """Extract the value of the last Change-Id line before the start of the patch's diff."""
+    change_id = None
+    for line in patchfile.read_text(encoding="UTF-8").splitlines():
+        if _RE_DIFF_START.match(line):
+            if change_id is None:
+                raise QuiltError(f"No Change-Id line found in {patchfile}")
+
+            return change_id
+
+        change = _RE_CHANGE_ID.match(line)
+        if change is not None:
+            change_id = change.group("value")
+
+    raise QuiltError(f"No diff start line ('--- ...') found in {patchfile}")
+
+
+def parse_series(cfg: defs.Config) -> tuple[list[Patch], list[defs.Repo]]:
+    """Parse a series file, return a list of patches and a list of repository names."""
+    repos = set()
+
+    def parse_line(sline: str) -> Patch:
+        """Parse a single relative patch filename read from the series file."""
+        fields = sline.split()
+        if len(fields) != 1:
+            raise NotImplementedError(f"quilt patch options not supported yet: {sline!r}")
+        filename = fields[0]
+
+        relpath = pathlib.Path(filename)
+        if (
+            relpath.is_absolute()
+            or len(relpath.parts) <= _REPO_PARTS
+            or any(part.startswith(".") for part in relpath.parts)
+        ):
+            raise QuiltError(f"Invalid patch filename {filename!r} in {cfg.series}")
+
+        repo = defs.Repo(origin=relpath.parts[0], repo="/".join(relpath.parts[1:_REPO_PARTS]))
+        repos.add(repo)
+
+        patchfile = cfg.patches / relpath
+        if not patchfile.is_file():
+            raise QuiltError(f"Need a regular patch file at {patchfile}")
+
+        change_id = _extract_change_id(patchfile)
+        return Patch(
+            change_id=change_id, filename=filename, path=patchfile, relpath=relpath, repo=repo
+        )
+
+    if not cfg.series.is_file():
+        raise QuiltError(f"Need a regular series file at {cfg.series}")
+
+    res = [parse_line(line) for line in cfg.series.read_text(encoding="UTF-8").splitlines()]
+    return res, sorted(repos)
diff --git a/tools/git-if-needed/python/gifn_apply/repo_url.py b/tools/git-if-needed/python/gifn_apply/repo_url.py
new file mode 100644
index 0000000..3204811
--- /dev/null
+++ b/tools/git-if-needed/python/gifn_apply/repo_url.py
@@ -0,0 +1,122 @@
+# SPDX-FileCopyrightText: StorPool <support@storpool.com>
+# SPDX-License-Identifier: BSD-2-Clause
+"""Parse base/URL repository location pairs."""
+
+from __future__ import annotations
+
+import dataclasses
+import os
+import re
+import urllib.parse as uparse
+
+from . import defs
+from . import util
+
+
+_RE_ENV_BASE = re.compile(r"^ [A-Z][A-Z0-9_]* $", re.X)
+
+
+@dataclasses.dataclass(frozen=True)
+class RepoURLResult:
+    """Base class for the OK/error parsed URL dichotomy."""
+
+
+@dataclasses.dataclass(frozen=True)
+class RepoURLOK(RepoURLResult):
+    """Successfully parsed a base URL for repositories."""
+
+    url: defs.RepoURL
+
+
+@dataclasses.dataclass(frozen=True)
+class RepoURLError(RepoURLResult):
+    """Could not parse a base URL for repositories."""
+
+    name: str
+    value: str
+    err: ValueError
+
+
+@dataclasses.dataclass(frozen=True)
+class RepoURLPair:
+    """A base/URL pair for an URL obtained from the command line."""
+
+    base: str
+    url: defs.RepoURL
+
+
+def _slash_extend(url: uparse.ParseResult) -> uparse.ParseResult:
+    """Add a / at the end of the path if there is none."""
+    if url.path.endswith("/"):
+        return url
+
+    return url._replace(path=url.path + "/")
+
+
+def _validate_file(name: str, value: str, url: uparse.ParseResult) -> RepoURLResult:
+    """Make sure a file:// URL has no host and an absolute path."""
+    if url.netloc:
+        return RepoURLError(name, value, ValueError("No hostname expected for a 'file' URL"))
+    if not url.path.startswith("/"):
+        return RepoURLError(name, value, ValueError("Expected an absolute path for a 'file' URL"))
+
+    url = _slash_extend(url)
+    return RepoURLOK(defs.RepoURL(url))
+
+
+def _validate_http(name: str, value: str, url: uparse.ParseResult) -> RepoURLResult:
+    """Make sure a http(s):// URL has a host, slash-terminate the path."""
+    if not url.netloc:
+        return RepoURLError(
+            name, value, ValueError("Expected a hostname for 'http' or 'https' URLs")
+        )
+
+    url = _slash_extend(url)
+    return RepoURLOK(defs.RepoURL(url))
+
+
+_SCHEME_VALIDATORS = {
+    "file": _validate_file,
+    "http": _validate_http,
+    "https": _validate_http,
+}
+
+
+def parse_url(name: str, base: str, value: str) -> RepoURLResult:
+    """Parse and validate a single base/URL pair."""
+    if not _RE_ENV_BASE.match(base):
+        return RepoURLError(name, value, ValueError(f"Invalid URL base {base!r}"))
+
+    try:
+        url = uparse.urlparse(value)
+    except ValueError as err:
+        return RepoURLError(name, value, err)
+
+    validator = _SCHEME_VALIDATORS.get(url.scheme)
+    if validator is None:
+        return RepoURLError(
+            name, value, ValueError("Expected 'http', 'https', or 'file' as the URL scheme")
+        )
+    return validator(name, value, url)
+
+
+def get_env_repo_urls(environ: dict[str, str] | None = None) -> dict[str, RepoURLResult]:
+    """Parse the REPO_URL_<base> environment variables."""
+    if environ is None:
+        environ = dict(os.environ)
+
+    res: dict[str, RepoURLResult] = {
+        "OPENSTACK": RepoURLOK(defs.RepoURL(uparse.urlparse("https://github.com/openstack")))
+    }
+    for name, value in environ.items():
+        base = util.str_removeprefix(name, "REPO_URL_")
+        if base == name:
+            continue
+        res[base] = parse_url(name, base, value)
+
+    return res
+
+
+def parse_base_url_pair(arg: str) -> RepoURLPair:
+    """Parse a `--repo-url base=url` command-line argument."""
+    raise NotImplementedError(repr(arg))
diff --git a/tools/git-if-needed/python/gifn_apply/util.py b/tools/git-if-needed/python/gifn_apply/util.py
new file mode 100644
index 0000000..aa78ae7
--- /dev/null
+++ b/tools/git-if-needed/python/gifn_apply/util.py
@@ -0,0 +1,12 @@
+# SPDX-FileCopyrightText: StorPool <support@storpool.com>
+# SPDX-License-Identifier: BSD-2-Clause
+"""Common utilities for the gifn-apply modules."""
+
+
+# Drop this once Python 3.9 is the minimum version: it has str.removeprefix() built in.
+def str_removeprefix(value: str, prefix: str) -> str:
+    """Strip a leading prefix from the value; return it as it is if the prefix is not there."""
+    if value.startswith(prefix):
+        return value[len(prefix) :]
+
+    return value
diff --git a/tools/git-if-needed/python/requirements/test.txt b/tools/git-if-needed/python/requirements/test.txt
new file mode 100644
index 0000000..fc5b69c
--- /dev/null
+++ b/tools/git-if-needed/python/requirements/test.txt
@@ -0,0 +1,4 @@
+# SPDX-FileCopyrightText: StorPool <support@storpool.com>
+# SPDX-License-Identifier: BSD-2-Clause
+
+pytest >= 7, < 8
diff --git a/tools/git-if-needed/python/unit_tests/__init__.py b/tools/git-if-needed/python/unit_tests/__init__.py
new file mode 100644
index 0000000..621bbac
--- /dev/null
+++ b/tools/git-if-needed/python/unit_tests/__init__.py
@@ -0,0 +1,3 @@
+# SPDX-FileCopyrightText: StorPool <support@storpool.com>
+# SPDX-License-Identifier: BSD-2-Clause
+"""Unit tests for the gifn-apply tool."""
diff --git a/tools/git-if-needed/python/unit_tests/test_repo_urls.py b/tools/git-if-needed/python/unit_tests/test_repo_urls.py
new file mode 100644
index 0000000..54c6bf9
--- /dev/null
+++ b/tools/git-if-needed/python/unit_tests/test_repo_urls.py
@@ -0,0 +1,50 @@
+# SPDX-FileCopyrightText: StorPool <support@storpool.com>
+# SPDX-License-Identifier: BSD-2-Clause
+"""Test the parsing of base/URL pairs."""
+
+from __future__ import annotations
+
+from typing import NamedTuple
+
+import pytest
+
+from gifn_apply import repo_url
+
+
+class TrivURL(NamedTuple):
+    """The scheme, netloc, and path that a successfully parsed URL is expected to have."""
+
+    scheme: str
+    netloc: str
+    path: str
+
+
+PARSE_URLS: list[tuple[str, str, TrivURL | None]] = [  # None: expect a RepoURLError
+    ("", "https://github.com/openstack", None),
+    ("ostack", "https://github.com/openstack", None),
+    ("OSTACK!", "https://github.com/openstack", None),
+    ("OSTACK", "https://github.com/openstack", TrivURL("https", "github.com", "/openstack/")),
+    ("OSTACK", "https://github.com/openstack/", TrivURL("https", "github.com", "/openstack/")),
+    ("local", "file:///absolute/path", None),
+    ("LOCAL_3!", "file:///absolute/path", None),
+    ("LOCAL_3", "file://host", None),
+    ("LOCAL_3", "file:relative/path", None),
+    ("LOCAL_3", "file:///absolute/path", TrivURL("file", "", "/absolute/path/")),
+    ("OSTACK", "file:///absolute/path", TrivURL("file", "", "/absolute/path/")),
+    ("OSTACK", "httpx://github.com/openstack", None),
+    ("OSTACK", "loc", None),
+    ("OSTACK", "/path", None),
+    ("OSTACK", ":path", None),
+]
+
+
+@pytest.mark.parametrize(("base", "value", "expected"), PARSE_URLS)
+def test_parse_url(base: str, value: str, expected: TrivURL | None) -> None:
+    """Make sure parse_url() rejects the bad base/URL pairs and parses the good ones."""
+    res = repo_url.parse_url(f"R_{base}", base, value)
+    if expected is not None:
+        assert isinstance(res, repo_url.RepoURLOK)
+        parsed = res.url.url
+        assert (parsed.scheme, parsed.netloc, parsed.path) == expected
+    else:
+        assert isinstance(res, repo_url.RepoURLError)
diff --git a/tools/git-if-needed/python/unit_tests/test_util.py b/tools/git-if-needed/python/unit_tests/test_util.py
new file mode 100644
index 0000000..bdcb5ad
--- /dev/null
+++ b/tools/git-if-needed/python/unit_tests/test_util.py
@@ -0,0 +1,21 @@
+# SPDX-FileCopyrightText: StorPool <support@storpool.com>
+# SPDX-License-Identifier: BSD-2-Clause
+"""Test the gifn_apply.util functions."""
+
+import pytest
+
+from gifn_apply import util
+
+
+@pytest.mark.parametrize(
+    ("value", "prefix", "expected"),
+    [
+        ("hello", "goodbye", "hello"),  # the prefix is not there at all
+        ("hello", "hel", "lo"),  # a proper prefix is stripped
+        ("hel", "hello", "hel"),  # the prefix is longer than the value
+        ("hello", "hello", ""),  # the prefix is the whole value
+    ],
+)
+def test_remove_prefix(value: str, prefix: str, expected: str) -> None:
+    """Check util.str_removeprefix() against the expected result for each value/prefix pair."""
+    assert util.str_removeprefix(value, prefix) == expected
diff --git a/tools/git-if-needed/setup.cfg b/tools/git-if-needed/setup.cfg
new file mode 100644
index 0000000..a3d4abf
--- /dev/null
+++ b/tools/git-if-needed/setup.cfg
@@ -0,0 +1,6 @@
+# SPDX-FileCopyrightText: StorPool <support@storpool.com>
+# SPDX-License-Identifier: BSD-2-Clause
+
+[flake8]
+max_line_length = 100
+extend_ignore = E203
diff --git a/tools/git-if-needed/sh/git-if-needed b/tools/git-if-needed/sh/git-if-needed
index a8e5666..61f8142 100755
--- a/tools/git-if-needed/sh/git-if-needed
+++ b/tools/git-if-needed/sh/git-if-needed
@@ -1,4 +1,7 @@
 #!/bin/sh
+#
+# SPDX-FileCopyrightText: StorPool <support@storpool.com>
+# SPDX-License-Identifier: BSD-2-Clause
 
 set -e
 
diff --git a/tools/git-if-needed/shellcheckrc b/tools/git-if-needed/shellcheckrc
index 2a4d199..9682ee0 100644
--- a/tools/git-if-needed/shellcheckrc
+++ b/tools/git-if-needed/shellcheckrc
@@ -1,3 +1,6 @@
+# SPDX-FileCopyrightText: StorPool <support@storpool.com>
+# SPDX-License-Identifier: BSD-2-Clause
+
 shell=dash
 enable=all
 disable=SC2250
diff --git a/tools/git-if-needed/tests/gifn-test.pl b/tools/git-if-needed/tests/gifn-test.pl
index dc8fe58..a78c262 100755
--- a/tools/git-if-needed/tests/gifn-test.pl
+++ b/tools/git-if-needed/tests/gifn-test.pl
@@ -1,28 +1,7 @@
 #!/usr/bin/perl
 #
-# Copyright (c) 2019  Peter Pentchev
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions
-# are met:
-# 1. Redistributions of source code must retain the above copyright
-#    notice, this list of conditions and the following disclaimer.
-# 2. Redistributions in binary form must reproduce the above copyright
-#    notice, this list of conditions and the following disclaimer in the
-#    documentation and/or other materials provided with the distribution.
-#
-# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
-# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-# ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
-# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-# SUCH DAMAGE.
+# SPDX-FileCopyrightText: 2019  Peter Pentchev
+# SPDX-License-Identifier: BSD-2-Clause
 
 use v5.10;
 use strict;
diff --git a/tools/git-if-needed/tox.ini b/tools/git-if-needed/tox.ini
new file mode 100644
index 0000000..fac9bfb
--- /dev/null
+++ b/tools/git-if-needed/tox.ini
@@ -0,0 +1,102 @@
+# SPDX-FileCopyrightText: StorPool <support@storpool.com>
+# SPDX-License-Identifier: BSD-2-Clause
+
+[tox]
+envlist =
+  ruff
+  ruff-all
+  black
+  pep8
+  mypy
+  pylint
+  unit-tests
+  functional-cinder
+isolated_build = true
+
+[defs]
+pyfiles =
+  python/gifn_apply
+  python/unit_tests
+
+[testenv:black]
+skip_install = True
+tags =
+  check
+deps =
+  black >= 23, < 24
+commands =
+  black --check -- {[defs]pyfiles}
+
+[testenv:black-reformat]
+skip_install = True
+tags =
+  format
+  manual
+deps =
+  black >= 23, < 24
+commands =
+  black -- {[defs]pyfiles}
+
+[testenv:pep8]
+skip_install = True
+tags =
+  check
+deps =
+  flake8 >= 6, < 7
+commands =
+  flake8 -- {[defs]pyfiles}
+
+[testenv:mypy]
+skip_install = True
+tags =
+  check
+deps =
+  -r python/requirements/test.txt
+  mypy >= 1, < 2
+commands =
+  mypy -- {[defs]pyfiles}
+
+[testenv:pylint]
+skip_install = True
+tags =
+  check
+deps =
+  -r python/requirements/test.txt
+  pylint >= 2.16, < 2.17
+commands =
+  pylint -- {[defs]pyfiles}
+
+[testenv:ruff]
+skip_install = True
+tags =
+  check
+deps =
+  ruff >= 0.0.253, < 0.1
+commands =
+  ruff check --config python/config/ruff-most/pyproject.toml -- {[defs]pyfiles}
+
+[testenv:ruff-all]
+skip_install = True
+tags =
+  check
+deps =
+  ruff == 0.0.253
+commands =
+  ruff check --config python/config/ruff-all/pyproject.toml -- {[defs]pyfiles}
+
+[testenv:unit-tests]
+tags =
+  tests
+deps =
+  -r python/requirements/test.txt
+commands =
+  pytest {posargs} python/unit_tests
+
+[testenv:functional-cinder]
+tags =
+  tests
+setenv =
+  REPO_URL_OPENSTACK = {env:REPO_URL_OPENSTACK:https://github.com/openstack}
+commands =
+  gifn_apply -v -p {toxinidir}/../../patches -P {toxinidir}/sh/git-if-needed
+  gifn_apply -v -p {toxinidir}/../../patches -P {toxinidir}/sh/git-if-needed -s series.experimental