From 3e693d0427b837362f500cee7c38b343db605045 Mon Sep 17 00:00:00 2001
From: memsharded
Date: Mon, 22 Jun 2026 13:35:39 +0200
Subject: [PATCH 1/2] poc for git tree commit for pre-merge reproducibility

---
Review notes (ignored by "git am"):
- Files read by the new tests are opened with context managers, so the handles are
  closed deterministically. The number of added lines is unchanged (98).
- Line breaks and indentation were restored from a whitespace-collapsed copy of this
  patch; confirm the context lines against test_git.py before applying.

 test/functional/tools/scm/test_git.py | 98 +++++++++++++++++++++++++++
 1 file changed, 98 insertions(+)

diff --git a/test/functional/tools/scm/test_git.py b/test/functional/tools/scm/test_git.py
index 42d946f2108..ff2e83c988c 100644
--- a/test/functional/tools/scm/test_git.py
+++ b/test/functional/tools/scm/test_git.py
@@ -4,6 +4,7 @@
 import textwrap
 
 import pytest
+import yaml
 
 from conan.test.assets.cmake import gen_cmakelists
 from conan.test.assets.sources import gen_function_cpp
@@ -546,6 +547,7 @@ def source(self):
         assert c.load("source/folder/CMakeLists.txt") == "mycmake"
         assert c.load("source/folder/src/myfile.h") == "myheader!"
 
+
 class TestGitCloneWithArgs:
     """ Git cloning passing additional arguments
     """
@@ -1313,3 +1315,99 @@ def test_treeless_clone_with_parenthesis(self):
         client.save({"conanfile.py": self.conanfile.format(url=url)})
         client.run("export .")
         assert f"get_remote_url(): {url} ===" in client.out
+
+
+@pytest.mark.tool("git")
+class TestGitTreeCoordinates:
+
+    conanfile_tree = textwrap.dedent("""
+        from conan import ConanFile
+        from conan.tools.scm import Git
+
+        class Pkg(ConanFile):
+            name = "pkg"
+            version = "0.1"
+
+            def export(self):
+                Git(self, self.recipe_folder).coordinates_to_conandata(use_tree=True)
+
+            def source(self):
+                Git(self).checkout_from_conandata_coordinates()
+        """)
+
+    def test_tree_sha_stable(self):
+        """Two repos with identical source content produce the same tree SHA."""
+        folder1 = os.path.join(temp_folder(), "repo1")
+        url1, _ = create_local_git_repo(
+            files={"conanfile.py": self.conanfile_tree, "src/myfile.h": "myheader!"},
+            folder=folder1)
+        c = TestClient(light=True)
+        c.run_command(f'git clone "file://{url1}" .')
+        c.run("export .")
+        with open(os.path.join(c.exported_layout().export(), "conandata.yml")) as f:
+            data1 = yaml.safe_load(f)
+        assert "commit" not in data1["scm"]
+
+        folder2 = os.path.join(temp_folder(), "repo2")
+        url2, _ = create_local_git_repo(
+            files={"conanfile.py": self.conanfile_tree, "src/myfile.h": "myheader!"},
+            folder=folder2)
+        c2 = TestClient(light=True)
+        c2.run_command(f'git clone "file://{url2}" .')
+        c2.run("export .")
+        with open(os.path.join(c2.exported_layout().export(), "conandata.yml")) as f:
+            data2 = yaml.safe_load(f)
+
+        assert data1["scm"]["tree"] == data2["scm"]["tree"]
+
+    def test_full_scm_use_tree(self):
+        """Full SCM flow: export with use_tree=True, upload, source retrieval on fresh machine."""
+        folder = os.path.join(temp_folder(), "myrepo")
+        url, _ = create_local_git_repo(
+            files={"conanfile.py": self.conanfile_tree, "src/myfile.h": "myheader!"},
+            folder=folder)
+        c = TestClient(default_server_user=True, light=True)
+        c.run_command(f'git clone "file://{url}" .')
+        c.run("create .")
+        c.run("upload * -c -r=default")
+
+        c2 = TestClient(servers=c.servers)
+        c2.run("install --requires=pkg/0.1@ --build=pkg*")
+        c2.run("cache path pkg/0.1 --folder=source")
+        with open(os.path.join(str(c2.out).strip(), "src/myfile.h")) as f:
+            assert f.read() == "myheader!"
+
+    def test_full_scm_use_tree_squash_merge(self):
+        """After a squash merge the sources are still retrieved, as the tree SHA is unchanged."""
+        url = git_create_bare_repo()
+        c = TestClient(default_server_user=True, light=True)
+        c.run_command(f'git clone "file://{url}" .')
+        c.save({"conanfile.py": self.conanfile_tree, "src/myfile.h": "myheader!"})
+        c.run_command("git checkout -b feature")
+        git_add_changes_commit(folder=c.current_folder)
+        c.run_command("git push --set-upstream origin feature")
+        c.run("create .")
+        c.run("upload * -c -r=default")
+
+        # Squash merge onto master — same files, new commit SHA; feature branch deleted.
+        # An orphan branch is needed: "checkout -b master" would just reuse the feature commit
+        c.run_command("git checkout --orphan master")
+        git_add_changes_commit(folder=c.current_folder, msg="squash merge feature")
+        c.run_command("git push --set-upstream origin master")
+        c.run_command("git push origin --delete feature")
+
+        c2 = TestClient(servers=c.servers, light=True)
+        c2.run("install --requires=pkg/0.1@ --build=pkg*")
+        c2.run("cache path pkg/0.1 --folder=source")
+        with open(os.path.join(str(c2.out).strip(), "src/myfile.h")) as f:
+            assert f.read() == "myheader!"
+
+    def test_dirty_repo_use_tree(self):
+        """Dirty repo raises an exception when capturing tree coordinates."""
+        c = TestClient(light=True)
+        c.save({"conanfile.py": self.conanfile_tree})
+        c.init_git_repo()
+        # Add an uncommitted file to make the repo dirty
+        c.save({"dirty_file.txt": "uncommitted"})
+        c.run("export .", assert_error=True)
+        assert "Repo is dirty, cannot capture tree" in c.out

From 91a959464bb94f87c3da41ef14abc442d6d146db Mon Sep 17 00:00:00 2001
From: memsharded
Date: Mon, 22 Jun 2026 13:35:57 +0200
Subject: [PATCH 2/2] poc for git tree commit for pre-merge reproducibility

---
Review notes (ignored by "git am"):
- use_tree=True together with repository=True now raises ConanException before any git
  command runs; the previous "assert" is removed by "python -O" and ran after is_dirty().
- The ":param repository:" documentation is kept and ":param use_tree:" is documented.
- The hunk header and the diffstat were recomputed for this revision; the post-image blob
  id in the "index" line is carried over from the previous revision of the patch.
- Line breaks and indentation were restored from a whitespace-collapsed copy of this
  patch; confirm the context and removed lines against git.py before applying.

 conan/tools/scm/git.py | 40 ++++++++++++++++++++++++++++------------
 1 file changed, 28 insertions(+), 12 deletions(-)

diff --git a/conan/tools/scm/git.py b/conan/tools/scm/git.py
index 9775a1e6d24..2d51a5f26dd 100644
--- a/conan/tools/scm/git.py
+++ b/conan/tools/scm/git.py
@@ -270,26 +270,42 @@ def included_files(self):
         files = files.splitlines()
         return files
 
-    def coordinates_to_conandata(self, repository=False):
+    def coordinates_to_conandata(self, repository=False, use_tree=False):
         """
-        Capture the "url" and "commit" from the Git repo, calling ``get_url_and_commit()``, and then
-        store those in the ``conandata.yml`` under the "scm" key. This information can be
-        used later to clone and checkout the exact source point that was used to create this
-        package, and can be useful even if the recipe uses ``exports_sources`` as mechanism to
-        embed the sources.
+        Capture SCM coordinates from Git and store them in conandata.yml under "scm".
+        When use_tree=True, stores root tree SHA (content fingerprint, stable across squash
+        merges) instead of commit SHA for reproducible recipe revisions in pre-merge CI builds.
 
         :param repository: By default gets the commit of the defined folder, use repository=True to get
                the commit of the repository instead.
+        :param use_tree: Store the "tree" SHA of the last commit instead of its "commit" SHA.
+               It only works at the moment for repository=False.
         """
-        scm_url, scm_commit = self.get_url_and_commit(repository=repository)
-        update_conandata(self._conanfile, {"scm": {"commit": scm_commit, "url": scm_url}})
+        if use_tree:
+            # Explicit exception instead of "assert", which is stripped when running with -O
+            if repository:
+                raise ConanException("coordinates_to_conandata(): use_tree=True cannot be used "
+                                     "with repository=True")
+            if self.is_dirty(repository=repository):
+                raise ConanException("Repo is dirty, cannot capture tree: {}".format(self.folder))
+            tree = self.run("log -1 --format=%T")
+            url = self.get_remote_url()
+            update_conandata(self._conanfile, {"scm": {"tree": tree, "url": url}})
+        else:
+            scm_url, scm_commit = self.get_url_and_commit(repository=repository)
+            update_conandata(self._conanfile, {"scm": {"commit": scm_commit, "url": scm_url}})
 
     def checkout_from_conandata_coordinates(self):
         """
-        Reads the "scm" field from the ``conandata.yml``, that must contain at least "url" and
-        "commit" and then do a ``clone(url, target=".")``, ``fetch <commit>``, followed by a ``checkout(commit)``.
+        Reads the "scm" field from the ``conandata.yml``, that must contain "url" and either "tree"
+        or "commit", and then do a ``clone(url, target=".")``, followed by a
+        ``read-tree --reset -u <tree>`` or by a ``fetch <commit>`` and a ``checkout(commit)``.
         """
         sources = self._conanfile.conan_data["scm"]
         self.clone(url=sources["url"], target=".", args=["--origin=origin"])
-        self.run(f"fetch origin {sources['commit']}")
-        self.checkout(commit=sources["commit"])
+        if "tree" in sources:
+            # Tree objects are in the local store after clone; apply directly without commit scan.
+            self.run(f"read-tree --reset -u {sources['tree']}")
+        else:
+            self.run(f"fetch origin {sources['commit']}")
+            self.checkout(commit=sources["commit"])