From de9851beff4cbb43172bc3e2ad4709f8d9e00790 Mon Sep 17 00:00:00 2001
From: Dmitry Petrov <dmitry.petrov@rtsoft.de>
Date: Mon, 15 Apr 2024 19:54:58 +0200
Subject: [PATCH] ci: add a job to handle artifacts after merge

For the latest commit in the manifest it often happens that its pipeline
does not contain the artifacts itself, but only a link to the pipeline of
the MR.

Add a Python script and a corresponding GitLab CI job that launches it.
The script walks the latest pipelines of the default branch in the
manifest repo and decides, per build, whether to keep or delete its
artifacts.

The build artifacts are kept for:
- the latest commit
- the last successful build
- the tagged commits (e.g. "fngsystem/47.0", "kirkstone/20.0")
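
The core decision per build job (sketch; "job" is the ProjectJob of a
finished build job and "keep_artifacts_sha" the set of commit SHAs
listed above, both as used in scripts/handle_artifacts.py):

    if commit.id in keep_artifacts_sha:
        job.keep_artifacts()    # remove the artifacts' expiry
    else:
        job.delete_artifacts()  # drop artifacts of superseded builds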
---
 manifest-pipeline.yml       |  20 +++
 scripts/handle_artifacts.py | 235 ++++++++++++++++++++++++++++++++++++
 2 files changed, 255 insertions(+)
 create mode 100755 scripts/handle_artifacts.py

diff --git a/manifest-pipeline.yml b/manifest-pipeline.yml
index befbf978..21481347 100644
--- a/manifest-pipeline.yml
+++ b/manifest-pipeline.yml
@@ -10,6 +10,7 @@ stages:
   - trigger
   - retrigger
   - build
+  - artifacts
 
 workflow:
   rules:
@@ -132,3 +133,22 @@ build:merge_request:
         --project=${CI_PROJECT_PATH}
         --commit=${CI_COMMIT_SHA}
         --ref=${MASTER_BRANCH}
+
+# --------------------------------------------------------------------------------------
+# Keep latest build artifacts (runs on master after merging a merge request)
+# --------------------------------------------------------------------------------------
+
+handle_artifacts:
+  extends:
+    - .infrastructure
+    - .short_master_pipeline
+  stage: artifacts
+  needs: ["build:merge_request"]
+  timeout: 1h
+  script:
+    - cd ${CI_PROJECT_DIR}
+    - .gitlab-ci/scripts/handle_artifacts.py
+        --gitlab-url="${CI_SERVER_URL}"
+        --token="${GITBOT_TOKEN}"
+        --manifest-project="${CI_PROJECT_PATH}"
+        --manifest-branch="${MASTER_BRANCH}"
diff --git a/scripts/handle_artifacts.py b/scripts/handle_artifacts.py
new file mode 100755
index 00000000..29cf1fc6
--- /dev/null
+++ b/scripts/handle_artifacts.py
@@ -0,0 +1,235 @@
+#!/usr/bin/env python3
+import argparse
+import fnmatch
+import logging
+import sys
+import time
+
+from gitlab import Gitlab
+from gitlab.v4.objects import Project
+from gitlab.v4.objects.pipelines import ProjectPipeline, ProjectPipelineJob
+
+import common
+
+
+class FullBuildPipeline:
+    def __init__(self, project: Project, commit_sha: str):
+        self.project = project
+        self.commit_sha = commit_sha
+        self.upstream_pipeline = self.__get_upstream_pipeline()
+        self.build_pipelines = self.__get_build_pipelines()
+
+    def __get_upstream_pipeline(self) -> ProjectPipeline:
+        """
+        Get upstream (main) pipeline for the specified commit in the repository.
+
+        Returns:
+            A ProjectPipeline object on success, None otherwise.
+        """
+
+        pipelines_for_commit = self.project.pipelines.list(
+            all=False, sha=self.commit_sha, order_by="id", sort="desc"
+        )
+
+        if not pipelines_for_commit:
+            return None
+
+        # For the main branch we have two types of pipelines: short and full.
+        # The short one just retriggers the full pipeline and does not contain any artifacts.
+        # The source of the short pipeline is "push", so skip it here.
+        # Filtering could be done directly in project.pipelines.list(), but the currently
+        # installed version of python-gitlab does not support the "source" filter parameter.
+        # TODO: filter by pipeline source in self.project.pipelines.list(…) instead
+        # Pipelines are listed newest first; take the first non-push one.
+        build_pipeline = None
+        for p in pipelines_for_commit:
+            if p.source != "push":
+                build_pipeline = p
+                break
+
+        if not build_pipeline:
+            return None
+
+        return build_pipeline
+
+    def __get_build_pipelines(self) -> dict[str, tuple[ProjectPipelineJob, ...]]:
+        """
+        Get the latest pipeline for the specified commit in the repository.
+        Then extract the downstream build pipelines with their jobs and return
+        them as a dictionary.
+
+        Returns:
+            A dictionary where the key is the build pipeline name and
+            the value is a tuple of downstream jobs.
+        """
+
+        timeout = 3000  # 50 min
+        check_interval = 30
+
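+        # Wait for the upstream pipeline to finish before collecting downstream jobs.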
+        not_rdy_status = ["created", "pending", "running"]
+        if self.upstream_pipeline.status in not_rdy_status:
+            print(
+                f"The build pipeline ({self.upstream_pipeline.web_url}) is not ready."
+            )
+            print("Wait for it to complete", end="", flush=True)
+
+        while self.upstream_pipeline.status in not_rdy_status:
+            print(".", end="", flush=True)
+            time.sleep(check_interval)
+            # Re-fetch the pipeline, otherwise the cached status never changes.
+            self.upstream_pipeline.refresh()
+            timeout -= check_interval
+            if timeout < 0:
+                sys.exit("Timed out waiting for the upstream pipeline to finish")
+
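+        # Each bridge ("trigger") job points to a downstream build pipeline;
+        # collect that pipeline's jobs, keyed by the bridge name.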
+        ret = {}
+        for bridge in self.upstream_pipeline.bridges.list():
+            if not bridge.downstream_pipeline:
+                continue
+            downstream_pipeline = self.project.pipelines.get(
+                bridge.downstream_pipeline["id"]
+            )
+            ret[bridge.name] = tuple(downstream_pipeline.jobs.list(all=True))
+        return ret
+
+    def get_jobs(
+        self, pipeline_name: str = "*", job_filter: str = "*"
+    ) -> tuple[ProjectPipelineJob, ...]:
+        """
+        Get build jobs for the specified pipeline.
+        The result can also be filtered by name.
+
+        Args:
+            pipeline_name: str — name of build pipeline (e.g. "fngsystem-pipeline", "sdk-pipeline").
+            job_filter: str — fnmatch pattern to select jobs by name.
+
+        Returns:
+            A tuple of pipeline jobs.
+        """
+
+        ret = []
+
+        if pipeline_name == "*":
+            jobs = []
+            for v in self.build_pipelines.values():
+                jobs.extend(list(v))
+        else:
+            try:
+                jobs = self.build_pipelines[pipeline_name]
+            except KeyError:
+                return tuple()
+
+        for job in jobs:
+            if fnmatch.fnmatch(job.name, job_filter):
+                ret.append(job)
+        return tuple(ret)
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--gitlab-url",
+        help="""URL to the GitLab instance""",
+        dest="gitlab_url",
+        default=common.GITLAB_URL,
+    )
+    parser.add_argument(
+        "--token",
+        help="""GitLab REST API private access token""",
+        dest="token",
+        required=True,
+    )
+    parser.add_argument(
+        "--manifest-project",
+        help="""ID or name of the manifest project""",
+        dest="manifest_project",
+        required=True,
+    )
+    parser.add_argument(
+        "--manifest-branch",
+        help="""manifest integration branch""",
+        dest="manifest_branch",
+        required=True,
+    )
+    parser.add_argument(
+        "-v",
+        "--verbose",
+        action="store_true",
+        help="""Increase verbosity.""",
+    )
+
+    args, _ = parser.parse_known_args()
+    if args.verbose:
+        logging.basicConfig(level=logging.DEBUG)
+
+    logging.debug(args)
+    gitlab = Gitlab(args.gitlab_url, private_token=args.token)
+
+    manifest_project = common.get_project(gitlab, args.manifest_project)
+
+    manifest_commits = manifest_project.commits.list(
+        all=False, ref_name=args.manifest_branch, order_by="id", sort="desc"
+    )
+    if not manifest_commits:
+        sys.exit("Failed to get the latest commit ID")
+    latest_build_sha = manifest_commits[0].id
+
+    latest_successful_build_sha = None
+    for commit in manifest_commits:
+        upstream = FullBuildPipeline(manifest_project, commit.id).upstream_pipeline
+        if upstream and upstream.status == "success":
+            latest_successful_build_sha = commit.id
+            break
+
+    if not latest_successful_build_sha:
+        sys.exit("Failed to find the latest successful pipeline.")
+
+    tags = manifest_project.tags.list()
+    tagged_commits_sha = [tag.commit["id"] for tag in tags]
+
+    # List of commit IDs for which build artifacts are kept forever
+    keep_artifacts_sha = []
+    # Always keep artifacts for the latest build
+    keep_artifacts_sha.append(latest_build_sha)
+    # Just in case, keep artifacts for the latest successful build
+    keep_artifacts_sha.append(latest_successful_build_sha)
+    # Always keep artifacts for tagged commits (e.g. "fngsystem/47.0", "kirkstone/20.0")
+    keep_artifacts_sha.extend(tagged_commits_sha)
+    # Remove duplicates
+    keep_artifacts_sha = list(set(keep_artifacts_sha))
+
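+    # Walk the recent commits and keep or delete the build artifacts of their jobs.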
+    for commit in manifest_commits:
+        full_build_pipeline = FullBuildPipeline(manifest_project, commit.id)
+        print(f"Full pipeline: {full_build_pipeline.upstream_pipeline}")
+        for pipelinejob in full_build_pipeline.get_jobs():
+
+            if pipelinejob.status != "success":
+                continue
+
+            if not pipelinejob.artifacts:
+                continue
+
+            # There are no "real" artifacts in "build-version" job but only artifacts:reports:dotenv
+            # https://docs.gitlab.com/ee/ci/yaml/artifacts_reports.html#artifactsreportsdotenv
+            if pipelinejob.name == "build-version":
+                continue
+
+            # Job methods (keep_artifacts, delete_artifacts) are not available on ProjectPipelineJob objects.
+            # To use these methods create a ProjectJob object:
+            #     pipeline_job = pipeline.jobs.list()[0]
+            #     job = project.jobs.get(pipeline_job.id, lazy=True)
+            #     job.keep_artifacts()
+            job = manifest_project.jobs.get(pipelinejob.id, lazy=True)
+
+            if commit.id in keep_artifacts_sha:
+                print(f"keep_artifacts() for {pipelinejob.web_url}")
+                job.keep_artifacts()
+            else:
+                print(f"delete_artifacts() for {pipelinejob.web_url}")
+                job.delete_artifacts()
+
+        print("—" * 80)
+
+
+if __name__ == "__main__":
+    main()
-- 
GitLab