From 1299a9d671826104a620e4b2fd8ce278dd306987 Mon Sep 17 00:00:00 2001
From: Dmitry Petrov <dmitry.petrov@rtsoft.de>
Date: Wed, 3 Apr 2024 13:18:06 +0200
Subject: [PATCH] CI: add "report-image-diff" job

Add a python script and a GitLab CI job to run it. The job is triggered
when the build is complete.

The script retrieves build artifacts for the default/main branch
(currently "kirkstone") in the manifest repo and build artifacts
for the integration branch of the merge request.

After that, it compares those builds and creates a summary of the overall
changes. In particular:
- size of compressed artifacts (artifacts.zip)
- size of built image
- difference between manifest files

The summary is printed to stdout and posted as a comment on the merge
request. On each pipeline run the old summary comment is removed and a
new one is created once the build has finished.
---
 manifest-integration-jobs.yml |  21 ++
 scripts/report_image_diff.py  | 426 ++++++++++++++++++++++++++++++++++
 2 files changed, 447 insertions(+)
 create mode 100755 scripts/report_image_diff.py

diff --git a/manifest-integration-jobs.yml b/manifest-integration-jobs.yml
index a317acd0..2c622897 100644
--- a/manifest-integration-jobs.yml
+++ b/manifest-integration-jobs.yml
@@ -61,6 +61,27 @@ build:
     branch: "integrate/${CI_PROJECT_NAME}/${CI_COMMIT_REF_NAME}/into/${TARGET_BRANCH}"
     strategy: depend
 
+report-image-diff:
+  extends: .infrastructure
+  stage: manifest-integration-jobs
+  timeout: 1h
+  needs: ["build"]
+  rules:
+    # Skip the report if the "skip build" label is set on the merge request
+    - if: $CI_MERGE_REQUEST_LABELS =~ /skip build/
+      when: never
+    - if: $CI_MERGE_REQUEST_IID
+  script:
+    - cd ${CI_PROJECT_DIR}
+    - .gitlab-ci/scripts/report_image_diff.py
+        --gitlab-url="${CI_SERVER_URL}"
+        --token="${GITBOT_TOKEN}"
+        --manifest-project="${TARGET_PROJECT}"
+        --project="${CI_PROJECT_ID}"
+        --mr-iid="${CI_MERGE_REQUEST_IID}"
+        --target-branch="${TARGET_BRANCH}"
+        --source-branch="integrate/${CI_PROJECT_NAME}/${CI_COMMIT_REF_NAME}/into/${TARGET_BRANCH}"
+
 # --------------------------------------------------------------------------------------
 # Master pipeline
 # --------------------------------------------------------------------------------------
diff --git a/scripts/report_image_diff.py b/scripts/report_image_diff.py
new file mode 100755
index 00000000..e6623efc
--- /dev/null
+++ b/scripts/report_image_diff.py
@@ -0,0 +1,426 @@
+#!/usr/bin/env python3
+import argparse
+import fnmatch
+import logging
+import sys
+import time
+from difflib import unified_diff
+from typing import Optional
+
+import lxml.html
+import requests
+from gitlab import Gitlab
+from gitlab.v4.objects import Project
+from gitlab.v4.objects.pipelines import ProjectPipeline, ProjectPipelineJob
+
+import common
+
+
+class FullBuildPipeline:
+    """
+    The full build pipeline of a manifest commit and its downstream build jobs.
+
+    Both attributes are resolved once in the constructor, which may block
+    while the upstream pipeline is still running.
+    """
+
+    def __init__(self, project: Project, commit_sha: str):
+        self.project = project
+        self.commit_sha = commit_sha
+        self.upstream_pipeline = self.__get_upstream_pipeline()
+        self.build_pipelines = self.__get_build_pipelines()
+
+    def __get_upstream_pipeline(self) -> Optional[ProjectPipeline]:
+        """
+        Get upstream (main) pipeline for the specified commit in the repository.
+
+        Returns:
+            The newest ProjectPipeline that was not started by a push,
+            None if there is no such pipeline.
+        """
+
+        pipelines_for_commit = self.project.pipelines.list(
+            all=False, sha=self.commit_sha, order_by="id", sort="desc"
+        )
+
+        # For the main branch we have two types of pipelines: short and full.
+        # The short one just retriggers the full pipeline and does not contain any artifacts.
+        # The source of the short pipeline is "push". So skip it here.
+        # This can be done earlier when calling project.pipelines.list().
+        # However, the currently installed version of python-gitlab does not support the "source" filter parameter.
+        # TODO: filter by the "source" parameter in self.project.pipelines.list(…) instead
+        # The list is sorted by descending id, so the first match is the newest.
+        for p in pipelines_for_commit:
+            if p.source != "push":
+                return p
+        return None
+
+    def __get_build_pipelines(self) -> dict[str, tuple[ProjectPipelineJob, ...]]:
+        """
+        Wait for the upstream pipeline to finish, then collect the jobs of
+        its downstream build pipelines.
+
+        Returns:
+            A dictionary where the key is the name of the bridge (trigger) job
+            and the value is a tuple of downstream jobs. The dictionary is
+            empty if there is no upstream pipeline.
+        """
+
+        if self.upstream_pipeline is None:
+            logging.warning("No build pipeline found for commit %s", self.commit_sha)
+            return {}
+
+        timeout = 3000  # 50 min
+        check_interval = 30
+
+        not_rdy_status = ["created", "pending", "running"]
+        if self.upstream_pipeline.status in not_rdy_status:
+            print(
+                f"The build pipeline ({self.upstream_pipeline.web_url}) is not ready."
+            )
+            print("Wait for it to complete", end="", flush=True)
+
+        while self.upstream_pipeline.status in not_rdy_status:
+            print(".", end="", flush=True)
+            time.sleep(check_interval)
+            timeout -= check_interval
+            if timeout < 0:
+                sys.exit("timeout")
+            # Re-read the pipeline from the server; the cached status never changes.
+            self.upstream_pipeline.refresh()
+
+        ret = {}
+        for bridge in self.upstream_pipeline.bridges.list(all=True):
+            if not bridge.downstream_pipeline:
+                continue
+            downstream_pipeline = self.project.pipelines.get(
+                bridge.downstream_pipeline["id"]
+            )
+            ret[bridge.name] = tuple(downstream_pipeline.jobs.list(all=True))
+        return ret
+
+    def get_jobs(
+        self, pipeline_name: str = "*", job_filter: str = "*"
+    ) -> tuple[ProjectPipelineJob, ...]:
+        """
+        Get build jobs for the specified pipeline.
+        The result can also be filtered by name.
+
+        Args:
+            pipeline_name: str — name of build pipeline (e.g. "fngsystem-pipeline", "sdk-pipeline"),
+                or "*" for the jobs of all build pipelines.
+            job_filter: str — fnmatch pattern to select jobs by name.
+
+        Returns:
+            A tuple of pipeline jobs, empty if the pipeline is unknown or
+            no job matches.
+        """
+
+        if pipeline_name == "*":
+            jobs = [job for v in self.build_pipelines.values() for job in v]
+        else:
+            # An unknown pipeline yields no jobs, keeping the return type a tuple.
+            jobs = self.build_pipelines.get(pipeline_name, ())
+
+        return tuple(job for job in jobs if fnmatch.fnmatch(job.name, job_filter))
+
+
+class BuildArtifacts:
+    """Access to the artifacts of a single GitLab pipeline job."""
+
+    def __init__(self, project: Project, pipelinejob: ProjectPipelineJob):
+        self.project = project
+        self.pipelinejob = pipelinejob
+
+    def list_dir(self, path: str) -> dict[str, str]:
+        """
+        Get a list of the files and directories for the specified path.
+        The listing is scraped from GitLab's HTML artifact browser.
+
+        Args:
+            path: str — relative path in the job artifacts.
+
+        Returns:
+            A dictionary mapping file names to their human-readable size ("dir" if no size is shown).
+            Empty if GitLab does not answer with HTTP 200.
+        """
+
+        url = f"{self.project.web_url}/-/jobs/{self.pipelinejob.id}/artifacts/browse/{path}"
+        # Without a timeout a stalled connection would block the CI job forever.
+        r = requests.get(url, timeout=60)
+        if r.status_code != 200:
+            return {}
+        ret = {}
+        for tree_item in lxml.html.fromstring(r.text).find_class("tree-item"):
+            name_cells = tree_item.find_class("tree-item-file-name")
+            if not name_cells:
+                continue  # row without a name cell
+            sizes = tree_item.xpath(".//td/text()")
+            size = sizes[-1].strip() if sizes else ""
+            ret[name_cells[0].text_content().strip()] = size or "dir"
+        return ret
+
+    def get_artifact(self, path: str, outfile: Optional[str] = None) -> Optional[bytes]:
+        """
+        Get a single artifact file from GitLab.
+        Save it to the file if "outfile" arg is specified.
+
+        Args:
+            path: str — relative path to artifact file.
+            outfile: str — path to save the output file.
+
+        Returns:
+            None if "outfile" arg is given. Otherwise, returns the artifact file as bytes.
+        """
+
+        job = self.project.jobs.get(self.pipelinejob.id, lazy=True)
+
+        if not outfile:
+            return job.artifact(path)
+
+        with open(outfile, "wb") as f:
+            job.artifact(path, streamed=True, action=f.write)
+        return None
+
+    def get_archive_size(self) -> int:
+        """
+        Get the size of compressed artifacts (artifacts.zip).
+
+        Returns:
+            An integer containing the size of archive in bytes.
+        """
+
+        return self.pipelinejob.artifacts_file["size"]
+
+
+def sizeof_fmt(num: int, p: int = 2) -> str:
+    """
+    Format a size in bytes with binary unit prefixes.
+    https://stackoverflow.com/a/1094933
+
+    Args:
+        num: int — size in bytes.
+        p — precision, number of digits after the decimal point.
+
+    Returns:
+        A string that contains a human-readable size.
+    """
+
+    if abs(num) < 1024.0:
+        return f"{num} B"
+    value = num / 1024.0
+    for unit in ("KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB"):
+        if abs(value) < 1024.0:
+            return f"{value:3.{p}f} {unit}"
+        value /= 1024.0
+    return f"{value:.{p}f} YiB"
+
+
+def main():
+    """
+    Compare the builds of the manifest project for --target-branch and
+    --source-branch, then report the differences.
+
+    The summary is printed to stdout and posted as a comment to the merge
+    request. Summary comments of previous runs are deleted first.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--gitlab-url",
+        help="""URL to the GitLab instance""",
+        dest="gitlab_url",
+        default=common.GITLAB_URL,
+    )
+    parser.add_argument(
+        "--token",
+        help="""GitLab REST API private access token""",
+        dest="token",
+        required=True,
+    )
+    parser.add_argument(
+        "--manifest-project",
+        help="""ID or name of the manifest project""",
+        dest="manifest_project",
+        required=True,
+    )
+    parser.add_argument(
+        "--project",
+        help="""ID or name of the project containing the merge request for analysis of changes""",
+        dest="project",
+        required=True,
+    )
+    parser.add_argument(
+        "--mr-iid",
+        help="""The project-level IID (internal ID) of the merge request""",
+        dest="mr_iid",
+        required=True,
+    )
+    parser.add_argument(
+        "--source-branch",
+        help="""manifest integration branch""",
+        dest="source_branch",
+        required=True,
+    )
+    parser.add_argument(
+        "--target-branch",
+        help="""manifest branch for comparison""",
+        dest="target_branch",
+        required=True,
+    )
+    parser.add_argument(
+        "-v",
+        "--verbose",
+        action="store_true",
+        help="""Increase verbosity.""",
+    )
+
+    args, _ = parser.parse_known_args()
+    if args.verbose:
+        logging.basicConfig(level=logging.DEBUG)
+
+    logging.debug(args)
+    gitlab = Gitlab(args.gitlab_url, private_token=args.token)
+
+    manifest_project = common.get_project(gitlab, args.manifest_project)
+    project = common.get_project(gitlab, args.project)
+
+    mr = project.mergerequests.get(args.mr_iid, lazy=True)
+
+    # Delete previous summary comments
+    comments = mr.notes.list(all=True)
+    for comment in comments:
+        if comment.body.startswith("**`⮘"):
+            comment.delete()
+
+    manifest_commit__main = manifest_project.commits.list(
+        all=False, ref_name=args.target_branch, order_by="id", sort="desc"
+    )[0]
+    build__main = FullBuildPipeline(manifest_project, manifest_commit__main.id)
+
+    manifest_commit__mr = manifest_project.commits.list(
+        all=False, ref_name=args.source_branch, order_by="id", sort="desc"
+    )[0]
+    build__mr = FullBuildPipeline(manifest_project, manifest_commit__mr.id)
+
+    machines = (
+        "seco-genio510",
+        "seco-genio700",
+        "seco-mx6",
+        "seco-mx6ull",
+        "seco-mx8mm",
+        "seco-mx8mp",
+    )
+    pipelines_and_distros = {
+        "fngsystem-pipeline": "seconorth-fngsystem",
+        "yocto-pipeline": "seconorth-wayland",
+    }
+
+    summary = ""
+
+    for pipeline, distro in pipelines_and_distros.items():
+        for machine in machines:
+            summary += f"**`⮘ {distro} | {machine} ⮚`**\\\n"
+
+            deploy_img_dir = f"build-{distro}-{machine}/tmp/deploy/images/{machine}/"
+
+            jobs__main = build__main.get_jobs(pipeline, f"build-{machine}")
+            jobs__mr = build__mr.get_jobs(pipeline, f"build-{machine}")
+            if not jobs__main or not jobs__mr:
+                # Nothing to compare if one of the pipelines has no such build job.
+                summary += "    └── build job not found, comparison skipped\n\n"
+                continue
+            artifacts__main = BuildArtifacts(manifest_project, jobs__main[0])
+            artifacts__mr = BuildArtifacts(manifest_project, jobs__mr[0])
+
+            deploy_files__main = artifacts__main.list_dir(deploy_img_dir)
+            deploy_files__mr = artifacts__mr.list_dir(deploy_img_dir)
+
+            # The difference in size of artifacts.zip for main and MR builds
+            zip_size__main = artifacts__main.get_archive_size()
+            zip_size__mr = artifacts__mr.get_archive_size()
+
+            zip_size_diff = sizeof_fmt(abs(zip_size__main - zip_size__mr))
+            sign = "+" if zip_size__main < zip_size__mr else "-"
+
+            zip_size__main = sizeof_fmt(zip_size__main)
+            zip_size__mr = sizeof_fmt(zip_size__mr)
+
+            summary += f"    ├── artifacts.zip size: [ {zip_size__main} → {zip_size__mr} ] | {sign}{zip_size_diff}\\\n"
+
+            # The difference in size of image for main and MR build
+            image_size__main = image_size__mr = 0
+
+            # The image file format may vary depending on machine and distribution.
+            if distro == "seconorth-fngsystem":
+                img_pattern = "*.rootfs.cpio.gz"
+            elif distro == "seconorth-wayland":
+                img_pattern = "*.rootfs.tar.gz"
+                if "genio" in machine:
+                    img_pattern = "*.rootfs.wic.img"
+            else:
+                img_pattern = ""
+
+            for name, size in deploy_files__main.items():
+                if fnmatch.fnmatch(name, img_pattern):
+                    image_size__main = size
+            for name, size in deploy_files__mr.items():
+                if fnmatch.fnmatch(name, img_pattern):
+                    image_size__mr = size
+            summary += f"    ├── image size: [ {image_size__main} → {image_size__mr} ]\\\n"
+
+            # Comparison of manifest files for main and MR builds
+            manifestfile_lines__main = []
+            manifestfile_lines__mr = []
+
+            for file in deploy_files__main:
+                if fnmatch.fnmatch(file, "*.rootfs.manifest"):
+                    raw = artifacts__main.get_artifact(deploy_img_dir + file)
+                    manifestfile_lines__main = raw.decode("utf-8").splitlines()
+                    diff_fromfile = file
+
+            for file in deploy_files__mr:
+                if fnmatch.fnmatch(file, "*.rootfs.manifest"):
+                    raw = artifacts__mr.get_artifact(deploy_img_dir + file)
+                    manifestfile_lines__mr = raw.decode("utf-8").splitlines()
+                    diff_tofile = file
+
+            if not manifestfile_lines__main or not manifestfile_lines__mr:
+                summary += "\n"  # keep the sections separated without a diff, too
+                continue
+
+            summary += "    └── manifest diff:\n"
+            summary += "```diff\n"
+            kernel_diff_lines = 0
+            kernel_rev_old = kernel_rev_new = ""
+            for line in unified_diff(
+                manifestfile_lines__main,
+                manifestfile_lines__mr,
+                fromfile=diff_fromfile,
+                tofile=diff_tofile,
+                n=0,
+                lineterm="",
+            ):
+                if fnmatch.fnmatch(line, "@@ * @@"):
+                    continue
+
+                if fnmatch.fnmatch(line, "?kernel-*"):
+                    kernel_diff_lines += 1
+                    if line[0] == "-":
+                        kernel_rev_old = line[-10:]
+                    if line[0] == "+":
+                        kernel_rev_new = line[-10:]
+                else:
+                    summary += line + "\n"
+            summary += "```\n"
+            if kernel_diff_lines:
+                summary += f"⚠ The kernel was updated from `{kernel_rev_old}` to `{kernel_rev_new}` commit.\n"
+                summary += f"{kernel_diff_lines} lines were removed from diff.\n"
+
+            summary += "\n"
+
+    print(summary)
+    mr.notes.create({"body": f"{summary}"})
+
+
+if __name__ == "__main__":
+    main()
-- 
GitLab