From 1299a9d671826104a620e4b2fd8ce278dd306987 Mon Sep 17 00:00:00 2001
From: Dmitry Petrov <dmitry.petrov@rtsoft.de>
Date: Wed, 3 Apr 2024 13:18:06 +0200
Subject: [PATCH] CI: add "report-image-diff" job

Add a python script and a GitLab CI job to run it.
The job is triggered when the build is complete.

The script retrieves the build artifacts for the default/main branch
(currently "kirkstone") in the manifest repo and the build artifacts
for the integration branch of the merge request.
After that, it compares those builds and creates a summary
of the overall changes. In particular:

- size of compressed artifacts (artifacts.zip)
- size of built image
- difference between manifest files

The summary is reported to stdout and as a comment to the merge request.
Each run of the pipeline removes the old summary comment
and creates a new one when the build is finished.
---
 manifest-integration-jobs.yml |  21 ++
 scripts/report_image_diff.py  | 491 ++++++++++++++++++++++++++++++++++
 2 files changed, 512 insertions(+)
 create mode 100755 scripts/report_image_diff.py

diff --git a/manifest-integration-jobs.yml b/manifest-integration-jobs.yml
index a317acd0..2c622897 100644
--- a/manifest-integration-jobs.yml
+++ b/manifest-integration-jobs.yml
@@ -61,6 +61,27 @@ build:
     branch: "integrate/${CI_PROJECT_NAME}/${CI_COMMIT_REF_NAME}/into/${TARGET_BRANCH}"
     strategy: depend
 
+report-image-diff:
+  extends: .infrastructure
+  stage: manifest-integration-jobs
+  timeout: 1h
+  needs: ["build"]
+  rules:
+    # Do not run the report if the "skip build" label is set on the merge request
+    - if: $CI_MERGE_REQUEST_LABELS =~ /skip build/
+      when: never
+    - if: $CI_MERGE_REQUEST_IID
+  script:
+    - cd ${CI_PROJECT_DIR}
+    - .gitlab-ci/scripts/report_image_diff.py
+      --gitlab-url="${CI_SERVER_URL}"
+      --token="${GITBOT_TOKEN}"
+      --manifest-project="${TARGET_PROJECT}"
+      --project="${CI_PROJECT_ID}"
+      --mr-iid="${CI_MERGE_REQUEST_IID}"
+      --target-branch="${TARGET_BRANCH}"
+      --source-branch="integrate/${CI_PROJECT_NAME}/${CI_COMMIT_REF_NAME}/into/${TARGET_BRANCH}"
+
 # --------------------------------------------------------------------------------------
 # Master pipeline
 # --------------------------------------------------------------------------------------
diff --git a/scripts/report_image_diff.py b/scripts/report_image_diff.py
new file mode 100755
index 00000000..e6623efc
--- /dev/null
+++ b/scripts/report_image_diff.py
@@ -0,0 +1,491 @@
+#!/usr/bin/env python3
+"""
+Compare the build of a merge request with the build of the main branch.
+
+The script looks up the manifest pipelines of the target (main) branch and of
+the integration branch, compares their build artifacts and posts a summary
+as a comment to the merge request.
+"""
+import argparse
+import fnmatch
+import logging
+import sys
+import time
+from difflib import unified_diff
+from typing import Optional
+
+import lxml.html
+import requests
+from gitlab import Gitlab
+from gitlab.v4.objects import Project
+from gitlab.v4.objects.pipelines import ProjectPipeline, ProjectPipelineJob
+
+import common
+
+# Every summary comment starts with this marker (see main()). It is used to
+# find and delete the summary comments of previous runs.
+SUMMARY_MARKER = "**`⮘"
+
+# Pipeline states in which the build has not finished yet.
+NOT_READY_STATES = (
+    "created",
+    "waiting_for_resource",
+    "preparing",
+    "pending",
+    "running",
+)
+
+# Timeout in seconds for HTTP requests that bypass python-gitlab.
+HTTP_TIMEOUT = 60
+
+
+class FullBuildPipeline:
+    """Full build pipeline of a commit together with its downstream build jobs."""
+
+    def __init__(self, project: Project, commit_sha: str):
+        self.project = project
+        self.commit_sha = commit_sha
+        self.upstream_pipeline = self.__get_upstream_pipeline()
+        self.build_pipelines = self.__get_build_pipelines()
+
+    def __get_upstream_pipeline(self) -> Optional[ProjectPipeline]:
+        """
+        Get the latest upstream (main) pipeline for the specified commit.
+
+        Returns:
+            A ProjectPipeline object on success, None otherwise.
+        """
+
+        pipelines_for_commit = self.project.pipelines.list(
+            all=False, sha=self.commit_sha, order_by="id", sort="desc"
+        )
+
+        # For the main branch we have two types of pipelines: short and full.
+        # The short one just retriggers the full pipeline and does not contain
+        # any artifacts. The source of the short pipeline is "push", so skip it.
+        # TODO: filter by source in project.pipelines.list() instead, as soon as
+        # the installed version of python-gitlab supports the "source" parameter.
+        for pipeline in pipelines_for_commit:
+            if pipeline.source != "push":
+                # Sorted by ID in descending order: the first hit is the latest.
+                return pipeline
+
+        return None
+
+    def __get_build_pipelines(self) -> dict[str, tuple[ProjectPipelineJob, ...]]:
+        """
+        Wait for the upstream pipeline to finish and collect the jobs of its
+        downstream build pipelines.
+
+        The script exits if there is no upstream pipeline or if the pipeline
+        does not finish within the timeout.
+
+        Returns:
+            A dictionary where the key is the build pipeline name and
+            the value is a tuple of downstream jobs.
+        """
+
+        if self.upstream_pipeline is None:
+            sys.exit(f"No build pipeline found for commit {self.commit_sha}")
+
+        timeout = 3000  # 50 min
+        check_interval = 30
+
+        if self.upstream_pipeline.status in NOT_READY_STATES:
+            print(
+                f"The build pipeline ({self.upstream_pipeline.web_url}) is not ready."
+            )
+            print("Wait for it to complete", end="", flush=True)
+
+            while self.upstream_pipeline.status in NOT_READY_STATES:
+                print(".", end="", flush=True)
+                time.sleep(check_interval)
+                timeout -= check_interval
+                if timeout < 0:
+                    sys.exit("timeout")
+                # The status is cached in the object: fetch the current one.
+                self.upstream_pipeline.refresh()
+            print()
+
+        ret = {}
+        for bridge in self.upstream_pipeline.bridges.list(all=True):
+            if not bridge.downstream_pipeline:
+                continue
+            downstream_pipeline = self.project.pipelines.get(
+                bridge.downstream_pipeline["id"]
+            )
+            ret[bridge.name] = tuple(downstream_pipeline.jobs.list(all=True))
+        return ret
+
+    def get_jobs(
+        self, pipeline_name: str = "*", job_filter: str = "*"
+    ) -> tuple[ProjectPipelineJob, ...]:
+        """
+        Get build jobs for the specified pipeline.
+        The result can also be filtered by name.
+
+        Args:
+            pipeline_name: str — name of build pipeline (e.g. "fngsystem-pipeline",
+                "sdk-pipeline") or "*" for all build pipelines.
+            job_filter: str — fnmatch pattern to select jobs by name.
+
+        Returns:
+            A tuple of pipeline jobs. It is empty if the pipeline is unknown
+            or if no job matches the filter.
+        """
+
+        if pipeline_name == "*":
+            jobs = [job for v in self.build_pipelines.values() for job in v]
+        else:
+            jobs = self.build_pipelines.get(pipeline_name, ())
+
+        return tuple(job for job in jobs if fnmatch.fnmatch(job.name, job_filter))
+
+
+class BuildArtifacts:
+    """Access to the artifacts of a single build job."""
+
+    def __init__(self, project: Project, pipelinejob: ProjectPipelineJob):
+        self.project = project
+        self.pipelinejob = pipelinejob
+
+    def list_dir(self, path: str) -> dict[str, str]:
+        """
+        Get a list of the files and directories for the specified path.
+        The list is parsed from the artifacts browser page of the GitLab web UI.
+
+        Args:
+            path: str — relative path in the job artifacts.
+
+        Returns:
+            A dictionary containing name of files as a key and file size
+            in human-readable form as value ("dir" for entries without size).
+            The dictionary is empty if the page cannot be retrieved.
+        """
+
+        url = f"{self.project.web_url}/-/jobs/{self.pipelinejob.id}/artifacts/browse/{path}"
+        try:
+            r = requests.get(url, timeout=HTTP_TIMEOUT)
+        except requests.RequestException as e:
+            logging.debug("Cannot retrieve %s: %s", url, e)
+            return {}
+        if r.status_code != 200:
+            return {}
+
+        ret = {}
+
+        root = lxml.html.fromstring(r.text)
+        for tree_item in root.find_class("tree-item"):
+            name_elements = tree_item.find_class("tree-item-file-name")
+            if not name_elements:
+                continue
+            name = name_elements[0].text_content().strip()
+
+            cell_texts = tree_item.xpath(".//td/text()")
+            size = cell_texts[-1].strip() if cell_texts else ""
+
+            ret[name] = size or "dir"
+
+        return ret
+
+    def get_artifact(self, path: str, outfile: Optional[str] = None) -> Optional[bytes]:
+        """
+        Get a single artifact file from GitLab.
+        Save it to the file if "outfile" arg is specified.
+
+        Args:
+            path: str — relative path to artifact file.
+            outfile: str — path to save the output file.
+
+        Returns:
+            None if "outfile" arg is given. Otherwise, returns the artifact file as bytes.
+        """
+
+        job = self.project.jobs.get(self.pipelinejob.id, lazy=True)
+
+        if not outfile:
+            return job.artifact(path)
+
+        with open(outfile, "wb") as f:
+            job.artifact(path, streamed=True, action=f.write)
+        return None
+
+    def get_archive_size(self) -> int:
+        """
+        Get the size of compressed artifacts (artifacts.zip).
+
+        Returns:
+            An integer containing the size of archive in bytes.
+        """
+
+        return self.pipelinejob.artifacts_file["size"]
+
+
+def sizeof_fmt(num: int, p: int = 2) -> str:
+    """
+    Get human-readable file sizes.
+    https://stackoverflow.com/a/1094933
+
+    Args:
+        num: int — size in bytes.
+        p: int — precision, number of digits after the decimal point.
+
+    Returns:
+        A string that contains a human-readable size.
+    """
+
+    for unit in ("B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB"):
+        if abs(num) < 1024.0:
+            if unit == "B":
+                return f"{num} {unit}"
+            return f"{num:3.{p}f} {unit}"
+        num /= 1024.0
+    return f"{num:.{p}f} YiB"
+
+
+def find_file(files: dict[str, str], pattern: str) -> Optional[str]:
+    """
+    Find a file by name pattern in the result of BuildArtifacts.list_dir().
+
+    Args:
+        files: dict — file names as keys, as returned by list_dir().
+        pattern: str — fnmatch pattern for the file name.
+
+    Returns:
+        The name of the last matching file, None if there is no match.
+    """
+
+    found = None
+    for name in files:
+        if fnmatch.fnmatch(name, pattern):
+            found = name
+    return found
+
+
+def get_latest_commit_id(project: Project, branch: str) -> str:
+    """
+    Get the ID of the latest commit on the branch.
+    The script exits if no commit is found.
+
+    Args:
+        project: Project — GitLab project containing the branch.
+        branch: str — name of the branch.
+
+    Returns:
+        The ID of the commit as string.
+    """
+
+    commits = project.commits.list(
+        all=False, ref_name=branch, order_by="id", sort="desc"
+    )
+    if not commits:
+        sys.exit(f'No commits found on branch "{branch}"')
+    return commits[0].id
+
+
+def main():
+    """Compare the main build with the merge request build and report the summary."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--gitlab-url",
+        help="""URL to the GitLab instance""",
+        dest="gitlab_url",
+        default=common.GITLAB_URL,
+    )
+    parser.add_argument(
+        "--token",
+        help="""GitLab REST API private access token""",
+        dest="token",
+        required=True,
+    )
+    parser.add_argument(
+        "--manifest-project",
+        help="""ID or name of the manifest project""",
+        dest="manifest_project",
+        required=True,
+    )
+    parser.add_argument(
+        "--project",
+        help="""ID or name of the project containing the merge request for analysis of changes""",
+        dest="project",
+        required=True,
+    )
+    parser.add_argument(
+        "--mr-iid",
+        help="""The project-level IID (internal ID) of the merge request""",
+        dest="mr_iid",
+        required=True,
+    )
+    parser.add_argument(
+        "--source-branch",
+        help="""manifest integration branch""",
+        dest="source_branch",
+        required=True,
+    )
+    parser.add_argument(
+        "--target-branch",
+        help="""manifest branch for comparison""",
+        dest="target_branch",
+        required=True,
+    )
+    parser.add_argument(
+        "-v",
+        "--verbose",
+        action="store_true",
+        help="""Increase verbosity.""",
+    )
+
+    args, _ = parser.parse_known_args()
+    if args.verbose:
+        logging.basicConfig(level=logging.DEBUG)
+
+    # Do not leak the access token into the job log.
+    logging.debug({k: v for k, v in vars(args).items() if k != "token"})
+    gitlab = Gitlab(args.gitlab_url, private_token=args.token)
+
+    manifest_project = common.get_project(gitlab, args.manifest_project)
+    project = common.get_project(gitlab, args.project)
+
+    mr = project.mergerequests.get(args.mr_iid, lazy=True)
+
+    # Delete previous summary comments
+    for comment in mr.notes.list(all=True):
+        if comment.body.startswith(SUMMARY_MARKER):
+            comment.delete()
+
+    build__main = FullBuildPipeline(
+        manifest_project, get_latest_commit_id(manifest_project, args.target_branch)
+    )
+    build__mr = FullBuildPipeline(
+        manifest_project, get_latest_commit_id(manifest_project, args.source_branch)
+    )
+
+    machines = (
+        "seco-genio510",
+        "seco-genio700",
+        "seco-mx6",
+        "seco-mx6ull",
+        "seco-mx8mm",
+        "seco-mx8mp",
+    )
+    pipelines_and_distros = {
+        "fngsystem-pipeline": "seconorth-fngsystem",
+        "yocto-pipeline": "seconorth-wayland",
+    }
+
+    summary = ""
+
+    for pipeline, distro in pipelines_and_distros.items():
+        for machine in machines:
+            summary += f"**`⮘ {distro} | {machine} ⮚`**\\\n"
+
+            jobs__main = build__main.get_jobs(pipeline, f"build-{machine}")
+            jobs__mr = build__mr.get_jobs(pipeline, f"build-{machine}")
+            if not jobs__main or not jobs__mr:
+                # Nothing to compare, e.g. the machine was not built.
+                summary += " └── build job not found, skipped\n\n"
+                continue
+
+            deploy_img_dir = f"build-{distro}-{machine}/tmp/deploy/images/{machine}/"
+
+            artifacts__main = BuildArtifacts(manifest_project, jobs__main[0])
+            artifacts__mr = BuildArtifacts(manifest_project, jobs__mr[0])
+
+            deploy_files__main = artifacts__main.list_dir(deploy_img_dir)
+            deploy_files__mr = artifacts__mr.list_dir(deploy_img_dir)
+
+            # The difference in size of artifacts.zip for main and MR builds
+            zip_size__main = artifacts__main.get_archive_size()
+            zip_size__mr = artifacts__mr.get_archive_size()
+
+            zip_size_diff = sizeof_fmt(abs(zip_size__main - zip_size__mr))
+            if zip_size__main < zip_size__mr:
+                sign = "+"
+            elif zip_size__main > zip_size__mr:
+                sign = "-"
+            else:
+                sign = ""
+
+            zip_size__main = sizeof_fmt(zip_size__main)
+            zip_size__mr = sizeof_fmt(zip_size__mr)
+
+            summary += f" ├── artifacts.zip size: [ {zip_size__main} → {zip_size__mr} ] | {sign}{zip_size_diff}\\\n"
+
+            # The image file format may vary depending on machine and distribution.
+            if distro == "seconorth-fngsystem":
+                img_pattern = "*.rootfs.cpio.gz"
+            elif distro == "seconorth-wayland":
+                img_pattern = "*.rootfs.tar.gz"
+                if "genio" in machine:
+                    img_pattern = "*.rootfs.wic.img"
+            else:
+                img_pattern = ""
+
+            # The difference in size of image for main and MR build
+            image__main = find_file(deploy_files__main, img_pattern)
+            image__mr = find_file(deploy_files__mr, img_pattern)
+            image_size__main = deploy_files__main[image__main] if image__main else 0
+            image_size__mr = deploy_files__mr[image__mr] if image__mr else 0
+            summary += (
+                f" ├── image size: [ {image_size__main} → {image_size__mr} ]\\\n"
+            )
+
+            # Comparison of manifest files for main and MR builds
+            manifestfile_lines__main = []
+            manifestfile_lines__mr = []
+
+            diff_fromfile = find_file(deploy_files__main, "*.rootfs.manifest")
+            diff_tofile = find_file(deploy_files__mr, "*.rootfs.manifest")
+
+            if diff_fromfile and diff_tofile:
+                manifestfile_lines__main = (
+                    artifacts__main.get_artifact(deploy_img_dir + diff_fromfile)
+                    .decode("utf-8")
+                    .splitlines()
+                )
+                manifestfile_lines__mr = (
+                    artifacts__mr.get_artifact(deploy_img_dir + diff_tofile)
+                    .decode("utf-8")
+                    .splitlines()
+                )
+
+            if not manifestfile_lines__main or not manifestfile_lines__mr:
+                continue
+
+            summary += " └── manifest diff:\n"
+            summary += "```diff\n"
+            kernel_diff_lines = 0
+            kernel_rev_old = ""
+            kernel_rev_new = ""
+            for line in unified_diff(
+                manifestfile_lines__main,
+                manifestfile_lines__mr,
+                fromfile=diff_fromfile,
+                tofile=diff_tofile,
+                n=0,
+                lineterm="",
+            ):
+                if fnmatch.fnmatch(line, "@@ * @@"):
+                    continue
+
+                if fnmatch.fnmatch(line, "?kernel-*"):
+                    kernel_diff_lines += 1
+                    if line[0] == "-":
+                        kernel_rev_old = line[-10:]
+                    if line[0] == "+":
+                        kernel_rev_new = line[-10:]
+                else:
+                    summary += line + "\n"
+            summary += "```\n"
+            if kernel_diff_lines:
+                summary += f"⚠The kernel was updated from `{kernel_rev_old}` to `{kernel_rev_new}` commit.\n"
+                summary += f"{kernel_diff_lines} lines were removed from diff.\n"
+
+            summary += "\n"
+
+    print(summary)
+    mr.notes.create({"body": summary})
+
+
+if __name__ == "__main__":
+    main()
-- 
GitLab