Skip to content
Snippets Groups Projects
Commit 775c09c1 authored by Jonas Höppner
Browse files

Add gitlab_backup script to download all project exports of a gitlab group

parent 7f68f307
No related branches found
No related tags found
No related merge requests found
#!/usr/bin/env python3
"""
Backup using gitlab project exports for all projects in a given group
"""
import argparse
import datetime
import logging
import os
import sys
import time
import gitlab as gl
# Module metadata.
__author__ = "Jonas Höppner"
__email__ = "jonas.hoeppner@garz-fricke.com"
# Default GitLab instance; can be overridden with --gitlab-url.
GITLAB_SERVER = "https://git.seco.com"
# ID of the Seco Northern Europe group; default for -g/--group-id.
GITLAB_GROUP_ID = "556"
# strptime format used by decode_timestamp() to parse timestamps
# such as a commit's "created_at" or a merge request's "merged_at".
GITLAB_TIMEFORMAT = "%Y-%m-%dT%H:%M:%S.%f%z"
# strftime format used when timestamps are rendered for output/logging.
TIMEFORMAT = "%Y-%m-%d %H:%M"
# NOTE(review): not referenced anywhere else in the visible source; main()
# uses the parsed --verbose option instead.
verbose = 0
def decode_timestamp(t):
    """Parse a GitLab timestamp string into a datetime object.

    The string *t* is interpreted according to GITLAB_TIMEFORMAT;
    ``strptime`` raises ValueError when it does not match that format.
    """
    return datetime.datetime.strptime(t, GITLAB_TIMEFORMAT)
class Project:
    """Wrapper around a GitLab project object with Markdown helpers.

    The wrapped object is expected to provide the attributes ``id``,
    ``name`` and ``web_url``.  Two wrappers compare equal when the ids of
    their wrapped projects are equal.
    """

    def __init__(self, project):
        self.project = project

    def __str__(self):
        """Return a Markdown level-2 heading with the project name."""
        return "## Project " + self.project.name + "\n"

    def withlink(self):
        """Return a Markdown level-2 heading linking to the project page."""
        return (
            "\n\n## Project [" + self.project.name + "](" + self.project.web_url + ")\n"
        )

    def __eq__(self, p):
        # Returning NotImplemented (instead of touching p.project) keeps
        # comparisons against None or unrelated types from raising
        # AttributeError; Python then evaluates such comparisons as False.
        if not isinstance(p, Project):
            return NotImplemented
        return self.project.id == p.project.id

    def __hash__(self):
        # Defining __eq__ alone would make instances unhashable; hash on
        # the same key that equality uses.
        return hash(self.project.id)
class Tag:
    """A release tag plus the timestamp used to place it on the timeline."""

    def __init__(self, tag):
        self.name = tag.name
        self.message = tag.message
        self.commit = tag.commit

        # The tag's timestamp is a little more complicated: it normally
        # points to the tagged commit's timestamp, but the merge happens
        # later.  To handle this, the related merge request is found by
        # comparing the SHAs and its merged_at timestamp is taken instead
        # (see add_mergerequest).
        self.timestamp = decode_timestamp(tag.commit["created_at"])

        # The merge request which introduced the tagged commit.  Per the
        # original author's note, the client library did not offer the
        # "merge requests associated with a commit" lookup, so this is
        # filled in later while traversing the merge requests anyway.
        # https://docs.gitlab.com/ee/api/commits.html#list-merge-requests-associated-with-a-commit
        self.mergerequest = None

        logging.debug(" -- ".join((self.name, self.commit["id"])))

    def __str__(self):
        return " ".join((self.name, self.timestamp.strftime(TIMEFORMAT)))

    def add_mergerequest(self, m):
        """Adopt *m* if its SHA equals the tagged commit id.

        Only the first matching merge request is kept; once one is stored,
        later calls return without doing anything.
        """
        if self.mergerequest or m.mr.sha != self.commit["id"]:
            return

        self.mergerequest = m
        # The tag points to the commit, but the merge request may have been
        # merged later than the commit was created, so merged_at is the
        # relevant date.  Otherwise the tagged commit (and maybe more) would
        # end up in the wrong release.
        merged_at = decode_timestamp(m.mr.merged_at)
        logging.debug("Found matching merge request for %s", self)
        logging.debug(" - %s", self.timestamp.strftime(TIMEFORMAT))
        logging.debug(" - %s", merged_at.strftime(TIMEFORMAT))
        self.timestamp = merged_at

    def header(self):
        """Return the Markdown heading block announcing this release."""
        parts = (
            "\n\n\n# Release ",
            self.name,
            "\n\nreleased at ",
            self.timestamp.strftime(TIMEFORMAT),
            "\n\n",
        )
        return "".join(parts)
class DummyTag:
    """Stand-in offering the Tag interface without a real GitLab tag.

    Args:
        name: Title printed by header().
        message: Free-text message stored on the instance.
        date: Timestamp of the pseudo tag.  When omitted (None), the
            current UTC time at construction is used.
    """

    def __init__(self, name, message, date=None):
        self.name = name
        self.message = message
        # A default of datetime.now(...) in the signature would be evaluated
        # only once, when the class body is executed, freezing the timestamp
        # at import time.  Resolve "now" per call instead.
        if date is None:
            date = datetime.datetime.now(tz=datetime.timezone.utc)
        self.timestamp = date

    def header(self):
        """Return the Markdown heading block for this pseudo release."""
        return "\n\n\n# " + self.name + "\n\n"

    def add_mergerequest(self, m):
        # Needed as interface but does nothing
        pass
class Release:
    """A tag together with the merge requests collected for it."""

    def __init__(self, tag):
        self.tag = tag
        self.mergerequests = []

    def add_mergerequest(self, m):
        """Offer merge request *m* to this release.

        Returns False when *m* is filtered out (automated author,
        integration branch, or merged after the tag's timestamp) and True
        when it belongs to this release, whether it was appended or
        recognised as a duplicate.
        """
        # Always let the tag check whether this merge request is the one
        # that introduced the tagged commit.
        self.tag.add_mergerequest(m)

        # Ignore automated merge requests.
        if m.mr.author["username"] in ("guf-gitbot", "gitbot"):
            return False

        # With the move to git.seco.com the merge requests owned by
        # guf-gitbot have been transferred to another user, and the owner
        # cannot be changed back, so filter on the branch name as well.
        if m.mr.source_branch.startswith("integrate/"):
            return False

        # Merged after this release's tag: not part of this release.
        if self.tag.timestamp < m.timestamp:
            return False

        # Do not list the same title twice for the same project and release.
        for known in self.mergerequests:
            if known.mr.title == m.mr.title and known.project == m.project:
                return True

        self.mergerequests.append(m)
        return True

    def header(self):
        """Return the heading produced by the underlying tag."""
        return self.tag.header()

    def description(self):
        """Return the tag message, or an empty string when it is unset."""
        return self.tag.message or ""

    def __str__(self):
        return self.tag.name
class MergeRequest:
    """A merge request paired with its project and decoded merge time."""

    def __init__(self, mr, p):
        self.mr = mr
        self.project = p
        # merged_at is parsed with decode_timestamp() and stored for later
        # comparisons against tag timestamps.
        self.timestamp = decode_timestamp(mr.merged_at)
        logging.debug("\nMergeRequest:")
        logging.debug(mr)

    def __str__(self):
        return self.mr.title

    def withlink(self):
        """Return the title followed by a Markdown link to the merge request."""
        pieces = (
            self.mr.title,
            " [",
            self.mr.reference,
            "](",
            self.mr.web_url,
            ")",
        )
        return "".join(pieces)
def get_projects_from_group(gitlab, base_group):
    """Recurse through all subgroups and create a flat list of all projects.

    Args:
        gitlab: Client object providing ``projects.get(id)`` and
            ``groups.get(id)``.
        base_group: Group object providing ``projects.list()`` and
            ``subgroups.list()``.

    Returns:
        A flat list with the result of ``gitlab.projects.get()`` for every
        project of *base_group*, followed by those of its subgroups
        (depth first).
    """
    # python-gitlab paginates list(): without all=True only the first page
    # (20 entries by default) is returned, so larger groups would be backed
    # up incompletely.  Newer python-gitlab versions also accept the
    # equivalent spelling get_all=True.
    p_list = [
        # The listed entry is re-fetched by id; the caller needs the object
        # returned by projects.get() (main() uses its ``exports`` manager).
        gitlab.projects.get(group_project.id)
        for group_project in base_group.projects.list(
            all=True, retry_transient_errors=True
        )
    ]
    for subgroup in base_group.subgroups.list(all=True, retry_transient_errors=True):
        # Same for subgroups: fetch the full group object before recursing.
        p_list.extend(get_projects_from_group(gitlab, gitlab.groups.get(subgroup.id)))
    return p_list
def main(args):
    """Trigger, await and download project exports for a whole group.

    Steps:
      1. Parse *args* (command line arguments without the program name).
      2. Collect all projects of the group, including subgroups.
      3. Trigger an export for every project.
      4. Poll the exports every 5 seconds and download each one as soon as
         it reports "finished", until none is queued/running any more.
      5. Print a summary of downloaded and failed exports.

    Returns:
        0 when every triggered export was downloaded, 1 otherwise.
    """
    parser = argparse.ArgumentParser(description=__doc__, usage="%(prog)s [OPTIONS]")
    parser.add_argument(
        "--gitlab-url",
        help="""URL to the GitLab instance""",
        dest="gitlab_url",
        action="store",
        default=GITLAB_SERVER,
    )
    parser.add_argument(
        "--token",
        help="""GitLab REST API private access token""",
        dest="token",
        required=True,
    )
    parser.add_argument(
        "-g",
        "--group-id",
        action="store",
        dest="groupid",
        default=GITLAB_GROUP_ID,
        help=("Specify the group by id to query projects in."),
    )
    parser.add_argument(
        "-e",
        "--export-path",
        action="store",
        dest="exportpath",
        default="./export",
        help=("Specify the path where the exported projects are stored."),
    )
    parser.add_argument(
        "-v",
        "--verbose",
        action="count",
        dest="verbose",
        default=0,
        help=("Increase verbosity."),
    )
    options = parser.parse_args(args)
    if options.verbose:
        logging.basicConfig(level=logging.DEBUG)
    logging.debug(options)

    gitlab = gl.Gitlab(options.gitlab_url, private_token=options.token)
    group = gitlab.groups.get(options.groupid)
    print("Getting projects in group {}".format(group.name))
    projects = get_projects_from_group(gitlab, group)

    # Trigger all exports first so the server can work on them while the
    # loop below polls and downloads.
    p = {}
    for project in projects:
        # The namespace path is mirrored below the export directory.
        exportpath = (
            os.path.join(options.exportpath, project.path_with_namespace) + ".tar.gz"
        )
        os.makedirs(os.path.dirname(exportpath), exist_ok=True)
        exportobject = project.exports.create(data=None)
        p[project.path_with_namespace] = {
            "project": project,
            "exportpath": exportpath,
            "export": exportobject,
            "downloaded": False,
            "laststatus": None,
        }
        print("Triggered creation of export for {}".format(project.name))

    # Statuses that mean "not done yet"; anything else except "finished"
    # is treated as a final failure for that project.
    pending_states = ("started", "queued", "regeneration_in_progress")
    while True:
        in_started_state = 0
        for name, entry in p.items():
            if entry["downloaded"]:
                continue
            export = entry["export"]
            export.refresh()
            status = export.export_status
            entry["laststatus"] = status
            if status != "finished":
                logging.debug("Project export status for %s: %s", name, status)
                if status in pending_states:
                    in_started_state += 1
                continue
            print("Downloading export for {}".format(name))
            with open(entry["exportpath"], "wb") as f:
                # Stream the archive to disk chunk by chunk instead of
                # holding the complete export in memory.
                export.download(streamed=True, action=f.write)
            entry["downloaded"] = True
        if in_started_state == 0:
            break
        print("Waiting for exports to be finished ({})".format(in_started_state))
        time.sleep(5)

    # Summary: successful downloads first, then the failures.
    failed = 0
    for name, entry in p.items():
        if entry["downloaded"]:
            print(
                "Project {} was downloaded to {}".format(name, entry["exportpath"])
            )
    for name, entry in p.items():
        if not entry["downloaded"]:
            failed += 1
            print(
                "Project {}: export failed with status {}".format(
                    name, entry["laststatus"]
                )
            )
    return 1 if failed else 0
if __name__ == "__main__":
    # Hand whatever main() returns to sys.exit() so that an integer return
    # value becomes the process exit status; a None return still exits
    # with status 0.
    sys.exit(main(sys.argv[1:]))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment