Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
gitlab-ci
Manage
Activity
Members
Labels
Plan
Issues
0
Issue boards
Milestones
Iterations
Code
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Locked files
Deploy
Releases
Package Registry
Model registry
Operate
Terraform modules
Analyze
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Clea OS
infrastructure
gitlab-ci
Commits
775c09c1
Commit
775c09c1
authored
2 years ago
by
Jonas Höppner
Browse files
Options
Downloads
Patches
Plain Diff
Add gitlab_backup script to download all project exports of a gitlab group
parent
7f68f307
No related branches found
No related tags found
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
scripts/gitlab_backup.py
+322
-0
322 additions, 0 deletions
scripts/gitlab_backup.py
with
322 additions
and
0 deletions
scripts/gitlab_backup.py
0 → 100755
+
322
−
0
View file @
775c09c1
#!/usr/bin/env python3
"""
Backup using gitlab project exports for all projects in a given group
"""
import
argparse
import
datetime
import
logging
import
os
import
sys
import
time
import
gitlab
as
gl
__author__ = "Jonas Höppner"
__email__ = "jonas.hoeppner@garz-fricke.com"

# Default GitLab instance; used as the default of the --gitlab-url option.
GITLAB_SERVER = "https://git.seco.com"

# ID of the Seco Northern Europe group; used as the default of --group-id.
GITLAB_GROUP_ID = "556"

# strptime() pattern applied by decode_timestamp() to GitLab API timestamps.
GITLAB_TIMEFORMAT = "%Y-%m-%dT%H:%M:%S.%f%z"
# strftime() pattern used whenever a timestamp is rendered into text output.
TIMEFORMAT = "%Y-%m-%d %H:%M"

# NOTE(review): not referenced by any code visible in this file; main() keeps
# its own verbosity count in options.verbose.
verbose = 0
def decode_timestamp(t):
    """
    Parse a GitLab timestamp string into a datetime object.

    The string is interpreted with the module-level GITLAB_TIMEFORMAT
    pattern; strptime raises ValueError when it does not match.
    """
    return datetime.datetime.strptime(t, GITLAB_TIMEFORMAT)
class Project:
    """
    Wrapper around a GitLab project object providing markdown headings
    and identity comparison based on the project id.
    """

    def __init__(self, project):
        # The wrapped object must provide .id, .name and .web_url
        self.project = project

    def __str__(self):
        """Return the markdown heading for this project (no link)."""
        return "## Project " + self.project.name + "\n"

    def withlink(self):
        """Return the markdown heading with the name linked to the web url."""
        return (
            "\n\n## Project ["
            + self.project.name
            + "]("
            + self.project.web_url
            + ")\n"
        )

    def __eq__(self, p):
        """
        Two wrappers are equal when they wrap projects with the same id.

        Anything that does not look like a project wrapper (None, strings,
        ...) yields NotImplemented, so Python falls back to its default
        comparison and the result is False instead of an AttributeError.
        """
        try:
            other_id = p.project.id
        except AttributeError:
            return NotImplemented
        return self.project.id == other_id

    def __hash__(self):
        # Defining __eq__ alone would make instances unhashable; keep the
        # hash consistent with equality so wrappers work in sets and dicts.
        return hash(self.project.id)
class Tag:
    """
    A release tag together with the timestamp that decides which merge
    requests belong to the release.
    """

    def __init__(self, tag):
        self.name = tag.name
        self.message = tag.message
        self.commit = tag.commit

        # The tag's timestamp is a little more complicated: it normally
        # points to the tagged commit's timestamp, but the merge happens
        # later. To handle this, the related merge request is found by
        # comparing the shas, and its merged_at timestamp is taken instead
        # (see add_mergerequest).
        created_at = tag.commit["created_at"]
        self.timestamp = decode_timestamp(created_at)

        # The merge request which introduced the tagged commit.
        # As gitlab-python does not support the V5 API yet, this is added
        # later when traversing the merge requests anyway.
        # With V5 API: https://docs.gitlab.com/ee/api/commits.html#list-merge-requests-associated-with-a-commit
        self.mergerequest = None

        debug_line = self.name + " -- " + self.commit["id"]
        logging.debug(debug_line)

    def __str__(self):
        stamp = self.timestamp.strftime(TIMEFORMAT)
        return self.name + " " + stamp

    def add_mergerequest(self, m):
        """
        Remember m as the merge request of this tag when its sha matches
        the tagged commit, and move the tag timestamp to its merge time.
        Only the first matching merge request is taken.
        """
        if self.mergerequest or m.mr.sha != self.commit["id"]:
            return

        self.mergerequest = m
        # The tag points to the commit, but the merge of the merge request
        # may have happened later than the commit, so the merged_at date is
        # the relevant one. Otherwise the tagged commit, and maybe more,
        # would end up in the wrong release.
        merged = decode_timestamp(m.mr.merged_at)
        logging.debug("Found matching merge request for %s", self)
        for stamp in (self.timestamp, merged):
            logging.debug(" - %s", stamp.strftime(TIMEFORMAT))
        self.timestamp = merged

    def header(self):
        """Return the markdown release heading including the release date."""
        parts = (
            "\n\n\n# Release ",
            self.name,
            "\n\nreleased at ",
            self.timestamp.strftime(TIMEFORMAT),
            "\n\n",
        )
        return "".join(parts)
class DummyTag:
    """
    Stand-in for a Tag that is not backed by a real GitLab tag.

    Provides the same attributes (name, message, timestamp) and methods
    (header, add_mergerequest) as Tag.
    """

    def __init__(self, name, message, date=None):
        """
        name:    text used for the heading
        message: description text
        date:    timestamp of the tag; defaults to the current UTC time
                 at the moment the instance is created
        """
        self.name = name
        self.message = message
        # A datetime.now() call in the signature would be evaluated only once,
        # when the class is defined, freezing the default at import time.
        if date is None:
            date = datetime.datetime.now(tz=datetime.timezone.utc)
        self.timestamp = date

    def header(self):
        """Return the markdown heading for this pseudo release."""
        return "\n\n\n# " + self.name + "\n\n"

    def add_mergerequest(self, m):
        # Needed as interface but does nothing
        pass
class Release:
    """
    Collects the merge requests that belong to one release tag
    """

    def __init__(self, tag):
        self.tag = tag
        self.mergerequests = []

    def add_mergerequest(self, m):
        """
        Offer a merge request to this release.

        Returns False when the merge request is filtered out (automated
        merge request or merged after the release), True when it belongs
        to the release. Duplicates are accepted but stored only once.
        """
        # Let the tag check whether this merge request is the one that
        # introduced the tagged commit
        self.tag.add_mergerequest(m)

        merge = m.mr

        # Ignore automated merge requests
        if merge.author["username"] in ("guf-gitbot", "gitbot"):
            return False

        # With the move to git.seco.com the merge requests owned by the
        # guf-gitbot have been transferred to tobias. As it is not possible
        # to change the owner back to gitbot, an extra filter on the branch
        # name is needed here.
        if merge.source_branch.startswith("integrate/"):
            return False

        # Merged after the release: not part of it
        if self.tag.timestamp < m.timestamp:
            return False

        # Don't list the same title twice for the same project and release
        for known in self.mergerequests:
            if known.mr.title == merge.title and known.project == m.project:
                break
        else:
            self.mergerequests.append(m)
        return True

    def header(self):
        """Return the heading provided by the tag."""
        return self.tag.header()

    def description(self):
        """Return the tag message, or an empty string when there is none."""
        return self.tag.message or ""

    def __str__(self):
        return self.tag.name
class MergeRequest:
    """
    A merged GitLab merge request together with the project it belongs to.
    """

    def __init__(self, mr, p):
        """
        mr: merge request object; .merged_at, .title, .reference and
            .web_url are read from it
        p:  the project the merge request belongs to
        """
        self.mr = mr
        self.project = p
        # Merge time, parsed so it can be compared with tag timestamps
        merged_at = self.mr.merged_at
        self.timestamp = decode_timestamp(merged_at)

        logging.debug("\nMergeRequest:")
        logging.debug(mr)

    def __str__(self):
        return self.mr.title

    def withlink(self):
        """Return the title followed by a markdown link to the merge request."""
        pieces = (
            self.mr.title,
            " [",
            self.mr.reference,
            "](",
            self.mr.web_url,
            ")",
        )
        return "".join(pieces)
def get_projects_from_group(gitlab, base_group):
    """
    Recurse through all subgroups and create a flat list of all projects

    gitlab:     API client; its projects.get() and groups.get() are used to
                fetch the full objects for the ids found in the listings
    base_group: group object to start from

    Returns the projects of base_group followed by those of each subgroup
    (depth first).
    """
    # list() is paginated and by default returns the first page only, which
    # would silently drop projects and subgroups of larger groups from the
    # backup. Request every page explicitly.
    listing = {"all": True, "retry_transient_errors": True}

    p_list = [
        gitlab.projects.get(group_project.id)
        for group_project in base_group.projects.list(**listing)
    ]

    for subgroup in base_group.subgroups.list(**listing):
        # Fetch the full group object before descending into it
        group = gitlab.groups.get(subgroup.id)
        p_list += get_projects_from_group(gitlab, group)

    return p_list
def main(args):
    """
    Export every project of a GitLab group and download the archives.

    An export is triggered for each project found in the group and its
    subgroups. The exports are then polled every five seconds; each one
    that reaches the "finished" state is downloaded to
    <export-path>/<path_with_namespace>.tar.gz. Polling stops as soon as
    no export is queued or running any more.

    args: command line arguments without the program name

    Returns 0 when every export was downloaded, 1 otherwise.
    """
    parser = argparse.ArgumentParser(description=__doc__, usage="%(prog)s [OPTIONS]")
    parser.add_argument(
        "--gitlab-url",
        help="""URL to the GitLab instance""",
        dest="gitlab_url",
        action="store",
        default=GITLAB_SERVER,
    )
    parser.add_argument(
        "--token",
        help="""GitLab REST API private access token""",
        dest="token",
        required=True,
    )
    parser.add_argument(
        "-g",
        "--group-id",
        action="store",
        dest="groupid",
        default=GITLAB_GROUP_ID,
        help=("Specify the group by id to query projects in."),
    )
    parser.add_argument(
        "-e",
        "--export-path",
        action="store",
        dest="exportpath",
        default="./export",
        help=("Specify the path where the exported projects are stored."),
    )
    parser.add_argument(
        "-v",
        "--verbose",
        action="count",
        dest="verbose",
        default=0,
        help=("Increase verbosity."),
    )

    options = parser.parse_args(args)
    if options.verbose:
        logging.basicConfig(level=logging.DEBUG)

    # Never write the access token to the log
    logging.debug({**vars(options), "token": "<redacted>"})

    gitlab = gl.Gitlab(options.gitlab_url, private_token=options.token)
    group = gitlab.groups.get(options.groupid)
    print("Getting projects in group {}".format(group.name))
    projects = get_projects_from_group(gitlab, group)

    # Bookkeeping per project, keyed by path_with_namespace
    p = {}
    for project in projects:
        exportpath = (
            os.path.join(options.exportpath, project.path_with_namespace) + ".tar.gz"
        )
        os.makedirs(os.path.dirname(exportpath), exist_ok=True)

        exportobject = project.exports.create(data=None)
        p[project.path_with_namespace] = {
            "project": project,
            "exportpath": exportpath,
            "export": exportobject,
            "downloaded": False,
            "laststatus": None,
        }
        print("Triggered creation of export for {}".format(project.name))

    # States in which it is worth waiting for the export
    pending_states = ("started", "queued", "regeneration_in_progress")

    while True:
        in_started_state = 0
        for name, entry in p.items():
            if entry["downloaded"]:
                continue

            export = entry["export"]
            export.refresh()
            status = export.export_status
            entry["laststatus"] = status

            if status != "finished":
                logging.debug("Project export status for %s: %s", name, status)
            if status in pending_states:
                in_started_state += 1
                continue

            if status == "finished":
                print("Downloading export for {}".format(name))
                # Stream the archive to disk chunk by chunk instead of
                # holding the complete export in memory first
                with open(entry["exportpath"], "wb") as f:
                    export.download(streamed=True, action=f.write)
                entry["downloaded"] = True

        if in_started_state == 0:
            break
        print("Waiting for exports to be finished ({})".format(in_started_state))
        time.sleep(5)

    for name, entry in p.items():
        if entry["downloaded"]:
            print("Project {} was downloaded to {}".format(name, entry["exportpath"]))

    failed = [name for name, entry in p.items() if not entry["downloaded"]]
    for name in failed:
        print(
            "Project {}: export failed with status {}".format(
                name, p[name]["laststatus"]
            )
        )

    # Allow callers to detect an incomplete backup
    return 1 if failed else 0
if __name__ == "__main__":
    # Use the return value of main() as the process exit status; a return
    # value of None is reported as success.
    sys.exit(main(sys.argv[1:]))
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment