2021-01-25 21:49:01 -05:00
|
|
|
import argparse
|
2019-11-26 22:22:14 -05:00
|
|
|
import os
|
|
|
|
import re
|
|
|
|
import tarfile
|
2021-01-25 21:49:01 -05:00
|
|
|
import tempfile
|
2021-10-14 08:19:50 -04:00
|
|
|
import time
|
2019-11-26 22:22:14 -05:00
|
|
|
import zipfile
|
2020-06-17 04:03:49 -04:00
|
|
|
from functools import wraps
|
2021-10-14 08:19:50 -04:00
|
|
|
from typing import Any, Callable, Dict, List, Optional
|
2019-11-26 22:22:14 -05:00
|
|
|
|
|
|
|
import gitlab
|
|
|
|
|
2021-10-14 08:19:50 -04:00
|
|
|
# Type alias for "any callable"; used below as both the parameter type and the
# return type of the ``retry`` decorator.
TR = Callable[..., Any]
|
|
|
|
|
|
|
|
|
|
|
|
def retry(func: TR) -> TR:
    """
    Decorate a method so that it is re-run after "network issue" failures.

    Only ``IOError``, ``EOFError`` and ``gitlab.exceptions.GitlabError`` are
    intercepted. A ``GitlabError`` is retried only when its response code is
    500, or when it is 404 while the ``LOCAL_GITLAB_HTTPS_HOST`` environment
    variable is set (the variable is removed before retrying). Any other
    ``GitlabError`` is re-raised immediately.

    The wrapped callable must be a method: its first argument has to expose
    ``DOWNLOAD_ERROR_MAX_RETRIES``, the number of retries allowed before the
    last error is re-raised. The wait before retry ``n`` is ``2 ** n`` seconds.
    """
    @wraps(func)
    def wrapper(self: 'Gitlab', *args: Any, **kwargs: Any) -> Any:
        attempt = 0
        while True:
            try:
                # success: hand the result straight back to the caller
                return func(self, *args, **kwargs)
            except (IOError, EOFError, gitlab.exceptions.GitlabError) as e:
                if isinstance(e, gitlab.exceptions.GitlabError):
                    # a 500 response is always retried
                    if e.response_code != 500:
                        if e.response_code == 404 and os.environ.get('LOCAL_GITLAB_HTTPS_HOST', None):
                            # remove the environment variable "LOCAL_GITLAB_HTTPS_HOST" and retry
                            os.environ.pop('LOCAL_GITLAB_HTTPS_HOST', None)
                        else:
                            # other GitlabErrors aren't retried
                            raise e

                attempt += 1
                if attempt > self.DOWNLOAD_ERROR_MAX_RETRIES:
                    raise e  # retry budget used up, leave the loop

                print('Network failure in {}, retrying ({})'.format(getattr(func, '__name__', '(unknown callable)'), attempt))
                time.sleep(2 ** attempt)  # wait a bit more after each retry

    return wrapper
|
|
|
|
|
2019-11-26 22:22:14 -05:00
|
|
|
|
|
|
|
class Gitlab(object):
    """
    Helper around a ``python-gitlab`` client for the lookup and download tasks
    defined below: project and job IDs, job artifacts and repository archives.

    Every method that talks to the server is wrapped with ``retry``.
    """

    # Job name layout: "<name>", optionally followed by "<index>/<total>".
    # group(1) is the name, group(3) the index and group(4) the total.
    JOB_NAME_PATTERN = re.compile(r'(\w+)(\s+(\d+)/(\d+))?')

    # Number of retries the ``retry`` decorator allows before re-raising.
    DOWNLOAD_ERROR_MAX_RETRIES = 3

    def __init__(self, project_id: Optional[int] = None):
        """
        Create the client and, when ``project_id`` is given, fetch that project.

        :param project_id: ID of the project to bind to. ``self.project`` is None if omitted.
        """
        config_data_from_env = os.getenv('PYTHON_GITLAB_CONFIG')
        if not config_data_from_env:
            # no config in the environment: let python-gitlab look for a
            # config file at the local filesystem
            self._init_gitlab_inst(project_id, None)
            return

        # prefer to load config from env variable: python-gitlab is handed a
        # list of file names, so the text goes through a temporary file
        with tempfile.NamedTemporaryFile('w', delete=False) as temp_file:
            temp_file.write(config_data_from_env)
        try:
            self._init_gitlab_inst(project_id, [temp_file.name])
        finally:
            # The file is only needed while the client is being created.
            # Remove it so the configuration text does not stay on disk.
            self._remove_temp_file(temp_file.name)

    @staticmethod
    def _remove_temp_file(path: str) -> None:
        """Delete a temporary file, ignoring the error if that is not possible."""
        try:
            os.remove(path)
        except OSError:
            # Best-effort cleanup: a leftover temp file must neither fail nor
            # trigger a retry of an operation that has otherwise finished.
            pass

    @retry
    def _init_gitlab_inst(self, project_id: Optional[int], config_files: Optional[List[str]]) -> None:
        """
        Build and authenticate ``self.gitlab_inst`` and set ``self.project``.

        :param project_id: project to fetch; ``self.project`` is None when this is falsy
        :param config_files: config files passed to ``gitlab.Gitlab.from_config``
        """
        gitlab_id = os.getenv('LOCAL_GITLAB_HTTPS_HOST')  # if None, will use the default gitlab server
        self.gitlab_inst = gitlab.Gitlab.from_config(gitlab_id=gitlab_id, config_files=config_files)
        self.gitlab_inst.auth()
        if project_id:
            self.project = self.gitlab_inst.projects.get(project_id)
        else:
            self.project = None

    @retry
    def get_project_id(self, name: str, namespace: Optional[str] = None) -> int:
        """
        search project ID by name

        :param name: project name
        :param namespace: namespace to match when we have multiple project with same name
        :return: project ID
        :raise ValueError: if no project matches
        """
        projects = self.gitlab_inst.projects.list(search=name)
        res = []
        for project in projects:
            if namespace is None and len(projects) == 1:
                # a single search hit needs no namespace to disambiguate
                res.append(project.id)
                break

            if project.namespace['path'] == namespace:
                if project.name == name:
                    # an exact name match takes priority over partial matches
                    res.insert(0, project.id)
                else:
                    res.append(project.id)

        if not res:
            raise ValueError("Can't find project")
        return int(res[0])

    @retry
    def download_artifacts(self, job_id: int, destination: str) -> None:
        """
        download full job artifacts and extract to destination.

        :param job_id: Gitlab CI job ID
        :param destination: extract artifacts to path.
        """
        job = self.project.jobs.get(job_id)

        temp_file = tempfile.NamedTemporaryFile(delete=False)
        try:
            with temp_file:
                job.artifacts(streamed=True, action=temp_file.write)

            # the download is flushed and closed before it is reopened as a zip
            with zipfile.ZipFile(temp_file.name, 'r') as archive_file:
                archive_file.extractall(destination)
        finally:
            # also runs on failure, so retried downloads don't pile up files
            self._remove_temp_file(temp_file.name)

    @retry
    def download_artifact(self, job_id: int, artifact_path: str, destination: Optional[str] = None) -> List[bytes]:
        """
        download specific path of job artifacts and extract to destination.

        :param job_id: Gitlab CI job ID
        :param artifact_path: list of path in artifacts (relative path to artifact root path).
                              A single path given as a plain string is also accepted.
        :param destination: destination of artifact. Do not save to file if destination is None
        :return: A list of artifact file raw data.
        """
        job = self.project.jobs.get(job_id)

        # A bare string would otherwise be iterated character by character.
        if isinstance(artifact_path, str):
            paths = [artifact_path]
        else:
            paths = list(artifact_path)

        raw_data_list = []  # type: List[bytes]

        for a_path in paths:
            try:
                data = job.artifact(a_path)  # type: bytes
            except gitlab.GitlabGetError:
                print("Failed to download '{}' from job {}".format(a_path, job_id))
                raise
            raw_data_list.append(data)

            if destination:
                file_path = os.path.join(destination, a_path)
                dir_name = os.path.dirname(file_path)
                if dir_name:
                    # only "already exists" is tolerated; real failures surface here
                    os.makedirs(dir_name, exist_ok=True)
                with open(file_path, 'wb') as f:
                    f.write(data)

        return raw_data_list

    @retry
    def find_job_id(self, job_name: str, pipeline_id: Optional[str] = None, job_status: str = 'success') -> List[Dict]:
        """
        Get Job ID from job name of specific pipeline

        :param job_name: job name
        :param pipeline_id: If None, will get pipeline id from CI pre-defined variable.
        :param job_status: status of job. One pipeline could have multiple jobs with same name after retry.
                           job_status is used to filter these jobs.
        :return: a list of dicts with keys ``id`` (job ID) and ``parallel_num`` (index taken from
                 the job name, None if the name has no "<index>/<total>" suffix). A parallel job
                 will generate multiple entries.
        """
        if pipeline_id is None:
            pipeline_id = os.getenv('CI_PIPELINE_ID')
        pipeline = self.project.pipelines.get(pipeline_id)

        job_id_list = []
        for job in pipeline.jobs.list(all=True):
            match = self.JOB_NAME_PATTERN.match(job.name)
            if match and match.group(1) == job_name and job.status == job_status:
                job_id_list.append({'id': job.id, 'parallel_num': match.group(3)})
        return job_id_list

    @retry
    def download_archive(self, ref: str, destination: str, project_id: Optional[int] = None) -> str:
        """
        Download archive of certain commit of a repository and extract to destination path

        :param ref: commit or branch name
        :param destination: destination path of extracted archive file
        :param project_id: download project of current instance if project_id is None
        :return: root path name of archive file
        """
        if project_id is None:
            project = self.project
        else:
            project = self.gitlab_inst.projects.get(project_id)

        temp_file = tempfile.NamedTemporaryFile(delete=False)
        try:
            with temp_file:
                try:
                    project.repository_archive(sha=ref, streamed=True, action=temp_file.write)
                except gitlab.GitlabGetError:
                    # report the real project even when the instance's own project was used
                    failed_id = project.id if project_id is None else project_id
                    print('Failed to archive from project {}'.format(failed_id))
                    raise

            print('archive size: {:.03f}MB'.format(float(os.path.getsize(temp_file.name)) / (1024 * 1024)))

            # NOTE(review): extractall() trusts the member paths stored in the
            # archive; confirm archives only ever come from a trusted server.
            with tarfile.open(temp_file.name, 'r') as archive_file:
                root_name = archive_file.getnames()[0]
                archive_file.extractall(destination)
        finally:
            # also runs on failure, so retried downloads don't pile up files
            self._remove_temp_file(temp_file.name)

        return os.path.join(os.path.realpath(destination), root_name)
|
|
|
|
|
|
|
|
|
2021-10-14 08:19:50 -04:00
|
|
|
def main() -> None:
    """
    Command line entry point: run one ``Gitlab`` action against a project.

    Positional arguments are the action name and the project ID. Supported
    actions are ``download_artifacts``, ``download_artifact``, ``find_job_id``,
    ``download_archive`` and ``get_project_id``; each one reads the optional
    arguments it needs. Any other action name does nothing.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('action')
    parser.add_argument('project_id', type=int)
    parser.add_argument('--pipeline_id', '-i', type=int, default=None)
    parser.add_argument('--ref', '-r', default='master')
    parser.add_argument('--job_id', '-j', type=int, default=None)
    parser.add_argument('--job_name', '-n', default=None)
    parser.add_argument('--project_name', '-m', default=None)
    parser.add_argument('--destination', '-d', default=None)
    parser.add_argument('--artifact_path', '-a', nargs='*', default=None)
    args = parser.parse_args()

    gitlab_inst = Gitlab(args.project_id)
    if args.action == 'download_artifacts':
        gitlab_inst.download_artifacts(args.job_id, args.destination)
    elif args.action == 'download_artifact':
        gitlab_inst.download_artifact(args.job_id, args.artifact_path, args.destination)
    elif args.action == 'find_job_id':
        job_ids = gitlab_inst.find_job_id(args.job_name, args.pipeline_id)
        # Output format: "<id>,<parallel_num>" entries joined with ';'.
        # parallel_num is None for jobs whose name has no "<index>/<total>"
        # suffix; it is printed as an empty field instead of making
        # str.join() raise TypeError.
        print(';'.join('{},{}'.format(j['id'], j['parallel_num'] or '') for j in job_ids))
    elif args.action == 'download_archive':
        gitlab_inst.download_archive(args.ref, args.destination)
    elif args.action == 'get_project_id':
        ret = gitlab_inst.get_project_id(args.project_name)
        print('project id: {}'.format(ret))


if __name__ == '__main__':
    main()
|