forked from ClickHouse/ClickHouse
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbuild_download_helper.py
150 lines (118 loc) · 4.81 KB
/
build_download_helper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
#!/usr/bin/env python3
import json
import logging
import os
import sys
import time
from typing import List, Optional
import requests # type: ignore
from ci_config import CI_CONFIG
# Number of attempts: the default ``retries`` of get_with_retries() and the
# attempt count of the retry loop in download_build_with_progress().
DOWNLOAD_RETRIES_COUNT = 5
def get_with_retries(
    url: str,
    retries: int = DOWNLOAD_RETRIES_COUNT,
    sleep: int = 3,
    **kwargs,
) -> requests.Response:
    """Fetch ``url`` with ``requests.get``, retrying on any failure.

    Args:
        url: address to request.
        retries: maximum number of attempts.
        sleep: seconds to wait between two consecutive attempts.
        **kwargs: forwarded unchanged to ``requests.get``.

    Returns:
        The first response for which ``raise_for_status()`` did not raise.

    Raises:
        Exception: when every attempt failed (chained to the last error), or
            when ``retries`` is not positive and no attempt was made at all.
    """
    logging.info(
        "Getting URL with %i tries and sleep %i in between: %s", retries, sleep, url
    )
    exc = None  # type: Optional[Exception]
    for i in range(retries):
        try:
            response = requests.get(url, **kwargs)
            response.raise_for_status()
            return response
        except Exception as e:
            # ``e`` is unbound once the except block ends, so keep a reference.
            exc = e
            # Do not sleep after the last attempt: nothing follows it.
            if i + 1 < retries:
                logging.info("Exception '%s' while getting, retry %i", e, i + 1)
                time.sleep(sleep)

    if exc is None:
        # range(retries) was empty, so there is no underlying error to report.
        raise Exception(
            f"Cannot get {url}: no attempts were made (retries={retries})"
        )
    # Chain explicitly: outside the except block the original traceback would
    # otherwise be dropped from the report.
    raise Exception(exc) from exc
def get_build_name_for_check(check_name) -> str:
    """Return the ``required_build`` entry configured for ``check_name``.

    The value is looked up under ``CI_CONFIG["tests_config"]``; a missing key
    propagates as ``KeyError``.
    """
    check_config = CI_CONFIG["tests_config"][check_name]
    return check_config["required_build"]
def read_build_urls(build_name, reports_path) -> List[str]:
    """Return the ``build_urls`` list from the first matching report file.

    Walks ``reports_path`` recursively and picks the first file whose name
    contains ``build_name``; that file is parsed as JSON. Returns an empty
    list when no file name matches.
    """
    for directory, _, file_names in os.walk(reports_path):
        for file_name in file_names:
            if build_name not in file_name:
                continue
            logging.info("Found build report json %s", file_name)
            report_path = os.path.join(directory, file_name)
            with open(report_path, "r", encoding="utf-8") as report_file:
                report = json.load(report_file)
                return report["build_urls"]
    return []
def download_build_with_progress(url, path):
    """Download ``url`` into the file at ``path``, retrying on failure.

    Up to ``DOWNLOAD_RETRIES_COUNT`` attempts are made, with a 3 second pause
    between them. After a failed attempt the partially written file is
    removed. When the response carries a non-zero ``content-length`` and
    stdout is a terminal, a 50-character progress bar is drawn while the body
    is streamed in 4096-byte chunks; otherwise the body is written in one go.

    Args:
        url: address of the file to download.
        path: destination file path; it is opened for binary writing.

    Raises:
        Exception: when all attempts failed; chained to the last error.
    """
    logging.info("Downloading from %s to temp path %s", url, path)
    last_error = None  # type: Optional[Exception]
    for i in range(DOWNLOAD_RETRIES_COUNT):
        try:
            with open(path, "wb") as f:
                # Retrying is handled by this loop, hence a single try here.
                response = get_with_retries(url, retries=1, stream=True)
                try:
                    total_length = response.headers.get("content-length")
                    if total_length is None or int(total_length) == 0:
                        logging.info(
                            "No content-length, will download file without progress"
                        )
                        f.write(response.content)
                    else:
                        dl = 0
                        total_length = int(total_length)
                        logging.info("Content length is %ld bytes", total_length)
                        for data in response.iter_content(chunk_size=4096):
                            dl += len(data)
                            f.write(data)
                            if sys.stdout.isatty():
                                done = int(50 * dl / total_length)
                                percent = int(100 * float(dl) / total_length)
                                eq_str = "=" * done
                                space_str = " " * (50 - done)
                                sys.stdout.write(
                                    f"\r[{eq_str}{space_str}] {percent}%"
                                )
                                sys.stdout.flush()
                finally:
                    # A streamed response keeps its connection open until
                    # closed, so release it on success and on failure alike.
                    response.close()
            break
        except Exception as e:
            # ``e`` is unbound once the except block ends, so keep a reference.
            last_error = e
            if sys.stdout.isatty():
                # Finish the unterminated progress-bar line.
                sys.stdout.write("\n")
            logging.warning(
                "Attempt %i of %i to download %s failed: %s",
                i + 1,
                DOWNLOAD_RETRIES_COUNT,
                url,
                e,
            )
            # Never leave a truncated file behind.
            if os.path.exists(path):
                os.remove(path)
            if i + 1 < DOWNLOAD_RETRIES_COUNT:
                time.sleep(3)
    else:
        raise Exception(
            f"Cannot download dataset from {url}, all retries exceeded"
        ) from last_error

    if sys.stdout.isatty():
        sys.stdout.write("\n")
    logging.info("Downloading finished")
def download_builds(result_path, build_urls, filter_fn):
    """Download every URL in ``build_urls`` accepted by ``filter_fn``.

    The target file name is the basename of the URL after replacing the
    ``%2B`` and ``%20`` escapes with ``+`` and a space; the file is stored
    inside ``result_path``.
    """
    for build_url in build_urls:
        if not filter_fn(build_url):
            continue
        decoded_url = build_url.replace("%2B", "+").replace("%20", " ")
        file_name = os.path.basename(decoded_url)
        logging.info("Will download %s to %s", file_name, result_path)
        target_path = os.path.join(result_path, file_name)
        download_build_with_progress(build_url, target_path)
def download_builds_filter(
    check_name, reports_path, result_path, filter_fn=lambda _: True
):
    """Download the builds required by ``check_name`` that pass ``filter_fn``.

    Resolves the build name for the check, reads its URLs from the report
    files under ``reports_path``, prints them, and downloads the accepted
    ones into ``result_path``. By default every URL is accepted.

    Raises:
        Exception: when no build URLs were found.
    """
    required_build = get_build_name_for_check(check_name)
    build_urls = read_build_urls(required_build, reports_path)
    print(build_urls)
    if build_urls:
        download_builds(result_path, build_urls, filter_fn)
        return
    raise Exception("No build URLs found")
def download_all_deb_packages(check_name, reports_path, result_path):
    """Download the builds of ``check_name`` whose URL ends with ``deb``."""

    def is_deb_package(url):
        return url.endswith("deb")

    download_builds_filter(check_name, reports_path, result_path, is_deb_package)
def download_shared_build(check_name, reports_path, result_path):
    """Download the builds of ``check_name`` whose URL ends with ``shared_build.tgz``."""

    def is_shared_build(url):
        return url.endswith("shared_build.tgz")

    download_builds_filter(check_name, reports_path, result_path, is_shared_build)
def download_unit_tests(check_name, reports_path, result_path):
    """Download the builds of ``check_name`` whose URL ends with ``unit_tests_dbms``."""

    def is_unit_tests_binary(url):
        return url.endswith("unit_tests_dbms")

    download_builds_filter(
        check_name, reports_path, result_path, is_unit_tests_binary
    )
def download_clickhouse_binary(check_name, reports_path, result_path):
    """Download the builds of ``check_name`` whose URL ends with ``clickhouse``."""

    def is_clickhouse_binary(url):
        return url.endswith("clickhouse")

    download_builds_filter(
        check_name, reports_path, result_path, is_clickhouse_binary
    )
def download_performance_build(check_name, reports_path, result_path):
    """Download the builds of ``check_name`` whose URL ends with ``performance.tgz``."""

    def is_performance_build(url):
        return url.endswith("performance.tgz")

    download_builds_filter(
        check_name, reports_path, result_path, is_performance_build
    )