Skip to content

Commit

Permalink
[vcpkg] Improve versioning files generators (microsoft#15172)
Browse files Browse the repository at this point in the history
* Remove port version splitting from x-history

* Parallelize versions file generator

* Use cpu_count()/2 to avoid crashes

* Use generatePortVersionsDb.py output to generate baseline

* Update scripts/generateBaseline.py

Co-authored-by: Adam Johnson <[email protected]>

* rename generateBaseline function

* Update toolsrc/src/vcpkg/commands.porthistory.cpp

Co-authored-by: ras0219 <[email protected]>

* Remove unused code

Co-authored-by: Adam Johnson <[email protected]>
Co-authored-by: ras0219 <[email protected]>
  • Loading branch information
3 people authored Jan 5, 2021
1 parent 378ffbb commit 6d3d649
Show file tree
Hide file tree
Showing 3 changed files with 140 additions and 163 deletions.
103 changes: 62 additions & 41 deletions scripts/generateBaseline.py
Original file line number Diff line number Diff line change
@@ -1,50 +1,71 @@
import os
import json
import subprocess
import sys
import json
import time

from pathlib import Path


SCRIPT_DIRECTORY = os.path.dirname(os.path.abspath(__file__))
PORTS_DIRECTORY = os.path.join(SCRIPT_DIRECTORY, '../ports')
VERSIONS_DB_DIRECTORY = os.path.join(SCRIPT_DIRECTORY, '../port_versions')


def generate_baseline():
    """Aggregate the per-port version DB files into port_versions/baseline.json.

    Reads the newest entry of each ${VCPKG_ROOT}/port_versions/<x>-/<port>.json
    file (produced by generatePortVersionsDb.py) and writes a single baseline
    object mapping every port to its latest version under the 'default' key.
    """
    start_time = time.time()

    # Assume each directory in ${VCPKG_ROOT}/ports is a different port
    port_names = [item for item in os.listdir(
        PORTS_DIRECTORY) if os.path.isdir(os.path.join(PORTS_DIRECTORY, item))]
    port_names.sort()

    baseline_entries = {}
    total_count = len(port_names)
    for i, port_name in enumerate(port_names, 1):
        # Version DB files are sharded by the first letter of the port name.
        port_file_path = os.path.join(
            VERSIONS_DB_DIRECTORY, f'{port_name[0]}-', f'{port_name}.json')

        if not os.path.exists(port_file_path):
            print(
                f'Error: No version file for {port_name}.\n', file=sys.stderr)
            continue
        sys.stderr.write(
            f'\rProcessed {i}/{total_count} ({i/total_count:.2%})')
        with open(port_file_path, 'r') as db_file:
            try:
                versions_object = json.load(db_file)
                if versions_object['versions']:
                    # Entries are ordered newest-first; take the latest one.
                    last_version = versions_object['versions'][0]
                    version_obj = {}
                    # Copy only the single version scheme the port declares.
                    if 'version' in last_version:
                        version_obj['version'] = last_version['version']
                    elif 'version-date' in last_version:
                        version_obj['version-date'] = last_version['version-date']
                    elif 'version-semver' in last_version:
                        # BUG FIX: this assignment used '-' instead of '=',
                        # which is a str subtraction that raises TypeError for
                        # every semver port instead of recording its version.
                        version_obj['version-semver'] = last_version['version-semver']
                    else:
                        version_obj['version-string'] = last_version['version-string']
                    version_obj['port-version'] = last_version['port-version']
                    baseline_entries[port_name] = version_obj
            except json.JSONDecodeError as e:
                print(f'Error: Decoding {port_file_path}\n{e}\n')

    baseline_object = {}
    baseline_object['default'] = baseline_entries

    os.makedirs(VERSIONS_DB_DIRECTORY, exist_ok=True)
    baseline_path = os.path.join(VERSIONS_DB_DIRECTORY, 'baseline.json')
    with open(baseline_path, 'w') as baseline_file:
        json.dump(baseline_object, baseline_file)

    elapsed_time = time.time() - start_time
    print(f'\nElapsed time: {elapsed_time:.2f} seconds')


def main():
    """Entry point: verify the version DB exists, then build the baseline."""
    # The per-port version DB is this script's only input; without it there
    # is nothing to aggregate, so stop with a pointer to the generator
    # instead of producing an empty baseline.
    # BUG FIX: the message previously named a non-existent script
    # (`generatePortVersionsDB`) and execution continued regardless.
    if not os.path.exists(VERSIONS_DB_DIRECTORY):
        print('Version DB files must exist before generating a baseline.\n'
              'Run: `python generatePortVersionsDb.py`\n', file=sys.stderr)
        sys.exit(1)
    generate_baseline()


if __name__ == "__main__":
    main()
103 changes: 38 additions & 65 deletions scripts/generatePortVersionsDb.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,19 @@
import os
import os.path
import sys
import subprocess
import json
import time
import shutil

from subprocess import CalledProcessError
from json.decoder import JSONDecodeError
import multiprocessing

from pathlib import Path


MAX_PROCESSES = multiprocessing.cpu_count()
SCRIPT_DIRECTORY = os.path.dirname(os.path.abspath(__file__))
PORTS_DIRECTORY = os.path.join(SCRIPT_DIRECTORY, '../ports')
VERSIONS_DB_DIRECTORY = os.path.join(SCRIPT_DIRECTORY, '../port_versions')


def get_current_git_ref():
Expand All @@ -24,91 +26,62 @@ def get_current_git_ref():
return None


def generate_port_versions_db(ports_path, db_path, revision):
def generate_port_versions_file(port_name):
    """Write port_versions/<x>-/<port>.json for one port via `vcpkg x-history`.

    Skips ports whose output file already exists so re-runs only fill gaps.
    Designed to be used as a multiprocessing.Pool worker (takes a single
    picklable argument, returns nothing, reports failures on stderr).
    """
    # DB files are sharded into subdirectories by first letter of the port.
    containing_dir = os.path.join(VERSIONS_DB_DIRECTORY, f'{port_name[0]}-')
    os.makedirs(containing_dir, exist_ok=True)

    output_file_path = os.path.join(containing_dir, f'{port_name}.json')
    if not os.path.exists(output_file_path):
        env = os.environ.copy()
        # Stop git from taking optional locks; parallel x-history workers
        # would otherwise contend on them.
        env['GIT_OPTIONAL_LOCKS'] = '0'
        # BUG FIX: the binary name was hard-coded as 'vcpkg.exe', which only
        # exists on Windows; pick the platform-appropriate executable (the
        # sibling generateBaseline.py already uses the bare 'vcpkg' name).
        vcpkg_name = '../vcpkg.exe' if os.name == 'nt' else '../vcpkg'
        output = subprocess.run(
            [os.path.join(SCRIPT_DIRECTORY, vcpkg_name),
             'x-history', port_name, '--x-json', f'--output={output_file_path}'],
            capture_output=True, encoding='utf-8', env=env)
        if output.returncode != 0:
            print(f'x-history {port_name} failed: ',
                  output.stdout.strip(), file=sys.stderr)


def generate_port_versions_db(revision):
    """Generate one version-DB JSON file per port, in parallel.

    Fans generate_port_versions_file() out over a process pool, then touches
    a file named after `revision` so later runs can detect that the DB is
    already current for this commit.
    """
    start_time = time.time()

    # Assume each directory in ${VCPKG_ROOT}/ports is a different port
    port_names = [item for item in os.listdir(
        PORTS_DIRECTORY) if os.path.isdir(os.path.join(PORTS_DIRECTORY, item))]
    total_count = len(port_names)

    # Use half the logical CPUs (at least one): every worker spawns a vcpkg
    # child process, so a full-width pool doubles the real process count and
    # has been observed to crash.
    concurrency = max(1, MAX_PROCESSES // 2)
    print(f'Running {concurrency:.0f} parallel processes')
    # BUG FIX: the pool was created with MAX_PROCESSES, contradicting both
    # the message just printed and the intent of halving the worker count;
    # Pool also requires an int, whereas MAX_PROCESSES / 2 is a float.
    process_pool = multiprocessing.Pool(concurrency)
    try:
        # imap_unordered yields as workers finish, enabling a live progress
        # line without waiting for all ports.
        for i, _ in enumerate(process_pool.imap_unordered(
                generate_port_versions_file, port_names), 1):
            sys.stderr.write(
                f'\rProcessed: {i}/{total_count} ({(i / total_count):.2%})')
    finally:
        process_pool.close()
        process_pool.join()

    # Generate timestamp
    rev_file = os.path.join(VERSIONS_DB_DIRECTORY, revision)
    Path(rev_file).touch()

    elapsed_time = time.time() - start_time
    print(
        f'\nElapsed time: {elapsed_time:.2f} seconds')


def main(ports_path, db_path):
def main():
revision = get_current_git_ref()
if not revision:
print('Couldn\'t fetch current Git revision', file=sys.stderr)
sys.exit(1)

rev_file = os.path.join(db_path, revision)
rev_file = os.path.join(VERSIONS_DB_DIRECTORY, revision)
if os.path.exists(rev_file):
print(f'Database files already exist for commit {revision}')
sys.exit(0)

if (os.path.exists(db_path)):
try:
shutil.rmtree(db_path)
except OSError as e:
print(f'Could not delete folder: {db_path}.\nError: {e.strerror}')

generate_port_versions_db(ports_path=ports_path,
db_path=db_path,
revision=revision)
generate_port_versions_db(revision)


if __name__ == "__main__":
main(ports_path=os.path.join(SCRIPT_DIRECTORY, '../ports'),
db_path=os.path.join(SCRIPT_DIRECTORY, '../port_versions'))
main()
Loading

0 comments on commit 6d3d649

Please sign in to comment.