Skip to content

Commit

Permalink
add bootstrap script to download small LFS files efficiently
Browse files Browse the repository at this point in the history
The `create-bootstrap` script searches a repository for smallish LFS
files, combines them into larger LFS files, and adds them to a new
orphan branch called `bootstrap`. In addition, the script adds a `boot`
script to the orphan branch which splits the larger LFS files up, again.

In order to leverage the Git LFS pack files, the Git user needs to get
the `bootstrap` branch and run the `boot` script.
  • Loading branch information
larsxschneider committed Nov 8, 2019
1 parent 6596df8 commit ab319f3
Show file tree
Hide file tree
Showing 3 changed files with 331 additions and 0 deletions.
180 changes: 180 additions & 0 deletions scripts/boostrap/boot
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
#!/usr/bin/perl
#
# Bootstrap a repository. See here for more info:
# https://github.com/github/platform-samples/tree/master/scripts/bootstrap/create-bootstrap
#

use 5.010;
use strict;
use warnings;
use File::Basename;
use MIME::Base64;

# Minimum tool versions this script accepts. They are written as quoted
# dotted strings rather than bare v-string literals: version->parse() reads
# both forms the same way, but a quoted string stays printable, whereas a
# bare 2.16.0 stringifies to the control characters chr(2).chr(16).chr(0).
my $min_git_version     = '2.16.0';
my $min_git_lfs_version = '2.3.4';

# Print an error message to STDERR and terminate the script with exit code 1.
# A missing (or otherwise false) message falls back to a generic text.
sub error_exit {
    my ($msg) = @_;
    $msg ||= 'Bootstrapping repository failed.';
    print STDERR "ERROR: $msg\n";
    exit 1;
}

# Execute a command through system() and abort via error_exit() with
# $err_msg when the command does not finish with status 0.
sub run {
    my ($cmd, $err_msg) = @_;
    my $status = system($cmd);
    error_exit($err_msg) if $status != 0;
    return 1;
}

# Set a local config for the repository
# Set a repository-local Git config entry. $keyvalue is the "key value"
# pair exactly as it would be typed after `git config --local`.
sub config {
    my ($keyvalue) = @_;
    return run("git config --local $keyvalue");
}

# Print a section banner: a blank line, a rule of '#' characters, the title
# indented by one space, and a closing rule.
sub header {
    my ($title) = @_;
    my $rule = '##############################################################';
    print "\n$rule\n";
    print " $title";
    print "\n$rule\n";
}

# Remember when we started so the total bootstrap time can be reported.
my $start = time;

header('Checking Git and Git LFS...');

#
# Upgrade Git
#
# TODO: Currently we upgrade Git only on Windows. In the future we could check
#       if Git is installed via Homebrew on MacOS and upgrade it there too.
# NOTE(review): when this script is started through Git for Windows' sh.exe
#       the bundled perl may report $^O as 'msys' instead of 'MSWin32' --
#       confirm whether the upgrade should run in that case as well.
if ($^O eq 'MSWin32') {
    # Best effort: the result is intentionally not checked.
    system('git update-git-for-windows --gui');
}

#
# Check versions
#
# The version is the first dotted number in the tool output. It is parsed
# with a leading "v" so that it is always treated as a dotted-decimal
# version (e.g. "2.9" means v2.9.0 and not the decimal number 2.900).
my ($git_version) = `git --version` =~ /([0-9]+(?:[.][0-9]+)+)/;
if (!$git_version) {
    error_exit("Git seems not to be installed on this system.\nPlease follow install instructions on https://git-scm.com/downloads");
}
if (version->parse("v$git_version") < version->parse($min_git_version)) {
    error_exit("Git version $git_version on this system is outdated. Please upgrade to the latest version!");
}
print "Git version: $git_version\n";

my ($git_lfs_version) = `git lfs version` =~ /([0-9]+(?:[.][0-9]+)+)/;
if (!$git_lfs_version) {
    error_exit("Git LFS seems not to be installed on this system.\nPlease follow install instructions on https://git-lfs.github.com/");
}
if (version->parse("v$git_lfs_version") < version->parse($min_git_lfs_version)) {
    # Report the Git LFS version here, not the Git version.
    error_exit("Git LFS version $git_lfs_version on this system is outdated. Please upgrade to the latest version!");
}
print "Git LFS version: $git_lfs_version\n";

# Make sure a Git identity exists. Missing values are read from STDIN and
# stored in the global Git config. The values are passed to git as separate
# arguments (list-form system), so names containing spaces or shell
# metacharacters are stored verbatim instead of being split or interpreted
# by a shell.
if (system('git config user.name >/dev/null') != 0) {
    print "\nIt looks like your name was not configured in Git yet.\n";
    print "Please enter your name: ";
    my $username = <STDIN>;
    $username = '' if !defined $username;    # EOF on STDIN
    $username =~ s/^\s+|\s+$//g;
    error_exit('No name entered.') if $username eq '';
    system('git', 'config', '--global', 'user.name', $username) == 0
        or error_exit('Configuring the Git user name failed.');
}
if (system('git config user.email >/dev/null') != 0) {
    # TODO: We could check for the correct email format here
    print "\nIt looks like your email was not configured in Git yet.\n";
    print "Please enter your email address: ";
    my $email = <STDIN>;
    $email = '' if !defined $email;          # EOF on STDIN
    $email =~ s/^\s+|\s+$//g;
    error_exit('No email address entered.') if $email eq '';
    system('git', 'config', '--global', 'user.email', $email) == 0
        or error_exit('Configuring the Git user email failed.');
} else {
    # Strip the trailing newline ourselves instead of asking git for
    # NUL-terminated output, which would print a NUL byte to the terminal.
    chomp(my $configured_name  = `git config user.name`);
    chomp(my $configured_email = `git config user.email`);
    print "\nGit user: $configured_name\n";
    print "Git email: $configured_email\n";
}

header('Bootstrapping repository...');

#
# Configure the repo
#
# All following commands operate on the repository this script lives in.
chdir(dirname(__FILE__)) or error_exit("Cannot change into the directory of '$0': $!");

if (`git rev-parse --abbrev-ref HEAD` !~ /bootstrap/) {
    error_exit("Please run '$0' from the bootstrap branch");
}

# Ensure we are starting from a clean state in case the script failed
# in a previous run.
run('git reset --hard HEAD --quiet');
run('git clean --force -fdx');

# Ensure Git LFS is initialized in the repo
run('git lfs install --local >/dev/null', 'Initializing Git LFS failed.');

# Enable file system cache on Windows (no effect on OS X/Linux)
# see https://groups.google.com/forum/#!topic/git-for-windows/9WrSosaa4A8
config('core.fscache true');

# If the Git LFS locking feature is used, then Git LFS will set lockable files
# to "readonly" by default. This is implemented with a Git LFS "post-checkout"
# hook. Git LFS can skip this hook if no file is locked. However, Git LFS needs
# to traverse the entire tree to find all ".gitattributes" and check for locked
# files. In a large tree (e.g. >20k directories, >300k files) this can take a
# while. Instruct Git LFS to not set lockable files to "readonly". This skips
# the "post-checkout" entirely and speeds up Git LFS for large repositories.
config('lfs.setlockablereadonly false');

# Enable long path support for Windows (no effect on OS X/Linux)
# Git uses the proper API to create long paths on Windows. However, many
# Windows applications use an outdated API that only support paths up to a
# length of 260 characters. As a result these applications would not be able to
# work with the longer paths properly. Keep that in mind if you run into path
# trouble!
# see https://msdn.microsoft.com/en-us/library/aa365247(VS.85).aspx
config('core.longpaths true');

# `git config <key>` exits with code 1 when the key is not set. system()
# returns the raw wait status, in which the exit code lives in the high byte,
# so it has to be shifted down before it is compared with 1.
my $untracked_cache_unset =
    (system('git config core.untrackedCache >/dev/null 2>&1') >> 8) == 1;
if ($untracked_cache_unset &&
    system('git update-index --test-untracked-cache') == 0) {
    # Enable untracked cache if the file system supports it
    # see https://news.ycombinator.com/item?id=11388479
    config('core.untrackedCache true');
    config('feature.manyFiles true');
}

config('protocol.version 2');

# Download Submodule content in parallel
# see https://git-scm.com/docs/git-config#Documentation/git-config.txt-submodulefetchJobs
config('submodule.fetchJobs 0');

# Speed up "git status" by suppressing unnecessary terminal output
# see https://github.com/git/git/commit/fd9b544a2991ad74d73ad1bc0af4d24f91a6802b
config('status.aheadBehind false');

#
# Prepare the repo
#

if (-e 'pack/lfs-objects-1.tar.gz') {
    # Get the LFS "pack files"
    run('git lfs pull --include="pack/lfs-objects-*.tar.gz"', 'Downloading Git LFS pack files failed.');
    print "\n";

    my $error_lfs = 'Extracting Git LFS pack files failed.';
    my $progress = 0;

    # Extract one archive per tar invocation. tar accepts a single archive
    # after -f and treats every further argument as the name of a member to
    # extract, so handing it all archives at once fails as soon as there is
    # more than one pack file.
    for my $archive (sort glob('pack/lfs-objects-*.tar.gz')) {
        # The name is embedded in a shell command below; accept only the
        # numbered archive names this script expects.
        $archive =~ m{\A pack/lfs-objects-[0-9]+[.]tar[.]gz \z}x
            or error_exit($error_lfs);

        # stderr is merged into the pipe; the loop counts every line tar
        # prints as one unit of progress. The total shown after the slash is
        # a placeholder that is substituted when this script is installed
        # into the bootstrap branch.
        open(my $pipe, '-|', "tar -xzvf '$archive' 2>&1") or error_exit($error_lfs);
        while (my $line = <$pipe>) {
            $progress++;
            print "\rExtracting LFS objects: $progress/lfs_pack_count";
        }
        # close() on a pipe reports a non-zero exit status of tar.
        close($pipe) or error_exit($error_lfs);
    }
    print "\n";
}

# Switch to the repository's main line of development. The branch name in
# the command is a placeholder that is substituted when this script is
# installed into the bootstrap branch.
run('git checkout --force default_branch');

# Initialize submodules, if the checked out tree declares any.
run('git submodule update --init --recursive --reference .git') if -e '.gitmodules';

# Remove the Git LFS pack files that are obsolete now. The overrides make
# the prune ignore the usual "recent objects" retention settings.
my $prune_overrides = join ' ', map { "-c $_" } (
    'lfs.fetchrecentcommitsdays=0',
    'lfs.fetchrecentrefsdays=0',
    'lfs.fetchrecentremoterefs=false',
    'lfs.pruneoffsetdays=0',
);
run("git $prune_overrides lfs prune >/dev/null");

header('Hurray! Your Git repository is ready for you!');
my $elapsed = time - $start;
print 'Bootstrap time: ' . $elapsed . " s\n";
4 changes: 4 additions & 0 deletions scripts/boostrap/boot.bat
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
@echo off
rem Windows (cmd.exe) entry point: change into the directory of this file and
rem run the "boot" script found there with the sh.exe located under
rem %ProgramFiles%\Git. Both paths are quoted as a whole so that spaces and
rem characters such as "&" in them are not interpreted by cmd.exe.
pushd "%~dp0"
"%ProgramFiles%\Git\bin\sh.exe" -c "./boot"
popd
147 changes: 147 additions & 0 deletions scripts/boostrap/create-bootstrap
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
#!/usr/bin/env bash
#
# The `create-bootstrap` script searches a repository for smallish LFS files,
# combines them into larger LFS files, and adds them to a new orphan branch
# called `bootstrap`. In addition, the script adds a `boot` script to the
# orphan branch which splits the larger LFS files up, again.
#
# In order to leverage the Git LFS pack files, the Git user needs to get the
# `bootstrap` branch and run the `boot` script.
#
# Usage:
# 1. Clone your repository with the smallish LFS files
# 2. `cd` into the repository
# 3. Run this script
#
# Abort as soon as any command fails.
set -e

# Absolute path of the directory containing this script; the `boot` and
# `boot.bat` templates are copied from here. `dirname` is used instead of
# "${0%/*}" because the latter yields the script name itself (not a
# directory) when $0 contains no slash, e.g. for `bash create-bootstrap`.
base_dir=$(cd "$(dirname -- "$0")" && pwd)
# Uncomment to skip the check for an already existing 'bootstrap' branch.
# force=1;

# Print a section banner: an empty line, a rule of '#' characters, the
# title ($1) indented by one space, and a closing rule.
function header {
    local rule="##############################################################"
    printf '\n%s\n %s\n%s\n' "$rule" "$1" "$rule"
}

# Print an error message ($1) and terminate the script with exit code 1.
# The message goes to stderr so it is not mixed into captured or piped
# standard output.
function error {
    echo "ERROR: $1" >&2
    exit 1
}

# The script must be started from the top-level directory of a clone.
if [ ! -d .git ]; then
    error "Looks like you are not in the root directory of a Git repository."
fi

# Refuse to overwrite an existing remote 'bootstrap' branch unless `force`
# is set.
if [ -z "$force" ] && git rev-parse --verify origin/bootstrap >/dev/null 2>&1; then
    error "Branch 'bootstrap' exists already. Please delete it!"
fi

# The currently checked out branch is the one `boot` will check out later.
default_branch=$(git rev-parse --abbrev-ref HEAD)

# Without an explicit check a missing 'origin' remote would abort the script
# silently because of `set -e`.
remote_url=$(git config --get remote.origin.url) ||
    error "No URL configured for the remote 'origin'."

# Derive the repository name from the last path component of the URL,
# ignoring a trailing slash and a trailing ".git".
repo_name=${remote_url%/}
repo_name=${repo_name##*/}
repo_name=${repo_name%.git}

header "Ensure relevant Git LFS objects are present..."
# Bring the working copy and the LFS objects of the repository and all of
# its submodules up to date.
git pull
git lfs pull
git submodule foreach --recursive git lfs pull

# Config overrides that make `git lfs prune` ignore the usual "recent
# objects" retention settings.
prune_overrides=(
    -c lfs.fetchrecentcommitsdays=0
    -c lfs.fetchrecentrefsdays=0
    -c lfs.fetchrecentremoterefs=false
    -c lfs.pruneoffsetdays=0
)
git "${prune_overrides[@]}" lfs prune
git submodule foreach --recursive git "${prune_overrides[@]}" lfs prune

header "1/4 Creating 'bootstrap' branch..."
# Start an unrelated history, unstage everything and empty the working tree.
git checkout --orphan bootstrap
git reset
git clean -f -d -x --force --quiet

header "2/4 Creating Git LFS pack files..."

# Copy LFS files of the submodule into the parent repo to make them
# part of the LFS packfile
if [ -e ./.git/modules ]; then
    find ./.git/modules -type d -path '*/lfs' -exec cp -rf {} .git/ \;
fi

# Find all LFS files smaller than 256MB and put them into tar files no
# larger than 256MB. Finally, print the number of total files added to
# the archives.
#
# Files that are too large are skipped *before* the batch bookkeeping, so
# they can never open a new, empty batch. This keeps the archive numbering
# gap-free and guarantees that the first archive is number 1, which is the
# file `boot` looks for.
rm -rf pack
mkdir pack
lfs_pack_count=$(
    find ./.git/lfs/objects -type f |
    perl -ne '
        BEGIN {
            $limit = 256*1024*1024;
            $batch_id = 0;
            $batch_size = 0;
            $total_count = 0;
        }
        chomp(my $path = $_);
        next if !$path;
        my $size = -s $path;
        next if !defined $size || $size >= $limit;
        if (!$batch_id || $batch_size + $size > $limit) {
            $batch_id++;
            $batch_size = 0;
        }
        $total_count++;
        $batch_size += $size;
        my $tar = "pack/lfs-objects-$batch_id.tar";
        # List form: no shell involved, and a failing tar is not ignored.
        system("tar", "-rf", $tar, $path) == 0
            or die "Adding $path to $tar failed\n";
        END { print $total_count; }
    '
)

# Compress those tar files. Without any archive `gzip pack/*` would fail on
# the unmatched glob and abort the script, so it is skipped in that case.
if compgen -G 'pack/lfs-objects-*.tar' >/dev/null; then
    gzip pack/*
fi
git lfs track 'pack/lfs-objects-*.tar.gz'
git add pack/lfs-objects-*.tar.gz 2>/dev/null || true

# Boot entry point for Linux/MacOS (bash)
cp "$base_dir/boot" boot
# Fill in the placeholders of the template. The values are handed to perl
# through the environment instead of being pasted into the s/// program, so
# a branch name containing "/" (or other characters special to perl) cannot
# break or alter the substitution.
BOOTSTRAP_VALUE="$default_branch" \
    perl -pi -e 's/default_branch/$ENV{BOOTSTRAP_VALUE}/' boot
BOOTSTRAP_VALUE="$lfs_pack_count" \
    perl -pi -e 's/lfs_pack_count/$ENV{BOOTSTRAP_VALUE}/' boot

# Boot entry point for Windows (cmd.exe)
cp "$base_dir/boot.bat" boot.bat

cat << EOF > README.md
## Bootstrap Branch
This branch is not related to the rest of the repository content.
The purpose of this branch is to bootstrap the repository quickly
using Git LFS pack files and setting useful defaults.
Bootstrap the repository with the following commands.
### Windows (cmd.exe)
\`\`\`
$ git clone $remote_url --branch bootstrap && $repo_name\\boot.bat
\`\`\`
### Linux/MacOS (bash):
\`\`\`
$ git clone $remote_url --branch bootstrap && ./$repo_name/boot
\`\`\`
EOF

# Note: The `.gitattributes` file is deliberately left out of the commit.
#       That way the Git LFS pack files are not downloaded by the initial
#       clone but only later by the `boot` script.
git add README.md boot boot.bat

header "3/4 Uploading 'bootstrap' branch..."
# Commit with a fixed identity so the bootstrap commit does not depend on
# the local Git configuration.
# NOTE(review): the email literal below looks like a redaction placeholder
#               rather than a real address -- confirm the intended value.
commit_identity=(
    -c user.email="[email protected]"
    -c user.name="Bootstrap Creator"
)
git "${commit_identity[@]}" commit --quiet --message="Initial commit"
git push --force --set-upstream origin bootstrap

header "4/4 Done"
cat README.md

0 comments on commit ab319f3

Please sign in to comment.