forked from github/platform-samples
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add bootstrap script to download small LFS files efficiently
The `create-bootstrap` script searches a repository for smallish LFS files, combines them into larger LFS "pack" files, and adds those to a new orphan branch called `bootstrap`. The script also adds a `boot` script to the orphan branch, which unpacks the larger files into the individual LFS objects again. To benefit from the Git LFS pack files, a Git user needs to check out the `bootstrap` branch and run the `boot` script.
- Loading branch information
1 parent
6596df8
commit ab319f3
Showing
3 changed files
with
331 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,180 @@ | ||
#!/usr/bin/perl | ||
# | ||
# Bootstrap a repository. See here for more info: | ||
# https://github.com/github/platform-samples/tree/master/scripts/bootstrap/create-bootstrap | ||
# | ||
|
||
use 5.010;
use strict;
use warnings;
use version;
use File::Basename;
use MIME::Base64;
|
||
# Minimum tool versions this script accepts. They are quoted on purpose: a
# bare literal with two dots (2.16.0) is parsed by Perl as a v-string, which
# stringifies to control characters instead of readable text. The dotted
# string form is understood by version->parse() directly.
my $min_git_version     = '2.16.0';
my $min_git_lfs_version = '2.3.4';
|
||
# Report a fatal problem on STDERR and terminate the script with exit code 1.
#   $msg - text printed after "ERROR: "; when it is missing (or otherwise
#          false) a generic failure message is used instead.
sub error_exit {
    my $msg = shift || 'Bootstrapping repository failed.';
    print STDERR 'ERROR: ' . $msg . "\n";
    exit 1;
}
|
||
# Execute a command line via system() and abort the script if it fails.
#   $cmd     - command line to execute
#   $err_msg - optional message handed to error_exit() when the command
#              does not finish with status 0
sub run {
    my ($cmd, $err_msg) = @_;
    my $status = system($cmd);
    error_exit($err_msg) unless $status == 0;
    return 1;
}
|
||
# Store one setting in the repository-local Git configuration.
#   $keyvalue - key and value separated by a space, e.g. 'core.longpaths true';
#               the text is appended unchanged to `git config --local`.
sub config {
    my ($keyvalue) = @_;
    run("git config --local $keyvalue");
}
|
||
# Print a section banner: the given title, indented by one space, framed by
# a rule of '#' characters above and below.
#   $str - title text
sub header {
    my ($str) = @_;
    my $rule = "\n##############################################################\n";
    print $rule . ' ' . $str . $rule;
}
|
||
# Remember the start time so the total bootstrap duration can be reported
# at the end of the script.
my $start = time;

header('Checking Git and Git LFS...');

#
# Upgrade Git
#
# The perl that ships with Git for Windows is an MSYS2 build and reports
# 'msys' in $^O; only native Windows builds of perl report 'MSWin32'. Both
# are accepted so the upgrade also runs when this script is started through
# Git's sh.exe (which is what boot.bat does).
#
# TODO: Currently we upgrade Git only on Windows. In the future we could check
#       if Git is installed via Homebrew on MacOS and upgrade it there too.
if ($^O eq 'MSWin32' || $^O eq 'msys') {
    # The exit status is not checked: a failed or declined upgrade does not
    # stop the bootstrap, the version check that follows decides.
    system('git update-git-for-windows --gui');
}
|
||
#
# Check versions
#
# Both commands print a line that contains the version as a dotted number
# (for example "git version 2.39.2"); the first such number is extracted.
# When a command cannot be run at all the backticks yield undef, which is
# mapped to an empty string so the "not installed" branch is taken.
my ($git_version) = (`git --version` // '') =~ /([0-9]+(?:[.][0-9]+)+)/;
if (!$git_version) {
    error_exit("Git seems not to be installed on this system.\nPlease follow install instructions on https://git-scm.com/");
}
if (version->parse($git_version) < version->parse($min_git_version)) {
    error_exit("Git version $git_version on this system is outdated. Please upgrade to the latest version!");
}
print "Git version: $git_version\n";

my ($git_lfs_version) = (`git lfs version` // '') =~ /([0-9]+(?:[.][0-9]+)+)/;
if (!$git_lfs_version) {
    error_exit("Git LFS seems not to be installed on this system.\nPlease follow install instructions on https://git-lfs.github.com/");
}
if (version->parse($git_lfs_version) < version->parse($min_git_lfs_version)) {
    # Report the Git LFS version here, not the Git version.
    error_exit("Git LFS version $git_lfs_version on this system is outdated. Please upgrade to the latest version!");
}
print "Git LFS version: $git_lfs_version\n";
|
||
#
# Make sure a Git identity is configured
#
# Values typed by the user are handed to Git through the list form of
# system(): no shell is involved, so a name with spaces ("Jane Doe") or with
# shell metacharacters is stored exactly as entered.
if (system('git config user.name >/dev/null') != 0) {
    print "\nIt looks like your name was not configured in Git yet.\n";
    print "Please enter your name: ";
    chomp(my $username = <STDIN> // '');    # <STDIN> is undef at end of input
    if ($username ne '') {
        system('git', 'config', '--global', 'user.name', $username);
    } else {
        print "No name entered, leaving user.name unset.\n";
    }
}
if (system('git config user.email >/dev/null') != 0) {
    # TODO: We could check for the correct email format here
    print "\nIt looks like your email was not configured in Git yet.\n";
    print "Please enter your email address: ";
    chomp(my $email = <STDIN> // '');
    if ($email ne '') {
        system('git', 'config', '--global', 'user.email', $email);
    } else {
        print "No email address entered, leaving user.email unset.\n";
    }
} else {
    # Show the identity that will be used. The plain (newline terminated)
    # output is read and chomped so no NUL bytes end up on the terminal.
    chomp(my $current_name  = `git config user.name`  // '');
    chomp(my $current_email = `git config user.email` // '');
    print "\nGit user: $current_name\n";
    print "Git email: $current_email\n";
}
|
||
header('Bootstrapping repository...');

#
# Configure the repo
#
# Every following Git command is meant to run inside the directory that
# contains this script. Abort if that directory cannot be entered: the
# reset/clean below discard files and must not run anywhere else.
my $script_dir = dirname(__FILE__);
chdir $script_dir or error_exit("Cannot change into directory '$script_dir': $!");

if ((`git rev-parse --abbrev-ref HEAD` // '') !~ /bootstrap/) {
    error_exit("Please run '$0' from the bootstrap branch");
}

# Ensure we are starting from a clean state in case the script failed
# in a previous run. "force" is passed to `git clean` twice (--force and -f).
run('git reset --hard HEAD --quiet');
run('git clean --force -fdx');

# Ensure Git LFS is initialized in the repo
run('git lfs install --local >/dev/null', 'Initializing Git LFS failed.');
|
||
# Repository-local settings, applied one after the other in the listed order.
for my $setting (
    # File system cache on Windows (no effect on OS X/Linux)
    # see https://groups.google.com/forum/#!topic/git-for-windows/9WrSosaa4A8
    'core.fscache true',

    # When the Git LFS locking feature is used, Git LFS marks lockable files
    # "readonly" by default. It does so in a "post-checkout" hook, which can
    # only be skipped if no file is locked - and to find that out Git LFS has
    # to walk the entire tree looking for ".gitattributes" files. In a large
    # tree (e.g. >20k directories, >300k files) this can take a while.
    # Telling Git LFS not to set lockable files to "readonly" skips the
    # "post-checkout" work entirely and speeds up Git LFS for large
    # repositories.
    'lfs.setlockablereadonly false',

    # Long path support for Windows (no effect on OS X/Linux)
    # Git uses the proper API to create long paths on Windows. However, many
    # Windows applications use an outdated API that only supports paths up to
    # a length of 260 characters, so they cannot work with the longer paths
    # properly. Keep that in mind if you run into path trouble!
    # see https://msdn.microsoft.com/en-us/library/aa365247(VS.85).aspx
    'core.longpaths true',
) {
    config($setting);
}
|
||
# Enable the untracked cache if it has not been configured yet and the file
# system supports it.
# see https://news.ycombinator.com/item?id=11388479
#
# system() returns the raw wait status, so the exit code has to be extracted
# with ">> 8" before it is compared. `git config <key>` exits with code 1
# when the key is not set.
my $untracked_cache_unset =
    (system('git config core.untrackedCache >/dev/null 2>&1') >> 8) == 1;
if ($untracked_cache_unset &&
    system('git update-index --test-untracked-cache') == 0) {
    config('core.untrackedCache true');
    config('feature.manyFiles true');
}
|
||
# Further repository-local settings, written as [key, value] pairs and
# applied in order.
for my $pair (
    ['protocol.version', '2'],

    # Download submodule content in parallel
    # see https://git-scm.com/docs/git-config#Documentation/git-config.txt-submodulefetchJobs
    ['submodule.fetchJobs', '0'],

    # Speed up "git status" by suppressing the ahead/behind computation
    # see https://github.com/git/git/commit/fd9b544a2991ad74d73ad1bc0af4d24f91a6802b
    ['status.aheadBehind', 'false'],
) {
    config(join(' ', @{$pair}));
}
|
||
#
# Prepare the repo
#

if (-e 'pack/lfs-objects-1.tar.gz') {
    # Get the LFS "pack files"
    run('git lfs pull --include="pack/lfs-objects-*.tar.gz"', 'Downloading Git LFS pack files failed.');
    print "\n";

    my $error_lfs = 'Extracting Git LFS pack files failed.';
    my $progress = 0;

    # tar reads exactly one archive per invocation; any further file names
    # on its command line are taken as members to extract. Every pack file
    # is therefore extracted with its own tar call. Only names of the form
    # pack/lfs-objects-<number>.tar.gz are accepted, which also makes it safe
    # to place the name into the shell command below.
    my @packs = sort grep { m{\Apack/lfs-objects-[0-9]+[.]tar[.]gz\z} }
                glob('pack/lfs-objects-*.tar.gz');

    for my $pack (@packs) {
        # stderr is merged into the pipe so that every line of tar's verbose
        # listing is counted no matter which stream tar writes it to.
        open(my $pipe, '-|', "tar -xzvf $pack 2>&1") or error_exit($error_lfs);
        while (my $line = <$pipe>) {
            $progress++;
            # The total after the slash is a placeholder that the
            # create-bootstrap script replaces with the real object count.
            print "\rExtracting LFS objects: $progress/lfs_pack_count";
        }
        # close() reports a non-zero exit status of tar as failure.
        close($pipe) or error_exit($error_lfs);
    }
    print "\n";
}
|
||
# Check out the default branch. The branch name in the command below is a
# placeholder token that the create-bootstrap script substitutes.
run('git checkout --force default_branch');

# Initialize submodules if the checked-out branch declares any.
run('git submodule update --init --recursive --reference .git') if -e '.gitmodules';

# Cleanup now obsolete Git LFS pack files
my $prune_cmd = join ' ',
    'git',
    '-c lfs.fetchrecentcommitsdays=0',
    '-c lfs.fetchrecentrefsdays=0',
    '-c lfs.fetchrecentremoterefs=false',
    '-c lfs.pruneoffsetdays=0',
    'lfs prune >/dev/null';
run($prune_cmd);

header('Hurray! Your Git repository is ready for you!');
# Seconds elapsed since the start time recorded at the top of the script.
print 'Bootstrap time: ' . (time - $start) . " s\n";
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
@echo off
rem Windows (cmd.exe) entry point: runs the "boot" script located next to
rem this file with the sh.exe found under %ProgramFiles%\Git.

rem %~dp0 is the directory of this batch file. It is quoted so that paths
rem containing spaces or characters such as "&" are handled correctly.
pushd "%~dp0"
"%ProgramFiles%\Git\bin\sh.exe" -c "./boot"
popd
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,147 @@ | ||
#!/usr/bin/env bash | ||
# | ||
# The `create-bootstrap` script searches a repository for smallish LFS files, | ||
# combines them into larger LFS files, and adds them to a new orphan branch | ||
# called `bootstrap`. In addition, the script adds a `boot` script to the | ||
# orphan branch which splits the larger LFS files up, again. | ||
# | ||
# In order to leverage the Git LFS pack files, the Git user needs to get the | ||
# `bootstrap` branch and run the `boot` script. | ||
# | ||
# Usage: | ||
# 1. Clone your repository with the smallish LFS files | ||
# 2. `cd` into the repository | ||
# 3. Run this script | ||
# | ||
# Abort on the first command that fails.
set -e

# Absolute directory of this script; the `boot` and `boot.bat` templates are
# copied from here. `dirname` is used instead of "${0%/*}" because the latter
# returns the script name itself when $0 contains no slash (for example when
# started as `bash create-bootstrap`), which would make the `cd` fail.
base_dir=$(cd "$(dirname -- "$0")" && pwd)
# Uncomment to skip the "Branch 'bootstrap' exists already" check below:
# force=1;
|
||
# Print a section banner: an empty line, then the title (indented by one
# space) framed by a rule of '#' characters above and below.
#   $1 - title text
header() {
    local rule="##############################################################"
    printf '\n%s\n %s\n%s\n' "$rule" "$1" "$rule"
}
|
||
# Print an error message and abort the script with exit status 1.
# The message goes to stderr so it is not mixed into captured or piped
# standard output.
#   $1 - text shown after "ERROR: "
function error {
    echo "ERROR: $1" >&2
    exit 1
}
|
||
# The script uses paths relative to the repository root (.git, pack/).
if [ ! -d .git ]; then
    error "Looks like you are not in the root directory of a Git repository."
fi

# Refuse to replace an existing remote 'bootstrap' branch unless $force is set.
if [ -z "$force" ] && git rev-parse --verify origin/bootstrap >/dev/null 2>&1; then
    error "Branch 'bootstrap' exists already. Please delete it!"
fi

# The branch that is checked out right now is the one written into the
# `boot` script as the branch to check out after bootstrapping.
default_branch=$(git rev-parse --abbrev-ref HEAD) ||
    error "Could not determine the current branch."
# On a detached HEAD the command above prints the literal string "HEAD",
# which is not a usable branch name.
if [ "$default_branch" = "HEAD" ]; then
    error "HEAD is detached. Please check out the default branch first."
fi

# Without this explicit check a missing remote would end the script silently
# because of `set -e`.
remote_url=$(git config --get remote.origin.url) ||
    error "Remote 'origin' has no URL configured."

# Repository name = last path component of the URL without a ".git" suffix.
repo_name=${remote_url##*/}
repo_name=${repo_name%.git}
|
||
header "Ensure relevant Git LFS objects are present..."

# Update the working copy and download the LFS objects of the repository
# and of all (nested) submodules.
git pull
git lfs pull
git submodule foreach --recursive git lfs pull

# One-off configuration overrides for `git lfs prune`, used for the
# repository itself and for every submodule.
# NOTE(review): presumably these make prune keep only the objects needed by
# the current checkout - confirm against the Git LFS documentation.
prune_settings=(
    -c lfs.fetchrecentcommitsdays=0
    -c lfs.fetchrecentrefsdays=0
    -c lfs.fetchrecentremoterefs=false
    -c lfs.pruneoffsetdays=0
)
git "${prune_settings[@]}" lfs prune
git submodule foreach --recursive git "${prune_settings[@]}" lfs prune
|
||
header "1/4 Creating 'bootstrap' branch..."

# Switch to a new branch without history, reset the index, and remove all
# files from the working tree (including ignored ones; force is given twice).
git checkout --orphan bootstrap
git reset
git clean -d -x --force --force --quiet
|
||
header "2/4 Creating Git LFS pack files..."

# Copy LFS files of the submodule into the parent repo to make them
# part of the LFS packfile
if [ -e ./.git/modules ]; then
    find ./.git/modules -type d -path '*/lfs' -exec cp -rf {} .git/ \;
fi

# Find all LFS files smaller than 256MB and put them into tar files no
# larger than 256MB. Finally, print the number of total files added to
# the archives (0 if there were none).
#
# Archives are numbered from 1 without gaps: a new archive is started only
# when the current one already has content and the next file would push it
# over the limit. Files that are too large are skipped before that decision,
# so they never consume an archive number. The `boot` script relies on
# "pack/lfs-objects-1.tar.gz" being present whenever any archive exists.
rm -rf pack
mkdir pack
lfs_pack_count=$(
    find ./.git/lfs/objects -type f |
    perl -ne '
        BEGIN {
            $limit       = 256*1024*1024;
            $batch_id    = 1;
            $batch_size  = 0;
            $total_count = 0;
        }
        chomp(my $path = $_);
        next if !$path;
        my $size = -s $path;
        # Skip files that vanished or that are too large to be packed.
        next if !defined $size || $size >= $limit;
        if ($batch_size > 0 && $batch_size + $size > $limit) {
            $batch_id++;
            $batch_size = 0;
        }
        $total_count++;
        $batch_size += $size;
        my $tar = "pack/lfs-objects-$batch_id.tar";
        # Backticks keep any tar output away from the count printed below.
        `tar -rf $tar $path`;
        die "tar failed for $path\n" if $? != 0;
        END { print $total_count; }
    '
)
# Compress those tar files (if any were created)
if compgen -G 'pack/*.tar' >/dev/null; then
    gzip pack/*.tar
fi
git lfs track 'pack/lfs-objects-*.tar.gz'
git add pack/lfs-objects-*.tar.gz 2>/dev/null || true
|
||
# Boot entry point for Linux/MacOS (bash)
cp "$base_dir/boot" boot
# Fill in the two placeholders of the `boot` template. The values are handed
# to perl through the environment instead of being pasted into the perl
# expression, so a branch name containing "/" (e.g. "release/1.0") or other
# special characters cannot break the substitution.
BOOTSTRAP_DEFAULT_BRANCH="$default_branch" \
    perl -pi -e 's/default_branch/$ENV{BOOTSTRAP_DEFAULT_BRANCH}/' boot
BOOTSTRAP_LFS_PACK_COUNT="$lfs_pack_count" \
    perl -pi -e 's/lfs_pack_count/$ENV{BOOTSTRAP_LFS_PACK_COUNT}/' boot

# Boot entry point for Windows (cmd.exe)
cp "$base_dir/boot.bat" boot.bat
|
||
# Generate the README of the bootstrap branch. The here-document is unquoted,
# so $remote_url and $repo_name are expanded, while the escaped backticks and
# the double backslash end up as literal characters in the file.
cat > README.md << EOF
## Bootstrap Branch
This branch is not related to the rest of the repository content.
The purpose of this branch is to bootstrap the repository quickly
using Git LFS pack files and setting useful defaults.
Bootstrap the repository with the following commands.
### Windows (cmd.exe)
\`\`\`
$ git clone $remote_url --branch bootstrap && $repo_name\\boot.bat
\`\`\`
### Linux/MacOS (bash):
\`\`\`
$ git clone $remote_url --branch bootstrap && ./$repo_name/boot
\`\`\`
EOF

# Note: The `.gitattributes` file is deliberately left out of this commit.
#       That way the Git LFS pack files are not downloaded during the
#       initial clone but only later by the `boot` script.
git add boot boot.bat README.md
|
||
header "3/4 Uploading 'bootstrap' branch..."

# Commit with a fixed identity passed via -c, then publish the branch.
# NOTE(review): the e-mail value below looks like a redaction artifact rather
# than a real address - confirm the intended value.
git -c user.name="Bootstrap Creator" \
    -c user.email="[email protected]" \
    commit --quiet --message "Initial commit"
git push --force --set-upstream origin bootstrap

header "4/4 Done"
# Show the usage instructions that were just published.
cat README.md