Skip to content

Commit

Permalink
reckless: Clone github sources when API access fails
Browse files Browse the repository at this point in the history
Because of the GitHub API rate limit, this allows cloning a GitHub repo and
searching the local clone rather than searching via the REST API.  If a source
has already been cloned, it is fetched and the default branch is checked out.

Fixes a failure reported by @farscapian

Changelog-Fixed: Reckless no longer fails on github API ratelimit.
  • Loading branch information
endothermicdev authored and cdecker committed Feb 21, 2024
1 parent ba9ec41 commit bfb29aa
Showing 1 changed file with 113 additions and 6 deletions.
119 changes: 113 additions & 6 deletions tools/reckless
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import types
from typing import Union
from urllib.parse import urlparse
from urllib.request import urlopen
from urllib.error import HTTPError
import venv


Expand Down Expand Up @@ -144,7 +145,8 @@ class InstInfo:
target = SourceDir(self.source_loc, srctype=self.srctype)
# Set recursion for how many directories deep we should search
depth = 0
if self.srctype in [Source.DIRECTORY, Source.LOCAL_REPO]:
if self.srctype in [Source.DIRECTORY, Source.LOCAL_REPO,
Source.GIT_LOCAL_CLONE]:
depth = 5
elif self.srctype == Source.GITHUB_REPO:
depth = 1
Expand Down Expand Up @@ -193,7 +195,28 @@ class InstInfo:
return success
return None

result = search_dir(self, target, False, depth)
try:
result = search_dir(self, target, False, depth)
# Using the rest API of github.com may result in a
# "Error 403: rate limit exceeded" or other access issues.
# Fall back to cloning and searching the local copy instead.
except HTTPError:
result = None
if self.srctype == Source.GITHUB_REPO:
# clone source to reckless dir
target = copy_remote_git_source(self)
if not target:
logging.warning(f"could not clone github source {self}")
return False
logging.debug(f"falling back to cloning remote repo {self}")
# Update to reflect use of a local clone
self.source_loc = target.location
self.srctype = target.srctype
result = search_dir(self, target, False, 5)

if not result:
return False

if result:
if result != target:
if result.relative:
Expand Down Expand Up @@ -235,6 +258,8 @@ class Source(Enum):
GITHUB_REPO = 3
OTHER_URL = 4
UNKNOWN = 5
# Cloned from remote source before searching (rather than github API)
GIT_LOCAL_CLONE = 6

@classmethod
def get_type(cls, source: str):
Expand All @@ -253,6 +278,16 @@ class Source(Enum):
return cls(4)
return cls(5)

@classmethod
def get_github_user_repo(
        cls, source: str) -> 'tuple[str | None, str | None]':
    """Extract a github username and repository name from a source string.

    The source is lowercased before it is examined, so the names returned
    are always lowercase.

    Returns:
        (user, repo), taken from the first two path components that follow
        'github.com/', or (None, None) when 'github.com/' does not appear
        in the source or fewer than two components follow it.
    """
    # An empty separator from partition() means 'github.com/' was not found.
    _, marker, remainder = source.lower().partition('github.com/')
    if not marker:
        return None, None
    trailing = Path(remainder).parts
    if len(trailing) < 2:
        return None, None
    return trailing[0], trailing[1]


class SourceDir():
"""Structure to search source contents."""
Expand All @@ -277,7 +312,7 @@ class SourceDir():
# logging.debug(f"populating {self.srctype} {self.location}")
if self.srctype == Source.DIRECTORY:
self.contents = populate_local_dir(self.location)
elif self.srctype == Source.LOCAL_REPO:
elif self.srctype in [Source.LOCAL_REPO, Source.GIT_LOCAL_CLONE]:
self.contents = populate_local_repo(self.location)
elif self.srctype == Source.GITHUB_REPO:
self.contents = populate_github_repo(self.location)
Expand Down Expand Up @@ -435,6 +470,11 @@ def source_element_from_repo_api(member: dict):


def populate_github_repo(url: str) -> list:
"""populate one level of a github repository via REST API"""
# Forces search to clone remote repos (for blackbox testing)
if GITHUB_API_FALLBACK:
with tempfile.NamedTemporaryFile() as tmp:
raise HTTPError(url, 403, 'simulated ratelimit', {}, tmp)
# FIXME: This probably contains leftover cruft.
repo = url.split('/')
while '' in repo:
Expand Down Expand Up @@ -478,6 +518,28 @@ def populate_github_repo(url: str) -> list:
return contents


def copy_remote_git_source(github_source: InstInfo):
    """Clone, or fetch and check out, a local copy of a remote git repo.

    The local copy is placed at RECKLESS_DIR/.remote_sources/<user>/<repo>.
    If that path already exists it is refreshed with _git_update; otherwise
    the repository is cloned there with _git_clone.

    Returns:
        A SourceDir of type Source.GIT_LOCAL_CLONE for the local copy, or
        None when the github user/repo cannot be extracted from the source,
        the directory cannot be provisioned, or the git update/clone fails.
    """
    user, repo = Source.get_github_user_repo(github_source.source_loc)
    if not user or not repo:
        logging.warning('could not extract github user and repo '
                        f'name for {github_source.source_loc}')
        return None
    local_path = RECKLESS_DIR / '.remote_sources' / user
    create_dir(RECKLESS_DIR / '.remote_sources')
    if not create_dir(local_path):
        logging.warning(f'could not provision dir {local_path} to '
                        f'clone remote source {github_source.source_loc}')
        return None
    local_path = local_path / repo
    if local_path.exists():
        # Fetch the latest.  This is an explicit check rather than an
        # assert: asserts are stripped under `python -O` (the update would
        # then never run), and callers expect None on failure.
        if not _git_update(github_source, local_path):
            logging.warning(f'could not update local copy {local_path} of '
                            f'remote source {github_source.source_loc}')
            return None
    elif not _git_clone(github_source, local_path):
        # Do not hand back a SourceDir for a clone that did not succeed.
        logging.warning(f'could not clone remote source '
                        f'{github_source.source_loc} to {local_path}')
        return None
    return SourceDir(local_path, srctype=Source.GIT_LOCAL_CLONE)


class Config():
"""A generic class for procuring, reading and editing config files"""
def obtain_config(self,
Expand Down Expand Up @@ -803,7 +865,8 @@ def _git_clone(src: InstInfo, dest: Union[PosixPath, str]) -> bool:
if src.srctype == Source.GITHUB_REPO:
assert 'github.com' in src.source_loc
source = f"{GITHUB_COM}" + src.source_loc.split("github.com")[-1]
elif src.srctype in [Source.LOCAL_REPO, Source.OTHER_URL]:
elif src.srctype in [Source.LOCAL_REPO, Source.OTHER_URL,
Source.GIT_LOCAL_CLONE]:
source = src.source_loc
else:
return False
Expand All @@ -819,6 +882,46 @@ def _git_clone(src: InstInfo, dest: Union[PosixPath, str]) -> bool:
return True


def _git_update(github_source: InstInfo, local_copy: PosixPath):
    """Refresh an existing local clone and check out its default branch.

    Runs, in order, inside local_copy: `git remote set-url origin` (using
    github_source.source_loc), `git fetch origin`, `git symbolic-ref` on
    refs/remotes/origin/HEAD, and `git checkout` of the branch it reports.

    Returns:
        True if every git command succeeded; False as soon as one exits
        non-zero, times out, cannot be launched, or reports no branch.
    """
    def run_git(*args):
        """Run one git command in local_copy; return None if it failed."""
        # Local import: only `run` and `PIPE` are known to be in scope here.
        from subprocess import TimeoutExpired
        try:
            git = run(['git', *args], cwd=str(local_copy), stdout=PIPE,
                      stderr=PIPE, text=True, check=False, timeout=60)
        except (OSError, TimeoutExpired) as err:
            # e.g. git is not installed, cwd is missing, or the remote hung
            logging.debug(f'git {args[0]} could not complete: {err}')
            return None
        if git.returncode != 0:
            logging.debug(f'git {args[0]} failed: {git.stderr.strip()}')
            return None
        return git

    # Ensure this is the correct source
    if run_git('remote', 'set-url', 'origin',
               github_source.source_loc) is None:
        return False

    # Fetch the latest from the remote
    if run_git('fetch', 'origin', '--recurse-submodules=on-demand') is None:
        return False

    # Find default branch
    head = run_git('symbolic-ref', 'refs/remotes/origin/HEAD', '--short')
    if head is None:
        return False
    branches = head.stdout.splitlines()
    if not branches:
        # Nothing was reported, so there is no branch to check out.
        return False
    default_branch = branches[0]
    if default_branch != 'origin/master':
        logging.debug(f'UNUSUAL: fetched default branch {default_branch} for '
                      f'{github_source.source_loc}')

    # Checkout default branch
    if run_git('checkout', default_branch) is None:
        return False

    return True


def get_temp_reckless_dir() -> PosixPath:
random_dir = 'reckless-{}'.format(str(hash(os.times()))[-9:])
new_path = Path(tempfile.gettempdir()) / random_dir
Expand Down Expand Up @@ -850,7 +953,7 @@ def _checkout_commit(orig_src: InstInfo,
cloned_path: PosixPath):
# Check out and verify commit/tag if source was a repository
if orig_src.srctype in [Source.LOCAL_REPO, Source.GITHUB_REPO,
Source.OTHER_URL]:
Source.OTHER_URL, Source.GIT_LOCAL_CLONE]:
if orig_src.commit:
logging.debug(f"Checking out {orig_src.commit}")
checkout = Popen(['git', 'checkout', orig_src.commit],
Expand Down Expand Up @@ -912,7 +1015,7 @@ def _install_plugin(src: InstInfo) -> Union[InstInfo, None]:
create_dir(clone_path)
shutil.copytree(src.source_loc, plugin_path)
elif src.srctype in [Source.LOCAL_REPO, Source.GITHUB_REPO,
Source.OTHER_URL]:
Source.OTHER_URL, Source.GIT_LOCAL_CLONE]:
# clone git repository to /tmp/reckless-...
if not _git_clone(src, plugin_path):
return None
Expand Down Expand Up @@ -1401,6 +1504,10 @@ if __name__ == '__main__':
GITHUB_COM = os.environ['REDIR_GITHUB']
logging.root.setLevel(args.loglevel)

GITHUB_API_FALLBACK = False
if 'GITHUB_API_FALLBACK' in os.environ:
GITHUB_API_FALLBACK = os.environ['GITHUB_API_FALLBACK']

if 'targets' in args:
# FIXME: Catch missing argument
if args.func.__name__ == 'help_alias':
Expand Down

0 comments on commit bfb29aa

Please sign in to comment.