Skip to content

Commit

Permalink
Support GitHub blobs and gists by transforming normal to "raw" URLs. (s…
Browse files Browse the repository at this point in the history
…treamlit#645)

* starting conditions from @aritropaul's PR

* added test scaffolding

* snakecase and mild refactor of function

* refactor into util.py

* added fixture structure

* Update bug_report.md

* Update feature_request.md

* rebasing cli.py to streamlit/develop to fix missing commits

* linting

* fleshed out the tests. should collect more fixtures.

* working regular expression and tests. needs confirmation.

* copied in yarn.lock from streamlit/streamlit/develop

* make sure not to spuriously replace "blob", eg. username "theblob"

* mild function reorg and comment style change
  • Loading branch information
nthmost authored Nov 8, 2019
1 parent c9571f3 commit 2001130
Show file tree
Hide file tree
Showing 3 changed files with 78 additions and 1 deletion.
7 changes: 6 additions & 1 deletion lib/streamlit/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import streamlit
from streamlit.credentials import Credentials
from streamlit import version
from streamlit import util
import streamlit.bootstrap as bootstrap
from streamlit.case_converters import to_snake_case

Expand Down Expand Up @@ -209,10 +210,14 @@ def main_run(target, args=None, **kwargs):

if url(target):
from streamlit.temporary_directory import TemporaryDirectory

with TemporaryDirectory() as temp_dir:
from urllib.parse import urlparse

path = urlparse(target).path
script_path = os.path.join(temp_dir, path.strip('/').rsplit('/', 1)[-1])
script_path = os.path.join(temp_dir, path.strip("/").rsplit("/", 1)[-1])
# if this is a GitHub/Gist blob url, convert to a raw URL first.
target = util.process_gitblob_url(target)
_download_remote(script_path, target)
_main_run(script_path, args)
else:
Expand Down
32 changes: 32 additions & 0 deletions lib/streamlit/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,13 @@
# URL of Streamlit's help page.
HELP_DOC = "https://streamlit.io/docs/"

# Regular expression for process_gitblob_url.
#
# Named groups:
#   base         scheme and host: github.com or gist.github.com
#   account      one or two "/"-terminated path segments ("user/" or "user/repo/")
#   blob_or_raw  the literal path segment "blob" or "raw", when present
#   suffix       everything that follows (may be empty)
#
# Notes:
#   * Raw strings are used so that the backslashes reach the regex engine
#     instead of being treated as (invalid) string escapes.
#   * The dots in the host are escaped so that look-alike hosts such as
#     "gist-github.com" do not match.
#   * "-" is accepted in account and repository names (e.g. "scikit-learn").
#   * The second slash after "https:" stays optional, as in the earlier pattern.
GITBLOB_RE = re.compile(
    r"(?P<base>https://?(gist\.)?github\.com/)"
    r"(?P<account>([\w.-]+/){1,2})"
    # Only a whole path segment counts, so e.g. "blobs/..." is left alone.
    r"(?P<blob_or_raw>(blob|raw)(?=/|$))?"
    r"(?P<suffix>(.+)?)"
)


def _decode_ascii(string):
"""Decodes a string as ascii."""
Expand Down Expand Up @@ -451,6 +458,31 @@ def print_url(title, url):
click.secho(url, bold=True)


def process_gitblob_url(url):
    """Turn a GitHub "blob" URL or a Gist URL into its "raw" counterpart.

    Parameters
    ----------
    url : str
        The URL to inspect.

    Returns
    -------
    str
        - For a github.com / gist.github.com URL whose path contains "blob"
          right after the account segment(s): the same URL with "blob"
          replaced by "raw".
        - For a URL that already has "raw" in that position: `url` unchanged.
        - For any other gist.github.com URL: `url` with "/raw" appended.
        - For everything else (including other github.com pages and
          non-GitHub URLs): `url` unchanged.

    """
    # Matches github.com and gist.github.com. Will not match githubusercontent.com.
    match = GITBLOB_RE.match(url)
    if match is None:
        return url

    mdict = match.groupdict()
    kind = mdict["blob_or_raw"]

    # If it has "blob" in the url, replace this with "raw" and we're done.
    if kind == "blob":
        return "{base}{account}raw{suffix}".format(**mdict)

    # If it is a "raw" url already, return untouched.
    if kind == "raw":
        return url

    # Only gists get "/raw" tacked on the end. Doing the same to an ordinary
    # github.com page (repo root, tree, issue, ...) would just fabricate a URL
    # that does not exist. Strip any trailing slash first so that we never
    # produce "...//raw".
    if "gist.github.com" in mdict["base"]:
        return url.rstrip("/") + "/raw"

    return url


def get_hostname(url):
"""Return the hostname of a URL (with or without protocol)."""
# Just so urllib can parse the URL, make sure there's a protocol.
Expand Down
40 changes: 40 additions & 0 deletions lib/tests/streamlit/util_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,3 +246,43 @@ def test_file_not_in_folder_glob(self):
def test_rel_file_not_in_folder_glob(self):
    """A bare relative file name must not match the "**/f" folder glob."""
    self.assertFalse(util.file_is_in_folder_glob("foo.py", "**/f"))


class GitHubUrlTest(unittest.TestCase):
    """Tests for util.process_gitblob_url."""

    # (input URL, expected output) pairs: github.com "blob" URLs and the
    # "raw" URLs they should be rewritten to.
    GITHUB_URLS = [
        (
            "https://github.com/aritropaul/streamlit/blob/b72adbcf00c91775db14e739e2ea33d6df9079c3/lib/streamlit/cli.py",
            "https://github.com/aritropaul/streamlit/raw/b72adbcf00c91775db14e739e2ea33d6df9079c3/lib/streamlit/cli.py",
        ),
        (
            "https://github.com/streamlit/streamlit/blob/develop/examples/video.py",
            "https://github.com/streamlit/streamlit/raw/develop/examples/video.py",
        ),
        (
            "https://github.com/text2gene/text2gene/blob/master/sbin/clinvar.hgvs_citations.sql",
            "https://github.com/text2gene/text2gene/raw/master/sbin/clinvar.hgvs_citations.sql",
        ),
        (
            "https://github.com/mekarpeles/math.mx/blob/master/README.md",
            "https://github.com/mekarpeles/math.mx/raw/master/README.md",
        ),
    ]

    # (input URL, expected output) pairs: gist URLs and the same URLs with
    # "/raw" appended.
    GIST_URLS = [
        (
            "https://gist.github.com/nthmost/b521b80fbd834e67b3f5e271e9548232",
            "https://gist.github.com/nthmost/b521b80fbd834e67b3f5e271e9548232/raw",
        ),
        (
            "https://gist.github.com/scottyallen/1888e058261fc21f184f6be192bbe131",
            "https://gist.github.com/scottyallen/1888e058261fc21f184f6be192bbe131/raw",
        ),
        (
            "https://gist.github.com/tvst/faf057abbedaccaa70b48216a1866cdd",
            "https://gist.github.com/tvst/faf057abbedaccaa70b48216a1866cdd/raw",
        ),
    ]

    # Inputs that must come back from process_gitblob_url unchanged.
    INVALID_URLS = [
        "blah",
        "google.com",
        "http://homestarrunner.com",
        "https://somethinglikegithub.com/withablob",
        "gist.github.com/nothing",
        "https://raw.githubusercontent.com/streamlit/streamlit/develop/examples/video.py",
        "streamlit.io/raw/blob",
    ]

    # The tests use self.assertEqual rather than a bare `assert` so that the
    # checks still run under `python -O` and a failure reports both values.

    def test_github_url_is_replaced(self):
        """Each github.com blob URL is rewritten to its raw URL."""
        for (target, processed) in self.GITHUB_URLS:
            self.assertEqual(util.process_gitblob_url(target), processed)

    def test_gist_url_is_replaced(self):
        """Each gist URL gets "/raw" appended."""
        for (target, processed) in self.GIST_URLS:
            self.assertEqual(util.process_gitblob_url(target), processed)

    def test_nonmatching_url_is_not_replaced(self):
        """URLs that are not GitHub blobs or gists are returned unchanged."""
        for url in self.INVALID_URLS:
            self.assertEqual(url, util.process_gitblob_url(url))

0 comments on commit 2001130

Please sign in to comment.