Skip to content

Commit

Permalink
change pvl regex opts format
Browse files Browse the repository at this point in the history
  • Loading branch information
mlsteele committed Sep 2, 2016
1 parent eae7a41 commit 3488648
Show file tree
Hide file tree
Showing 2 changed files with 119 additions and 67 deletions.
48 changes: 33 additions & 15 deletions go/pvl/hardcoded.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,71 +18,89 @@ var hardcodedPVLString = `
"revision": 1,
"services": {
"coinbase": [
{ "assert_regex_match": "^https://coinbase\\.com/%{username_service}/public-key$i" },
{
"assert_regex_match": "^https://coinbase\\.com/%{username_service}/public-key$",
"case_insensitive": true
},
{ "fetch": "html" },
{ "selector_css": ["pre.statement", 0] },
{ "assert_find_base64": "sig" }
],
"dns": [{ "assert_regex_match": "^keybase-site-verification=%{sig_id_medium}$" }],
"generic_web_site": [
{ "assert_regex_match": "^https?://%{hostname}/(?:\\.well-known/keybase\\.txt|keybase\\.txt)$i" },
{
"assert_regex_match": "^https?://%{hostname}/(?:\\.well-known/keybase\\.txt|keybase\\.txt)$",
"case_insensitive": true
},
{ "fetch": "string" },
{ "assert_find_base64": "sig" }
],
"github": [
{ "assert_regex_match": "^https://gist\\.github(usercontent)?\\.com/%{username_service}/.*$i" },
{
"assert_regex_match": "^https://gist\\.github(usercontent)?\\.com/%{username_service}/.*$",
"case_insensitive": true
},
{ "fetch": "string" },
{ "assert_find_base64": "sig" }
],
"hackernews": [
{
"assert_regex_match": "^https://hacker-news\\.firebaseio\\.com/v0/user/%{username_service}/about.json$i"
"assert_regex_match": "^https://hacker-news\\.firebaseio\\.com/v0/user/%{username_service}/about.json$",
"case_insensitive": true
},
{ "fetch": "string" },
{ "assert_regex_match": "^.*%{sig_id_medium}.*$" }
],
"reddit": [
{ "assert_regex_match": "^https://(:?www\\.)?reddit\\.com/r/keybaseproofs/.*$i" },
{
"assert_regex_match": "^https://(:?www\\.)?reddit\\.com/r/keybaseproofs/.*$",
"case_insensitive": true
},
{ "fetch": "json" },
{ "selector_json": [0, "kind"] },
{ "assert_regex_match": "^Listing$" },
{ "selector_json": [0, "data", "children", 0, "kind"] },
{ "assert_regex_match": "^t3$" },
{ "selector_json": [0, "data", "children", 0, "data", "subreddit"] },
{ "assert_regex_match": "^keybaseproofs$i" },
{ "assert_regex_match": "^keybaseproofs$", "case_insensitive": true },
{ "selector_json": [0, "data", "children", 0, "data", "author"] },
{ "assert_regex_match": "^%{username_service}$i" },
{ "assert_regex_match": "^%{username_service}$", "case_insensitive": true },
{ "selector_json": [0, "data", "children", 0, "data", "title"] },
{ "assert_regex_match": "^.*%{sig_id_medium}.*$i" },
{ "assert_regex_match": "^.*%{sig_id_medium}.*$", "case_insensitive": true },
{ "selector_json": [0, "data", "children", 0, "data", "selftext"] },
{ "assert_find_base64": "sig" }
],
"rooter": [
{ "assert_regex_match": "^https?://[\\w:_\\-\\.]+/_/api/1\\.0/rooter/%{username_service}/.*$i" },
{
"assert_regex_match": "^https?://[\\w:_\\-\\.]+/_/api/1\\.0/rooter/%{username_service}/.*$",
"case_insensitive": true
},
{ "fetch": "json" },
{ "selector_json": ["status", "name"] },
{ "assert_regex_match": "^ok$i" },
{ "assert_regex_match": "^ok$", "case_insensitive": true },
{ "selector_json": ["toot", "post"] },
{ "assert_regex_match": "^.*%{sig_id_medium}.*$" }
],
"twitter": [
{ "assert_regex_match": "^https://twitter\\.com/%{username_service}/.*$i" },
{ "assert_regex_match": "^https://twitter\\.com/%{username_service}/.*$", "case_insensitive": true },
{ "fetch": "html" },
{
"attr": "data-screen-name",
"selector_css": ["div.permalink-tweet-container div.permalink-tweet", 0]
},
{ "assert_regex_match": "^%{username_service}$i" },
{ "assert_regex_match": "^%{username_service}$", "case_insensitive": true },
{ "selector_css": ["div.permalink-tweet-container div.permalink-tweet", 0, "p.tweet-text", 0] },
{ "whitespace_normalize": true },
{
"regex_capture": "^ *(?:@[a-zA-Z0-9_-]+\\s*)* *Verifying myself: I am ([A-Za-z0-9_]+) on Keybase\\.io\\. (?:\\S+) */.*$i"
"case_insensitive": true,
"regex_capture": "^ *(?:@[a-zA-Z0-9_-]+\\s*)* *Verifying myself: I am ([A-Za-z0-9_]+) on Keybase\\.io\\. (?:\\S+) */.*$"
},
{ "assert_regex_match": "^%{username_keybase}$i" },
{ "assert_regex_match": "^%{username_keybase}$", "case_insensitive": true },
{ "selector_css": ["div.permalink-tweet-container div.permalink-tweet", 0, "p.tweet-text", 0] },
{ "whitespace_normalize": true },
{
"regex_capture": "^ *(?:@[a-zA-Z0-9_-]+\\s*)* *Verifying myself: I am (?:[A-Za-z0-9_]+) on Keybase\\.io\\. (\\S+) */.*$i"
"case_insensitive": true,
"regex_capture": "^ *(?:@[a-zA-Z0-9_-]+\\s*)* *Verifying myself: I am (?:[A-Za-z0-9_]+) on Keybase\\.io\\. (\\S+) */.*$"
},
{ "assert_regex_match": "^%{sig_id_short}$" }
]
Expand Down
138 changes: 86 additions & 52 deletions go/pvl/interp.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,17 @@ type FetchResult struct {
JSON *jsonw.Wrapper
}

type RegexDescriptor struct {
Template string
CaseInsensitive bool
MultiLine bool
}

const (
RegexKeyCaseInsensitive string = "case_insensitive"
RegexKeyMultiLine string = "multiline"
)

type Mode string

const (
Expand Down Expand Up @@ -478,19 +489,18 @@ func stepInstruction(g ProofContextExt, ins *jsonw.Wrapper, state ScriptState) (
}

func stepAssertRegexMatch(g ProofContextExt, ins *jsonw.Wrapper, state ScriptState) (ScriptState, libkb.ProofError) {
template, err := ins.AtKey(string(CmdAssertRegexMatch)).GetString()
rdesc, err := extractRegexDescriptor(g, state, ins, string(CmdAssertRegexMatch))
if err != nil {
return state, libkb.NewProofError(keybase1.ProofStatus_INVALID_PVL,
"Could not get regex template")
return state, err
}
re, perr := interpretRegex(g, template, state)
if perr != nil {
return state, perr
re, err := interpretRegex(g, state, rdesc)
if err != nil {
return state, err
}
if !re.MatchString(state.ActiveString) {
debugWithState(g, state, "Regex did not match:\n %v\n %v\n %v", template, re, state.ActiveString)
debugWithState(g, state, "Regex did not match:\n %v\n %v\n %v", rdesc.Template, re, state.ActiveString)
return state, libkb.NewProofError(keybase1.ProofStatus_CONTENT_FAILURE,
"Regex did not match (%v)", template)
"Regex did not match (%v)", rdesc.Template)
}

return state, nil
Expand Down Expand Up @@ -519,22 +529,21 @@ func stepWhitespaceNormalize(g ProofContextExt, ins *jsonw.Wrapper, state Script
}

func stepRegexCapture(g ProofContextExt, ins *jsonw.Wrapper, state ScriptState) (ScriptState, libkb.ProofError) {
template, err := ins.AtKey(string(CmdRegexCapture)).GetString()
rdesc, err := extractRegexDescriptor(g, state, ins, string(CmdRegexCapture))
if err != nil {
return state, libkb.NewProofError(keybase1.ProofStatus_INVALID_PVL,
"not get regex template")
return state, err
}
re, perr := interpretRegex(g, template, state)
if perr != nil {
return state, perr
re, err := interpretRegex(g, state, rdesc)
if err != nil {
return state, err
}
match := re.FindStringSubmatch(state.ActiveString)
// Assert that the match matched and has at least one capture group.
if len(match) < 2 {
debugWithState(g, state, "Regex capture did not match enough:\n %v\n %v\n %v\n %v",
template, re, state.ActiveString, match)
rdesc.Template, re, state.ActiveString, match)
return state, libkb.NewProofError(keybase1.ProofStatus_CONTENT_FAILURE,
"Regex capture did not match (%v)", template)
"Regex capture did not match (%v)", rdesc.Template)
}
state.ActiveString = match[1]
return state, nil
Expand Down Expand Up @@ -673,29 +682,28 @@ func stepSelectorCSS(g ProofContextExt, ins *jsonw.Wrapper, state ScriptState) (
}

func stepTransformURL(g ProofContextExt, ins *jsonw.Wrapper, state ScriptState) (ScriptState, libkb.ProofError) {
sourceTemplate, err := ins.AtKey(string(CmdTransformURL)).GetString()
if err != nil {
return state, libkb.NewProofError(keybase1.ProofStatus_INVALID_PVL,
"Could not get regex template for transformation")
sourceRdesc, perr := extractRegexDescriptor(g, state, ins, string(CmdTransformURL))
if perr != nil {
return state, perr
}
re, perr := interpretRegex(g, state, sourceRdesc)
if perr != nil {
return state, perr
}

destTemplate, err := ins.AtKey("to").GetString()
if err != nil {
return state, libkb.NewProofError(keybase1.ProofStatus_INVALID_PVL,
"Could not get dest pattern for transformation")
}

re, perr := interpretRegex(g, sourceTemplate, state)
if perr != nil {
return state, perr
}

match := re.FindStringSubmatch(state.FetchURL)
if len(match) < 1 {
libkb.NewProofError(keybase1.ProofStatus_INVALID_PVL,
"Regex transform did not match:\n %v\n %v\n %v",
sourceTemplate, re, state.FetchURL)
sourceRdesc.Template, re, state.FetchURL)
return state, libkb.NewProofError(keybase1.ProofStatus_CONTENT_FAILURE,
"Regex transform did not match: %v", sourceTemplate)
"Regex transform did not match: %v", sourceRdesc.Template)
}

newURL, err := substitute(destTemplate, state, match)
Expand Down Expand Up @@ -811,39 +819,65 @@ func runSelectorJSONInner(g ProofContextExt, state ScriptState, object *jsonw.Wr
"Selector entry not recognized: %v", selector)
}

// Take a template, substitute variables, and build the Regexp.
func interpretRegex(g ProofContextExt, template string, state ScriptState) (*regexp.Regexp, libkb.ProofError) {
var perr libkb.ProofError = libkb.NewProofError(keybase1.ProofStatus_INVALID_PVL,
"Could not build regex %v", template)
func extractRegexDescriptor(g ProofContextExt, state ScriptState, obj *jsonw.Wrapper, templatekey string) (RegexDescriptor, libkb.ProofError) {
var desc RegexDescriptor

// Parse out bookends (^$) and option letters.
if !strings.HasPrefix(template, "^") {
return nil, perr
template, err := obj.AtKey(templatekey).GetString()
if err != nil {
debugWithState(g, state, "Could not get regex template: %v", err)
return RegexDescriptor{}, libkb.NewProofError(keybase1.ProofStatus_INVALID_PVL,
"Could not get regex template")
}
lastDollar := strings.LastIndex(template, "$")
if lastDollar == -1 {
return nil, perr
desc.Template = template

if !obj.AtKey(RegexKeyCaseInsensitive).IsNil() {
caseinsensitive, err := obj.AtKey(RegexKeyCaseInsensitive).GetBool()
if err != nil {
return RegexDescriptor{}, libkb.NewProofError(keybase1.ProofStatus_INVALID_PVL,
"Regex option '%v' must be a bool", RegexKeyCaseInsensitive)
}
desc.CaseInsensitive = caseinsensitive
}
opts := template[lastDollar+1:]
if !regexp.MustCompile("[imsU]*").MatchString(opts) {
return nil, libkb.NewProofError(keybase1.ProofStatus_INVALID_PVL,
"Could not build regex: %v (%v)", "invalid options", template)

if !obj.AtKey(RegexKeyMultiLine).IsNil() {
multiline, err := obj.AtKey(RegexKeyMultiLine).GetBool()
if err != nil {
return RegexDescriptor{}, libkb.NewProofError(keybase1.ProofStatus_INVALID_PVL,
"Regex option '%v' must be a bool", RegexKeyMultiLine)
}
desc.MultiLine = multiline
}
var prefix = ""
optmap := make(map[rune]bool)
for _, opt := range opts {
optmap[opt] = true

return desc, nil
}

// Take a regex descriptor, do variable substitution, and build a regex.
func interpretRegex(g ProofContextExt, state ScriptState, rdesc RegexDescriptor) (*regexp.Regexp, libkb.ProofError) {
// Check that regex is bounded by "^" and "$".
if !strings.HasPrefix(rdesc.Template, "^") {
return nil, libkb.NewProofError(keybase1.ProofStatus_INVALID_PVL,
"Could not build regex: %v (%v)", "must start with '^'", rdesc.Template)
}
if len(opts) != len(optmap) {
if !strings.HasSuffix(rdesc.Template, "$") {
return nil, libkb.NewProofError(keybase1.ProofStatus_INVALID_PVL,
"Could not build regex: %v (%v)", "duplicate options", template)
"Could not build regex: %v (%v)", "must end with '$'", rdesc.Template)
}

var optstring = ""
if rdesc.CaseInsensitive {
optstring += "i"
}
if len(opts) > 0 {
prefix = "(?" + opts + ")"
if rdesc.MultiLine {
optstring += "m"
}

var prefix = ""
if len(optstring) > 0 {
prefix = "(?" + optstring + ")"
}

// Do variable interpolation.
prepattern, perr := substitute(template[0:lastDollar+1], state, nil)
prepattern, perr := substitute(rdesc.Template, state, nil)
if perr != nil {
return nil, perr
}
Expand All @@ -852,9 +886,9 @@ func interpretRegex(g ProofContextExt, template string, state ScriptState) (*reg
// Build the regex.
re, err := regexp.Compile(pattern)
if err != nil {
debugWithState(g, state, "Could not compile regex: %v\n %v\n %v", err, template, pattern)
debugWithState(g, state, "Could not compile regex: %v\n %v\n %v", err, rdesc.Template, pattern)
return nil, libkb.NewProofError(keybase1.ProofStatus_INVALID_PVL,
"Could not compile regex: %v (%v)", err, template)
"Could not compile regex: %v (%v)", err, rdesc.Template)
}
return re, nil
}
Expand Down

0 comments on commit 3488648

Please sign in to comment.