Skip to content

Commit

Permalink
chore: Track errors from sources
Browse files Browse the repository at this point in the history
  • Loading branch information
enenumxela committed Aug 4, 2023
1 parent ae777a2 commit e6d466f
Show file tree
Hide file tree
Showing 15 changed files with 635 additions and 225 deletions.
25 changes: 15 additions & 10 deletions cmd/xurlfind3r/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -302,19 +302,24 @@ func mkdir(path string) {
}
}

func outputURLs(writer *bufio.Writer, URLs chan sources.URL, verbosity string) {
func outputURLs(writer *bufio.Writer, URLs chan sources.Result, verbosity string) {
for URL := range URLs {
if verbosity == string(levels.LevelSilent) {
hqgolog.Print().Msg(URL.Value)
} else {
hqgolog.Print().Msgf("[%s] %s", au.BrightBlue(URL.Source), URL.Value)
}
switch URL.Type {
case sources.Error:
hqgolog.Warn().Msgf("Could not run source %s: %s\n", URL.Source, URL.Error)
case sources.URL:
if verbosity == string(levels.LevelDebug) {
hqgolog.Print().Msgf("[%s] %s", au.BrightBlue(URL.Source), URL.Value)
} else {
hqgolog.Print().Msg(URL.Value)
}

if writer != nil {
fmt.Fprintln(writer, URL.Value)
if writer != nil {
fmt.Fprintln(writer, URL.Value)

if err := writer.Flush(); err != nil {
hqgolog.Fatal().Msg(err.Error())
if err := writer.Flush(); err != nil {
hqgolog.Fatal().Msg(err.Error())
}
}
}
}
Expand Down
6 changes: 0 additions & 6 deletions pkg/xurlfind3r/sources/URL.go

This file was deleted.

43 changes: 37 additions & 6 deletions pkg/xurlfind3r/sources/bevigil/bevigil.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,18 +16,26 @@ type getURLsResponse struct {

type Source struct{}

func (source *Source) Run(config *sources.Configuration, domain string) (URLsChannel chan sources.URL) {
URLsChannel = make(chan sources.URL)
func (source *Source) Run(config *sources.Configuration, domain string) <-chan sources.Result {
results := make(chan sources.Result)

go func() {
defer close(URLsChannel)
defer close(results)

var err error

var key string

key, err = sources.PickRandom(config.Keys.Bevigil)
if key == "" || err != nil {
result := sources.Result{
Type: sources.Error,
Source: source.Name(),
Error: err,
}

results <- result

return
}

Expand All @@ -43,12 +51,29 @@ func (source *Source) Run(config *sources.Configuration, domain string) (URLsCha

getURLsRes, err = httpclient.Get(getURLsReqURL, "", getURLsReqHeaders)
if err != nil {
result := sources.Result{
Type: sources.Error,
Source: source.Name(),
Error: err,
}

results <- result

return
}

var getURLsResData getURLsResponse

if err = json.Unmarshal(getURLsRes.Body(), &getURLsResData); err != nil {
err = json.Unmarshal(getURLsRes.Body(), &getURLsResData)
if err != nil {
result := sources.Result{
Type: sources.Error,
Source: source.Name(),
Error: err,
}

results <- result

return
}

Expand All @@ -57,11 +82,17 @@ func (source *Source) Run(config *sources.Configuration, domain string) (URLsCha
continue
}

URLsChannel <- sources.URL{Source: source.Name(), Value: URL}
result := sources.Result{
Type: sources.URL,
Source: source.Name(),
Value: URL,
}

results <- result
}
}()

return
return results
}

func (source *Source) Name() string {
Expand Down
64 changes: 56 additions & 8 deletions pkg/xurlfind3r/sources/commoncrawl/commoncrawl.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,15 @@ type getURLsResponse struct {

type Source struct{}

func (source *Source) Run(config *sources.Configuration, domain string) (URLsChannel chan sources.URL) {
URLsChannel = make(chan sources.URL)
func (source *Source) Run(config *sources.Configuration, domain string) <-chan sources.Result {
results := make(chan sources.Result)

if config.IncludeSubdomains {
domain = "*." + domain
}

go func() {
defer close(URLsChannel)
defer close(results)

getIndexesReqURL := "https://index.commoncrawl.org/collinfo.json"

Expand All @@ -42,12 +42,29 @@ func (source *Source) Run(config *sources.Configuration, domain string) (URLsCha

getIndexesRes, err = httpclient.SimpleGet(getIndexesReqURL)
if err != nil {
result := sources.Result{
Type: sources.Error,
Source: source.Name(),
Error: err,
}

results <- result

return
}

var getIndexesResData getIndexesResponse

if err = json.Unmarshal(getIndexesRes.Body(), &getIndexesResData); err != nil {
err = json.Unmarshal(getIndexesRes.Body(), &getIndexesResData)
if err != nil {
result := sources.Result{
Type: sources.Error,
Source: source.Name(),
Error: err,
}

results <- result

return
}

Expand All @@ -71,6 +88,14 @@ func (source *Source) Run(config *sources.Configuration, domain string) (URLsCha

getURLsRes, err = httpclient.Get(getURLsReqURL, "", getURLsReqHeaders)
if err != nil {
result := sources.Result{
Type: sources.Error,
Source: source.Name(),
Error: err,
}

results <- result

return
}

Expand All @@ -79,7 +104,16 @@ func (source *Source) Run(config *sources.Configuration, domain string) (URLsCha
for scanner.Scan() {
var getURLsResData getURLsResponse

if err = json.Unmarshal(scanner.Bytes(), &getURLsResData); err != nil {
err = json.Unmarshal(scanner.Bytes(), &getURLsResData)
if err != nil {
result := sources.Result{
Type: sources.Error,
Source: source.Name(),
Error: err,
}

results <- result

return
}

Expand All @@ -93,10 +127,24 @@ func (source *Source) Run(config *sources.Configuration, domain string) (URLsCha
continue
}

URLsChannel <- sources.URL{Source: source.Name(), Value: URL}
result := sources.Result{
Type: sources.URL,
Source: source.Name(),
Value: URL,
}

results <- result
}

if scanner.Err() != nil {
if err = scanner.Err(); err != nil {
result := sources.Result{
Type: sources.Error,
Source: source.Name(),
Error: err,
}

results <- result

return
}
}(indexData.API)
Expand All @@ -105,7 +153,7 @@ func (source *Source) Run(config *sources.Configuration, domain string) (URLsCha
wg.Wait()
}()

return
return results
}

func (source *Source) Name() string {
Expand Down
Loading

0 comments on commit e6d466f

Please sign in to comment.