Skip to content

Commit

Permalink
fixed empty folder issue, added count mechanism for irrelevant files.…
Browse files Browse the repository at this point in the history
… added sleep for queue to avoid heap overflow
  • Loading branch information
lorenyeung committed May 12, 2021
1 parent 53156c0 commit cbf9895
Show file tree
Hide file tree
Showing 4 changed files with 45 additions and 22 deletions.
19 changes: 10 additions & 9 deletions docker/docker.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"encoding/json"
"fmt"
"go-pkgdl/auth"
"go-pkgdl/helpers"

"strings"
"time"
Expand Down Expand Up @@ -44,7 +45,7 @@ type Metadata struct {
}

//GetDockerImages searches for Docker images using the Docker Engine API image search
func GetDockerImages(artURL string, artUser string, artApikey string, dockerRepo string, url string, base string, index int, component string, dockerWorkerQueue *list.List, random bool, workerSleepVar int) string {
func GetDockerImages(artURL string, artUser string, artApikey string, dockerRepo string, url string, base string, index int, component string, dockerWorkerQueue *list.List, flags helpers.Flags) string {

//https://github.com/moby/moby/blob/master/client/image_search.go#L17
ctx := context.Background()
Expand All @@ -66,30 +67,30 @@ func GetDockerImages(artURL string, artUser string, artApikey string, dockerRepo
for j := 33; j <= 58; j++ {
dockerSearchStr := string(rune('A'-1+i)) + string(rune('A'-1+j))
randomSearchMap[dockerSearchStr] = "taken"
if !random {
if !flags.RandomVar {
log.Debug("Docker ordered search key:", dockerSearchStr)
results, err := cli.ImageSearch(ctx, dockerSearchStr, imageSearch)
if err != nil {
log.Error("Docker image search error:", err)
}
dockerSearch(dockerSearchStr, results, artURL, artUser, artApikey, dockerRepo, dockerWorkerQueue, workerSleepVar)
dockerSearch(dockerSearchStr, results, artURL, artUser, artApikey, dockerRepo, dockerWorkerQueue, flags)
}
}
}

//random search of docker images
if random {
if flags.RandomVar {
for key, value := range randomSearchMap {
log.Debug("Docker Random result search Key:", key, " Value:", value)
results, _ := cli.ImageSearch(ctx, key, imageSearch)
dockerSearch(key, results, artURL, artUser, artApikey, dockerRepo, dockerWorkerQueue, workerSleepVar)
dockerSearch(key, results, artURL, artUser, artApikey, dockerRepo, dockerWorkerQueue, flags)
}
}

return ""
}

func dockerSearch(search string, results []registry.SearchResult, artURL string, artUser string, artApikey string, dockerRepo string, dockerWorkerQueue *list.List, workerSleepVar int) {
func dockerSearch(search string, results []registry.SearchResult, artURL string, artUser string, artApikey string, dockerRepo string, dockerWorkerQueue *list.List, flags helpers.Flags) {
//gets name, then loops through tags

for x := range results {
Expand All @@ -111,9 +112,9 @@ func dockerSearch(search string, results []registry.SearchResult, artURL string,
log.Trace("Docker Queue pushing into queue:", dockerMd.ManifestURLFile)
dockerWorkerQueue.PushBack(dockerMd)

for dockerWorkerQueue.Len() > 75 {
log.Debug("Docker worker queue is at ", dockerWorkerQueue.Len(), ", sleeping for ", workerSleepVar, " seconds...")
time.Sleep(time.Duration(workerSleepVar) * time.Second)
for dockerWorkerQueue.Len() > flags.SleepQueueMaxVar {
log.Debug("Docker worker queue is at ", dockerWorkerQueue.Len(), ", queue max is set to ", flags.SleepQueueMaxVar, ", sleeping for ", flags.WorkerSleepVar, " seconds...")
time.Sleep(time.Duration(flags.WorkerSleepVar) * time.Second)
}
log.Trace("Queue at:", dockerWorkerQueue.Len(), ", resuming docker worker queue")
}
Expand Down
2 changes: 1 addition & 1 deletion metadata.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"script_version": "v1.0.0" }
{"script_version": "v1.1.0" }
6 changes: 3 additions & 3 deletions pkgdl/pkgdl.go
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ func main() {
log.Warn("Work in progress, only works against Docker Hub")
go func() {
log.Info("testing if it goes in here multiple times case repotype") //it does not
docker.GetDockerImages(creds.URL, creds.Username, creds.Apikey, flags.RepoVar, extractedURL, extractedURLStripped, 1, "", workQueue, flags.RandomVar, flags.WorkerSleepVar)
docker.GetDockerImages(creds.URL, creds.Username, creds.Apikey, flags.RepoVar, extractedURL, extractedURLStripped, 1, "", workQueue, flags)
}()

case "generic":
Expand Down Expand Up @@ -191,7 +191,7 @@ func main() {
go func() {
log.Info("rpm takes 10 seconds to init, please be patient")
//buggy. looks like there is a recursive search that screws it up
rpm.GetRpmHrefs(extractedURL, extractedURLStripped, workQueue)
rpm.GetRpmHrefs(extractedURL, extractedURLStripped, workQueue, flags)
}()

default:
Expand Down Expand Up @@ -278,7 +278,7 @@ func main() {
log.Info(repotype, " work queue is empty, sleeping for ", flags.WorkerSleepVar, " seconds...")
time.Sleep(time.Duration(flags.WorkerSleepVar) * time.Second)
count0++
if count0 > 10 {
if count0 > 50 {
log.Warn("Looks like nothing's getting put into the workqueue. You might want to enable -debug and take a look")
}
if workQueue.Len() > 0 {
Expand Down
40 changes: 31 additions & 9 deletions rpm/rpm.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@ package rpm

import (
"container/list"
"fmt"
"go-pkgdl/helpers"
"net/http"
"strings"
"time"

log "github.com/sirupsen/logrus"

Expand All @@ -18,8 +18,10 @@ type Metadata struct {
File string
}

var junk int

//GetRpmHrefs parses hrefs for RPM files
func GetRpmHrefs(url string, base string, RpmWorkerQueue *list.List) string {
func GetRpmHrefs(url string, base string, RpmWorkerQueue *list.List, flags helpers.Flags) string {
resp, err := http.Get(url)
// this needs to be threaded better..
helpers.Check(err, false, "HTTP GET error", helpers.Trace())
Expand All @@ -41,35 +43,55 @@ func GetRpmHrefs(url string, base string, RpmWorkerQueue *list.List) string {
for _, a := range t.Attr {
if a.Key == "href" && (strings.HasSuffix(a.Val, "/")) && a.Val != "/" && !strings.Contains(a.Val, "://") && a.Val != "centos/" {

log.Debug("for", url+a.Val)
log.Debug("for:", url+a.Val)
if resp.StatusCode == 404 {
log.Info("stop recursion on non 200 response code for:", url+a.Val)
break
}

GetRpmHrefs(url+a.Val, base, RpmWorkerQueue)
GetRpmHrefs(url+a.Val, base, RpmWorkerQueue, flags)
break
}
}
checkRpm(t, url, base, RpmWorkerQueue)
junk = checkRpm(t, url, base, RpmWorkerQueue, flags, junk)
}
}
}
}

func checkRpm(t html.Token, url string, base string, RpmWorkerQueue *list.List) {

//checkRpm inspects a single HTML token and, when it links to an .rpm file,
//pushes a Metadata entry for it onto rpmWorkerQueue.
//
//Parameters:
//  t              - the HTML token to inspect
//  url            - prefix that is prepended to the href value
//  base           - prefix that is trimmed from url+href before queuing
//  rpmWorkerQueue - list that receives the Metadata entries
//  flags          - supplies SleepQueueMaxVar (queue length threshold) and
//                   WorkerSleepVar (sleep interval in seconds)
//  junk           - running count of non-.rpm tokens seen so far
//
//Returns junk+1 when the token's string form does not contain ".rpm",
//and 0 otherwise.
//NOTE(review): the caller assigns the result back to its junk counter, so the
//0 returned on the .rpm path resets that counter - confirm this is intended.
func checkRpm(t html.Token, url string, base string, rpmWorkerQueue *list.List, flags helpers.Flags, junk int) int {
log.Trace("received url token:", t.String())
//cheap pre-filter on the whole token text before walking its attributes
if strings.Contains(t.String(), ".rpm") {
for _, a := range t.Attr {
if a.Key == "href" && (strings.HasSuffix(a.Val, ".rpm")) {
//build the full link, then strip the base prefix from it
hrefraw := url + a.Val
href := strings.TrimPrefix(hrefraw, base)

//NOTE(review): the next line looks like the pre-change version of the log.Info
//call below it; RpmWorkerQueue is not a parameter of this signature - confirm and drop
fmt.Println("queuing download", href, a.Val, RpmWorkerQueue.Len())
log.Info("queuing ", rpmWorkerQueue.Len(), " for download:", href, a.Val)

//add RPM metadata to queue
var RpmMd Metadata
RpmMd.URL = strings.TrimPrefix(href, "/centos")
RpmMd.File = a.Val
//NOTE(review): the next line looks like the pre-change version of the PushBack
//call below it (same RpmWorkerQueue name mismatch) - confirm and drop
RpmWorkerQueue.PushBack(RpmMd)
rpmWorkerQueue.PushBack(RpmMd)

//hold this producer while the queue is longer than the configured maximum;
//presumably a consumer elsewhere drains the queue - verify against caller
for rpmWorkerQueue.Len() > flags.SleepQueueMaxVar {
log.Info("RPM worker queue is at ", rpmWorkerQueue.Len(), ", queue max is set to ", flags.SleepQueueMaxVar, ", sleeping for ", flags.WorkerSleepVar, " seconds...")
time.Sleep(time.Duration(flags.WorkerSleepVar) * time.Second)
}
log.Trace("Queue at:", rpmWorkerQueue.Len(), ", resuming RPM worker queue")
//only the first matching href of a token is queued
break
}
}
} else {
//there are a lot of other file types, don't want to log every one of them:
//only report at info level on every 100th non-.rpm token
if junk%100 == 0 {
log.Info("found ", junk, "+ files that aren't .rpm, ignoring them")
}
junk++

log.Debug("ignoring non .rpm URL received:", t.Attr)
return junk
}
//reached whenever the token text contained ".rpm", whether or not an href matched
return 0
}

0 comments on commit cbf9895

Please sign in to comment.