-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.go
95 lines (88 loc) · 2.85 KB
/
main.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
package main
import (
"encoding/json"
"fmt"
"io/ioutil"
"math/rand"
"strings"
"time"
"github.com/PuerkitoBio/goquery"
)
// main collects the ranking rows and writes them, one per line, to
// output.csv in the current working directory.
func main() {
	rows := ranking()
	output := strings.Join(rows, "\n")
	// A failed write used to be silently ignored; report it so that an
	// empty or missing output.csv is not mistaken for a successful run.
	if err := ioutil.WriteFile("output.csv", []byte(output), 0644); err != nil {
		fmt.Printf("WriteFile error. %v\n", err)
	}
}
// ranking walks pages 1..250 of the Kaggle competitions ranking JSON
// (20 entries per page), fetched through the rendering endpoint at
// localhost:8050, and calls user for every listed profile URL.
//
// It returns the CSV header line followed by one line for each profile for
// which user returned a non-empty row. Pages or entries that cannot be
// fetched or decoded are logged to stdout and skipped instead of panicking.
func ranking() []string {
	rand.Seed(time.Now().UnixNano())
	header := "name, bio, country, Kaggle, Twitter, LinkedIn, Github, Blog"
	results := []string{header}
	for page := 1; page <= 250; page++ {
		doc, err := goquery.NewDocument("http://localhost:8050/render.html?url=https%3A%2F%2Fwww.kaggle.com%2Frankings.json%3Fgroup%3Dcompetitions%26page%3D" + fmt.Sprint(page) + "%26pageSize%3D20&timeout=10&wait=5")
		if err != nil {
			// doc is nil on error; using it would panic, so skip the page.
			fmt.Println(err)
			continue
		}

		var decoded interface{}
		if err := json.Unmarshal([]byte(doc.Text()), &decoded); err != nil {
			fmt.Printf("Skip because of invalid JSON on page %d. %v\n", page, err)
			continue
		}
		d, ok := decoded.(map[string]interface{})
		if !ok || d["list"] == nil {
			fmt.Println("Skip because of d[list] is nil.")
			continue
		}
		list, ok := d["list"].([]interface{})
		if !ok {
			fmt.Println("Skip because of d[list] is not an array.")
			continue
		}

		for _, rank := range list {
			entry, ok := rank.(map[string]interface{})
			if !ok {
				fmt.Println("Skip because of malformed ranking entry.")
				continue
			}
			userURL, ok := entry["userUrl"].(string)
			if !ok || userURL == "" {
				// Without a profile path there is nothing to request.
				fmt.Println("Skip because of missing userUrl.")
				continue
			}
			if row := user(fmt.Sprintf("https://www.kaggle.com%s", userURL)); row != "" {
				results = append(results, row)
			}
			// Random 1-5 second pause between profile requests.
			time.Sleep(time.Duration(rand.Intn(5)+1) * time.Second)
		}
	}
	fmt.Printf("%v\n", results)
	return results
}
// user fetches the Kaggle profile page at url through the rendering endpoint
// at localhost:8050, extracts the JSON object passed to Kaggle.State.push(...)
// in the page's script, and returns one comma-separated row:
//
//	name, "bio", country, profile URL, Twitter, LinkedIn, GitHub, website
//
// It returns "" (after logging the reason to stdout) when the page cannot be
// fetched, the profile data is missing or malformed, or the profile's country
// is not one of "Japan", "JP" or "日本". Fields absent from the profile are
// emitted as empty strings.
func user(url string) string {
	// NOTE(review): url is interpolated into the query string without
	// escaping; a profile URL containing '&' or '#' would be cut short.
	doc, err := goquery.NewDocument(fmt.Sprintf("http://localhost:8050/render.html?url=%s&timeout=10&wait=5", url))
	if err != nil {
		fmt.Printf("NewDocument error. %v\n", err)
		return ""
	}

	script := doc.Find("body > main > div > div.site-layout__main-content > script").Text()
	d, ok := profileFromScript(script)
	if !ok {
		return ""
	}

	country := profileField(d, "country")
	if country != "Japan" && country != "JP" && country != "日本" {
		fmt.Printf("Skip because of not a Japanese. %s\n", country)
		return ""
	}

	// Newlines and commas in the bio would break the one-row-per-line,
	// comma-separated layout, so they are replaced.
	bio := strings.Replace(strings.Replace(profileField(d, "bio"), "\n", " ", -1), ",", ";", -1)
	result := fmt.Sprintf("%s, \"%s\", %s, %s, %s, %s, %s, %s",
		profileField(d, "displayName"),
		bio,
		country,
		url,
		profileLink("https://twitter.com/", profileField(d, "twitterUserName")),
		profileField(d, "linkedInUrl"),
		profileLink("https://github.com/", profileField(d, "gitHubUserName")),
		profileField(d, "websiteUrl"))

	// Progress/debug dump of the raw profile values, one per line.
	for _, key := range []string{
		"displayName", "country", "linkedInUrl", "gitHubUserName",
		"twitterUserName", "websiteUrl", "organization", "bio",
		"userAvatarUrl", "email",
	} {
		fmt.Printf("%s\n", d[key])
	}
	return result
}

// profileFromScript decodes the first JSON value following the first
// "Kaggle.State.push(" in script and reports whether it is a JSON object.
// The reason for any failure is logged to stdout.
func profileFromScript(script string) (map[string]interface{}, bool) {
	const marker = "Kaggle.State.push("
	start := strings.Index(script, marker)
	if start < 0 {
		fmt.Println("Skip because of insufficient data.")
		return nil, false
	}

	// A Decoder reads exactly one JSON value and leaves the trailing ");..."
	// unread, so a ");" inside a string value no longer truncates the data.
	var decoded interface{}
	if err := json.NewDecoder(strings.NewReader(script[start+len(marker):])).Decode(&decoded); err != nil {
		fmt.Printf("Skip because of invalid JSON. %v\n", err)
		return nil, false
	}
	if decoded == nil {
		fmt.Println("Skip because data is nil.")
		return nil, false
	}
	d, ok := decoded.(map[string]interface{})
	if !ok {
		fmt.Println("Skip because data is not a JSON object.")
		return nil, false
	}
	return d, true
}

// profileField returns d[key] formatted as text, or "" when the key is
// missing or null, so that "<nil>" never reaches the output.
func profileField(d map[string]interface{}, key string) string {
	v := d[key]
	if v == nil {
		return ""
	}
	return fmt.Sprint(v)
}

// profileLink returns base+name, or "" when name is empty, so that a bare
// site URL is not written for profiles without an account name.
func profileLink(base, name string) string {
	if name == "" {
		return ""
	}
	return base + name
}