
Commit

Add SearchProfiles

Minor changes and fixes

Alexander Sheiko committed Apr 22, 2021
1 parent f3597d0 commit 5032ecd
Showing 9 changed files with 268 additions and 100 deletions.
23 changes: 23 additions & 0 deletions README.md
@@ -103,6 +103,7 @@ Options:
* `twitterscraper.SearchLatest` - live mode
* `twitterscraper.SearchPhotos` - image mode
* `twitterscraper.SearchVideos` - video mode
+* `twitterscraper.SearchUsers` - user mode

### Get profile

@@ -124,6 +125,28 @@ func main() {
}
```

+### Search profiles by query
+
+```golang
+package main
+
+import (
+	"context"
+	"fmt"
+	twitterscraper "github.com/n0madic/twitter-scraper"
+)
+
+func main() {
+	scraper := twitterscraper.New().SetSearchMode(twitterscraper.SearchUsers)
+	for profile := range scraper.SearchProfiles(context.Background(), "Twitter", 50) {
+		if profile.Error != nil {
+			panic(profile.Error)
+		}
+		fmt.Println(profile.Name)
+	}
+}
+```
+
### Get trends

```golang
42 changes: 4 additions & 38 deletions api.go
@@ -11,42 +11,6 @@ import (

const bearerToken string = "AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA"

-type user struct {
-	Data struct {
-		User struct {
-			RestID string `json:"rest_id"`
-			Legacy struct {
-				CreatedAt   string `json:"created_at"`
-				Description string `json:"description"`
-				Entities    struct {
-					URL struct {
-						Urls []struct {
-							ExpandedURL string `json:"expanded_url"`
-						} `json:"urls"`
-					} `json:"url"`
-				} `json:"entities"`
-				FavouritesCount      int      `json:"favourites_count"`
-				FollowersCount       int      `json:"followers_count"`
-				FriendsCount         int      `json:"friends_count"`
-				IDStr                string   `json:"id_str"`
-				ListedCount          int      `json:"listed_count"`
-				Name                 string   `json:"name"`
-				Location             string   `json:"location"`
-				PinnedTweetIdsStr    []string `json:"pinned_tweet_ids_str"`
-				ProfileBannerURL     string   `json:"profile_banner_url"`
-				ProfileImageURLHTTPS string   `json:"profile_image_url_https"`
-				Protected            bool     `json:"protected"`
-				ScreenName           string   `json:"screen_name"`
-				StatusesCount        int      `json:"statuses_count"`
-				Verified             bool     `json:"verified"`
-			} `json:"legacy"`
-		} `json:"user"`
-	} `json:"data"`
-	Errors []struct {
-		Message string `json:"message"`
-	} `json:"errors"`
-}
-
// Global cache for user IDs
var cacheIDs sync.Map

@@ -70,7 +34,8 @@ func (s *Scraper) RequestAPI(req *http.Request, target interface{}) error {

	// private profiles return forbidden, but also data
	if resp.StatusCode != http.StatusOK && resp.StatusCode != http.StatusForbidden {
-		return fmt.Errorf("response status %s", resp.Status)
+		content, _ := ioutil.ReadAll(resp.Body)
+		return fmt.Errorf("response status %s: %s", resp.Status, content)
	}

	if resp.Header.Get("X-Rate-Limit-Remaining") == "0" {
@@ -95,7 +60,8 @@ func (s *Scraper) GetGuestToken() error {
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
-		return fmt.Errorf("response status %s", resp.Status)
+		content, _ := ioutil.ReadAll(resp.Body)
+		return fmt.Errorf("response status %s: %s", resp.Status, content)
	}
	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
47 changes: 14 additions & 33 deletions profile.go
@@ -30,6 +30,18 @@ type Profile struct {
	Website string
}

+type user struct {
+	Data struct {
+		User struct {
+			RestID string     `json:"rest_id"`
+			Legacy legacyUser `json:"legacy"`
+		} `json:"user"`
+	} `json:"data"`
+	Errors []struct {
+		Message string `json:"message"`
+	} `json:"errors"`
+}
+
// GetProfile return parsed user profile.
func (s *Scraper) GetProfile(username string) (Profile, error) {
	var jsn user
@@ -50,44 +62,13 @@ func (s *Scraper) GetProfile(username string) (Profile, error) {
	if jsn.Data.User.RestID == "" {
		return Profile{}, fmt.Errorf("rest_id not found")
	}
+	jsn.Data.User.Legacy.IDStr = jsn.Data.User.RestID

	if jsn.Data.User.Legacy.ScreenName == "" {
		return Profile{}, fmt.Errorf("either @%s does not exist or is private", username)
	}

-	user := jsn.Data.User.Legacy
-
-	profile := Profile{
-		Avatar:         user.ProfileImageURLHTTPS,
-		Banner:         user.ProfileBannerURL,
-		Biography:      user.Description,
-		FollowersCount: user.FollowersCount,
-		FollowingCount: user.FavouritesCount,
-		FriendsCount:   user.FriendsCount,
-		IsPrivate:      user.Protected,
-		IsVerified:     user.Verified,
-		LikesCount:     user.FavouritesCount,
-		ListedCount:    user.ListedCount,
-		Location:       user.Location,
-		Name:           user.Name,
-		PinnedTweetIDs: user.PinnedTweetIdsStr,
-		TweetsCount:    user.StatusesCount,
-		URL:            "https://twitter.com/" + user.ScreenName,
-		UserID:         jsn.Data.User.RestID,
-		Username:       user.ScreenName,
-	}
-
-	tm, err := time.Parse(time.RubyDate, user.CreatedAt)
-	if err == nil {
-		tm = tm.UTC()
-		profile.Joined = &tm
-	}
-
-	if len(user.Entities.URL.Urls) > 0 {
-		profile.Website = user.Entities.URL.Urls[0].ExpandedURL
-	}
-
-	return profile, nil
+	return parseProfile(jsn.Data.User.Legacy), nil
}

// GetProfile wrapper for default scraper
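The refactored GetProfile above delegates all field mapping to a parseProfile helper that this diff does not render (it presumably lives in one of the two changed files not shown on this page). A plausible sketch, assuming it simply lifts the inline mapping deleted above into a function over legacyUser; note how the new `jsn.Data.User.Legacy.IDStr = jsn.Data.User.RestID` assignment lets the helper read the user ID from the legacy struct:

```golang
// Sketch only: reconstructed from the inline mapping removed in this commit.
func parseProfile(user legacyUser) Profile {
	profile := Profile{
		Avatar:         user.ProfileImageURLHTTPS,
		Banner:         user.ProfileBannerURL,
		Biography:      user.Description,
		FollowersCount: user.FollowersCount,
		FollowingCount: user.FavouritesCount, // mirrors the removed code, which mapped FollowingCount to favourites_count
		FriendsCount:   user.FriendsCount,
		IsPrivate:      user.Protected,
		IsVerified:     user.Verified,
		LikesCount:     user.FavouritesCount,
		ListedCount:    user.ListedCount,
		Location:       user.Location,
		Name:           user.Name,
		PinnedTweetIDs: user.PinnedTweetIdsStr,
		TweetsCount:    user.StatusesCount,
		URL:            "https://twitter.com/" + user.ScreenName,
		UserID:         user.IDStr,
		Username:       user.ScreenName,
	}

	// Twitter returns Ruby-style timestamps such as
	// "Mon Jan 02 15:04:05 -0700 2006"; unparsable values leave Joined nil.
	if tm, err := time.Parse(time.RubyDate, user.CreatedAt); err == nil {
		tm = tm.UTC()
		profile.Joined = &tm
	}

	if len(user.Entities.URL.Urls) > 0 {
		profile.Website = user.Entities.URL.Urls[0].ExpandedURL
	}

	return profile
}
```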
2 changes: 2 additions & 0 deletions scraper.go
@@ -31,6 +31,8 @@ const (
	SearchPhotos
	// SearchVideos - video mode
	SearchVideos
+	// SearchUsers - user mode
+	SearchUsers
)

var defaultScraper *Scraper
52 changes: 41 additions & 11 deletions search.go
@@ -7,30 +7,40 @@ import (
)

// SearchTweets returns channel with tweets for a given search query
-func (s *Scraper) SearchTweets(ctx context.Context, query string, maxTweetsNbr int) <-chan *Result {
-	return getTimeline(ctx, query, maxTweetsNbr, s.FetchSearchTweets)
+func (s *Scraper) SearchTweets(ctx context.Context, query string, maxTweetsNbr int) <-chan *TweetResult {
+	return getTweetTimeline(ctx, query, maxTweetsNbr, s.FetchSearchTweets)
}

// SearchTweets wrapper for default Scraper
-func SearchTweets(ctx context.Context, query string, maxTweetsNbr int) <-chan *Result {
+func SearchTweets(ctx context.Context, query string, maxTweetsNbr int) <-chan *TweetResult {
	return defaultScraper.SearchTweets(ctx, query, maxTweetsNbr)
}

-// FetchSearchTweets gets tweets for a given search query, via the Twitter frontend API
-func (s *Scraper) FetchSearchTweets(query string, maxTweetsNbr int, cursor string) ([]*Tweet, string, error) {
+// SearchProfiles returns channel with profiles for a given search query
+func (s *Scraper) SearchProfiles(ctx context.Context, query string, maxProfilesNbr int) <-chan *ProfileResult {
+	return getUserTimeline(ctx, query, maxProfilesNbr, s.FetchSearchProfiles)
+}
+
+// SearchProfiles wrapper for default Scraper
+func SearchProfiles(ctx context.Context, query string, maxProfilesNbr int) <-chan *ProfileResult {
+	return defaultScraper.SearchProfiles(ctx, query, maxProfilesNbr)
+}
+
+// getSearchTimeline gets results for a given search query, via the Twitter frontend API
+func (s *Scraper) getSearchTimeline(query string, maxNbr int, cursor string) (*timeline, error) {
	query = url.PathEscape(query)
-	if maxTweetsNbr > 100 {
-		maxTweetsNbr = 100
+	if maxNbr > 50 {
+		maxNbr = 50
	}

	req, err := s.newRequest("GET", "https://twitter.com/i/api/2/search/adaptive.json")
	if err != nil {
-		return nil, "", err
+		return nil, err
	}

	q := req.URL.Query()
	q.Add("q", query)
-	q.Add("count", strconv.Itoa(maxTweetsNbr))
+	q.Add("count", strconv.Itoa(maxNbr))
	q.Add("query_source", "typed_query")
	q.Add("pc", "1")
	q.Add("spelling_corrections", "1")
@@ -44,16 +54,36 @@ func (s *Scraper) FetchSearchTweets(query string, maxTweetsNbr int, cursor strin
		q.Add("result_filter", "image")
	case SearchVideos:
		q.Add("result_filter", "video")
+	case SearchUsers:
+		q.Add("result_filter", "user")
	}

	req.URL.RawQuery = q.Encode()

	var timeline timeline
	err = s.RequestAPI(req, &timeline)
	if err != nil {
-		return nil, "", err
+		return nil, err
	}
+	return &timeline, nil
+}

-	tweets, nextCursor := parseTimeline(&timeline)
+// FetchSearchTweets gets tweets for a given search query, via the Twitter frontend API
+func (s *Scraper) FetchSearchTweets(query string, maxTweetsNbr int, cursor string) ([]*Tweet, string, error) {
+	timeline, err := s.getSearchTimeline(query, maxTweetsNbr, cursor)
+	if err != nil {
+		return nil, "", err
+	}
+	tweets, nextCursor := parseTimeline(timeline)
	return tweets, nextCursor, nil
}
+
+// FetchSearchProfiles gets users for a given search query, via the Twitter frontend API
+func (s *Scraper) FetchSearchProfiles(query string, maxProfilesNbr int, cursor string) ([]*Profile, string, error) {
+	timeline, err := s.getSearchTimeline(query, maxProfilesNbr, cursor)
+	if err != nil {
+		return nil, "", err
+	}
+	users, nextCursor := parseUsers(timeline)
+	return users, nextCursor, nil
+}
36 changes: 31 additions & 5 deletions search_test.go
@@ -16,24 +16,50 @@ func TestFetchSearchCursor(t *testing.T) {
			t.Fatal(err)
		}
		if cursor == "" {
-			t.Fatal("Expected search cursor is not empty")
+			t.Fatal("Expected search cursor is empty")
		}
		tweetsNbr += len(tweets)
		nextCursor = cursor
	}
}

+func TestGetSearchProfiles(t *testing.T) {
+	count := 0
+	maxProfilesNbr := 150
+	dupcheck := make(map[string]bool)
+	scraper := New().SetSearchMode(SearchUsers)
+	for profile := range scraper.SearchProfiles(context.Background(), "Twitter", maxProfilesNbr) {
+		if profile.Error != nil {
+			t.Error(profile.Error)
+		} else {
+			count++
+			if profile.UserID == "" {
+				t.Error("Expected UserID is empty")
+			} else {
+				if dupcheck[profile.UserID] {
+					t.Errorf("Detect duplicated UserID: %s", profile.UserID)
+				} else {
+					dupcheck[profile.UserID] = true
+				}
+			}
+		}
+	}
+
+	if count != maxProfilesNbr {
+		t.Errorf("Expected profiles count=%v, got: %v", maxProfilesNbr, count)
+	}
+}
func TestGetSearchTweets(t *testing.T) {
	count := 0
-	maxTweetsNbr := 250
+	maxTweetsNbr := 150
	dupcheck := make(map[string]bool)
	for tweet := range SearchTweets(context.Background(), "twitter -filter:retweets", maxTweetsNbr) {
		if tweet.Error != nil {
			t.Error(tweet.Error)
		} else {
			count++
			if tweet.ID == "" {
-				t.Error("Expected tweet ID is not empty")
+				t.Error("Expected tweet ID is empty")
			} else {
				if dupcheck[tweet.ID] {
					t.Errorf("Detect duplicated tweet ID: %s", tweet.ID)
@@ -42,13 +68,13 @@ func TestGetSearchTweets(t *testing.T) {
				}
			}
			if tweet.PermanentURL == "" {
-				t.Error("Expected tweet PermanentURL is not empty")
+				t.Error("Expected tweet PermanentURL is empty")
			}
			if tweet.IsRetweet {
				t.Error("Expected tweet IsRetweet is false")
			}
			if tweet.Text == "" {
-				t.Error("Expected tweet Text is not empty")
+				t.Error("Expected tweet Text is empty")
			}
		}
	}
6 changes: 3 additions & 3 deletions tweets.go
@@ -7,12 +7,12 @@ import (
)

// GetTweets returns channel with tweets for a given user.
-func (s *Scraper) GetTweets(ctx context.Context, user string, maxTweetsNbr int) <-chan *Result {
-	return getTimeline(ctx, user, maxTweetsNbr, s.FetchTweets)
+func (s *Scraper) GetTweets(ctx context.Context, user string, maxTweetsNbr int) <-chan *TweetResult {
+	return getTweetTimeline(ctx, user, maxTweetsNbr, s.FetchTweets)
}

// GetTweets wrapper for default Scraper
-func GetTweets(ctx context.Context, user string, maxTweetsNbr int) <-chan *Result {
+func GetTweets(ctx context.Context, user string, maxTweetsNbr int) <-chan *TweetResult {
	return defaultScraper.GetTweets(ctx, user, maxTweetsNbr)
}

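The renamed channel element types do not appear in this diff either; they presumably live in the timeline files not rendered on this page. Given that the README example reads profile.Error and profile.Name, and the tests read tweet.Error alongside tweet.ID, each result type most likely pairs its payload with a per-item error. A sketch of what those call sites imply, where the struct embedding is an assumption:

```golang
// TweetResult is what GetTweets and SearchTweets channels emit.
type TweetResult struct {
	Tweet       // embedded, so tweet.ID, tweet.Text, etc. work directly
	Error error // set when fetching a page failed
}

// ProfileResult is the SearchProfiles counterpart.
type ProfileResult struct {
	Profile
	Error error
}
```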