
Commit

Add SearchProfiles

Minor changes and fixes

Alexander Sheiko committed Apr 22, 2021
1 parent f3597d0 commit 5032ecd
Showing 9 changed files with 268 additions and 100 deletions.
23 changes: 23 additions & 0 deletions README.md
@@ -103,6 +103,7 @@ Options:
* `twitterscraper.SearchLatest` - live mode
* `twitterscraper.SearchPhotos` - image mode
* `twitterscraper.SearchVideos` - video mode
+* `twitterscraper.SearchUsers` - user mode

### Get profile

@@ -124,6 +125,28 @@ func main() {
}
```

+### Search profiles by query
+
+```golang
+package main
+
+import (
+	"context"
+	"fmt"
+	twitterscraper "github.com/n0madic/twitter-scraper"
+)
+
+func main() {
+	scraper := twitterscraper.New().SetSearchMode(twitterscraper.SearchUsers)
+	for profile := range scraper.SearchProfiles(context.Background(), "Twitter", 50) {
+		if profile.Error != nil {
+			panic(profile.Error)
+		}
+		fmt.Println(profile.Name)
+	}
+}
+```
+
### Get trends

```golang
42 changes: 4 additions & 38 deletions api.go
@@ -11,42 +11,6 @@ import (

const bearerToken string = "AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA"

-type user struct {
-	Data struct {
-		User struct {
-			RestID string `json:"rest_id"`
-			Legacy struct {
-				CreatedAt   string `json:"created_at"`
-				Description string `json:"description"`
-				Entities    struct {
-					URL struct {
-						Urls []struct {
-							ExpandedURL string `json:"expanded_url"`
-						} `json:"urls"`
-					} `json:"url"`
-				} `json:"entities"`
-				FavouritesCount      int      `json:"favourites_count"`
-				FollowersCount       int      `json:"followers_count"`
-				FriendsCount         int      `json:"friends_count"`
-				IDStr                string   `json:"id_str"`
-				ListedCount          int      `json:"listed_count"`
-				Name                 string   `json:"name"`
-				Location             string   `json:"location"`
-				PinnedTweetIdsStr    []string `json:"pinned_tweet_ids_str"`
-				ProfileBannerURL     string   `json:"profile_banner_url"`
-				ProfileImageURLHTTPS string   `json:"profile_image_url_https"`
-				Protected            bool     `json:"protected"`
-				ScreenName           string   `json:"screen_name"`
-				StatusesCount        int      `json:"statuses_count"`
-				Verified             bool     `json:"verified"`
-			} `json:"legacy"`
-		} `json:"user"`
-	} `json:"data"`
-	Errors []struct {
-		Message string `json:"message"`
-	} `json:"errors"`
-}
-
// Global cache for user IDs
var cacheIDs sync.Map

@@ -70,7 +34,8 @@ func (s *Scraper) RequestAPI(req *http.Request, target interface{}) error {

	// private profiles return forbidden, but also data
	if resp.StatusCode != http.StatusOK && resp.StatusCode != http.StatusForbidden {
-		return fmt.Errorf("response status %s", resp.Status)
+		content, _ := ioutil.ReadAll(resp.Body)
+		return fmt.Errorf("response status %s: %s", resp.Status, content)
	}

	if resp.Header.Get("X-Rate-Limit-Remaining") == "0" {
@@ -95,7 +60,8 @@ func (s *Scraper) GetGuestToken() error {
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
-		return fmt.Errorf("response status %s", resp.Status)
+		content, _ := ioutil.ReadAll(resp.Body)
+		return fmt.Errorf("response status %s: %s", resp.Status, content)
	}
	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
47 changes: 14 additions & 33 deletions profile.go
@@ -30,6 +30,18 @@ type Profile struct {
	Website string
}

+type user struct {
+	Data struct {
+		User struct {
+			RestID string     `json:"rest_id"`
+			Legacy legacyUser `json:"legacy"`
+		} `json:"user"`
+	} `json:"data"`
+	Errors []struct {
+		Message string `json:"message"`
+	} `json:"errors"`
+}
+
// GetProfile return parsed user profile.
func (s *Scraper) GetProfile(username string) (Profile, error) {
	var jsn user
@@ -50,44 +62,13 @@ func (s *Scraper) GetProfile(username string) (Profile, error) {
	if jsn.Data.User.RestID == "" {
		return Profile{}, fmt.Errorf("rest_id not found")
	}
+	jsn.Data.User.Legacy.IDStr = jsn.Data.User.RestID

	if jsn.Data.User.Legacy.ScreenName == "" {
		return Profile{}, fmt.Errorf("either @%s does not exist or is private", username)
	}

-	user := jsn.Data.User.Legacy
-
-	profile := Profile{
-		Avatar:         user.ProfileImageURLHTTPS,
-		Banner:         user.ProfileBannerURL,
-		Biography:      user.Description,
-		FollowersCount: user.FollowersCount,
-		FollowingCount: user.FavouritesCount,
-		FriendsCount:   user.FriendsCount,
-		IsPrivate:      user.Protected,
-		IsVerified:     user.Verified,
-		LikesCount:     user.FavouritesCount,
-		ListedCount:    user.ListedCount,
-		Location:       user.Location,
-		Name:           user.Name,
-		PinnedTweetIDs: user.PinnedTweetIdsStr,
-		TweetsCount:    user.StatusesCount,
-		URL:            "https://twitter.com/" + user.ScreenName,
-		UserID:         jsn.Data.User.RestID,
-		Username:       user.ScreenName,
-	}
-
-	tm, err := time.Parse(time.RubyDate, user.CreatedAt)
-	if err == nil {
-		tm = tm.UTC()
-		profile.Joined = &tm
-	}
-
-	if len(user.Entities.URL.Urls) > 0 {
-		profile.Website = user.Entities.URL.Urls[0].ExpandedURL
-	}
-
-	return profile, nil
+	return parseProfile(jsn.Data.User.Legacy), nil
}

// GetProfile wrapper for default scraper
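The refactored GetProfile above delegates all field mapping to a parseProfile helper that this diff does not render (it presumably lives in one of the two changed files not shown on this page). A plausible sketch, assuming it simply lifts the inline mapping deleted above into a function over legacyUser; note how the new `jsn.Data.User.Legacy.IDStr = jsn.Data.User.RestID` assignment lets the helper read the user ID from the legacy struct:

```golang
// Sketch only: reconstructed from the inline mapping removed in this commit.
func parseProfile(user legacyUser) Profile {
	profile := Profile{
		Avatar:         user.ProfileImageURLHTTPS,
		Banner:         user.ProfileBannerURL,
		Biography:      user.Description,
		FollowersCount: user.FollowersCount,
		FollowingCount: user.FavouritesCount, // mirrors the removed code, which mapped FollowingCount to favourites_count
		FriendsCount:   user.FriendsCount,
		IsPrivate:      user.Protected,
		IsVerified:     user.Verified,
		LikesCount:     user.FavouritesCount,
		ListedCount:    user.ListedCount,
		Location:       user.Location,
		Name:           user.Name,
		PinnedTweetIDs: user.PinnedTweetIdsStr,
		TweetsCount:    user.StatusesCount,
		URL:            "https://twitter.com/" + user.ScreenName,
		UserID:         user.IDStr,
		Username:       user.ScreenName,
	}

	// Twitter returns Ruby-style timestamps such as
	// "Mon Jan 02 15:04:05 -0700 2006"; unparsable values leave Joined nil.
	if tm, err := time.Parse(time.RubyDate, user.CreatedAt); err == nil {
		tm = tm.UTC()
		profile.Joined = &tm
	}

	if len(user.Entities.URL.Urls) > 0 {
		profile.Website = user.Entities.URL.Urls[0].ExpandedURL
	}

	return profile
}
```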
2 changes: 2 additions & 0 deletions scraper.go
@@ -31,6 +31,8 @@ const (
	SearchPhotos
	// SearchVideos - video mode
	SearchVideos
+	// SearchUsers - user mode
+	SearchUsers
)

var defaultScraper *Scraper
52 changes: 41 additions & 11 deletions search.go
@@ -7,30 +7,40 @@ import (
)

// SearchTweets returns channel with tweets for a given search query
-func (s *Scraper) SearchTweets(ctx context.Context, query string, maxTweetsNbr int) <-chan *Result {
-	return getTimeline(ctx, query, maxTweetsNbr, s.FetchSearchTweets)
+func (s *Scraper) SearchTweets(ctx context.Context, query string, maxTweetsNbr int) <-chan *TweetResult {
+	return getTweetTimeline(ctx, query, maxTweetsNbr, s.FetchSearchTweets)
}

// SearchTweets wrapper for default Scraper
-func SearchTweets(ctx context.Context, query string, maxTweetsNbr int) <-chan *Result {
+func SearchTweets(ctx context.Context, query string, maxTweetsNbr int) <-chan *TweetResult {
	return defaultScraper.SearchTweets(ctx, query, maxTweetsNbr)
}

-// FetchSearchTweets gets tweets for a given search query, via the Twitter frontend API
-func (s *Scraper) FetchSearchTweets(query string, maxTweetsNbr int, cursor string) ([]*Tweet, string, error) {
+// SearchProfiles returns channel with profiles for a given search query
+func (s *Scraper) SearchProfiles(ctx context.Context, query string, maxProfilesNbr int) <-chan *ProfileResult {
+	return getUserTimeline(ctx, query, maxProfilesNbr, s.FetchSearchProfiles)
+}
+
+// SearchProfiles wrapper for default Scraper
+func SearchProfiles(ctx context.Context, query string, maxProfilesNbr int) <-chan *ProfileResult {
+	return defaultScraper.SearchProfiles(ctx, query, maxProfilesNbr)
+}
+
+// getSearchTimeline gets results for a given search query, via the Twitter frontend API
+func (s *Scraper) getSearchTimeline(query string, maxNbr int, cursor string) (*timeline, error) {
	query = url.PathEscape(query)
-	if maxTweetsNbr > 100 {
-		maxTweetsNbr = 100
+	if maxNbr > 50 {
+		maxNbr = 50
	}

	req, err := s.newRequest("GET", "https://twitter.com/i/api/2/search/adaptive.json")
	if err != nil {
-		return nil, "", err
+		return nil, err
	}

	q := req.URL.Query()
	q.Add("q", query)
-	q.Add("count", strconv.Itoa(maxTweetsNbr))
+	q.Add("count", strconv.Itoa(maxNbr))
	q.Add("query_source", "typed_query")
	q.Add("pc", "1")
	q.Add("spelling_corrections", "1")
@@ -44,16 +54,36 @@ func (s *Scraper) FetchSearchTweets(query string, maxTweetsNbr int, cursor strin
		q.Add("result_filter", "image")
	case SearchVideos:
		q.Add("result_filter", "video")
+	case SearchUsers:
+		q.Add("result_filter", "user")
	}

	req.URL.RawQuery = q.Encode()

	var timeline timeline
	err = s.RequestAPI(req, &timeline)
	if err != nil {
-		return nil, "", err
+		return nil, err
	}
+	return &timeline, nil
+}

-	tweets, nextCursor := parseTimeline(&timeline)
+// FetchSearchTweets gets tweets for a given search query, via the Twitter frontend API
+func (s *Scraper) FetchSearchTweets(query string, maxTweetsNbr int, cursor string) ([]*Tweet, string, error) {
+	timeline, err := s.getSearchTimeline(query, maxTweetsNbr, cursor)
+	if err != nil {
+		return nil, "", err
+	}
+	tweets, nextCursor := parseTimeline(timeline)
	return tweets, nextCursor, nil
}
+
+// FetchSearchProfiles gets users for a given search query, via the Twitter frontend API
+func (s *Scraper) FetchSearchProfiles(query string, maxProfilesNbr int, cursor string) ([]*Profile, string, error) {
+	timeline, err := s.getSearchTimeline(query, maxProfilesNbr, cursor)
+	if err != nil {
+		return nil, "", err
+	}
+	users, nextCursor := parseUsers(timeline)
+	return users, nextCursor, nil
+}
36 changes: 31 additions & 5 deletions search_test.go
@@ -16,24 +16,50 @@ func TestFetchSearchCursor(t *testing.T) {
			t.Fatal(err)
		}
		if cursor == "" {
-			t.Fatal("Expected search cursor is not empty")
+			t.Fatal("Expected search cursor is empty")
		}
		tweetsNbr += len(tweets)
		nextCursor = cursor
	}
}

+func TestGetSearchProfiles(t *testing.T) {
+	count := 0
+	maxProfilesNbr := 150
+	dupcheck := make(map[string]bool)
+	scraper := New().SetSearchMode(SearchUsers)
+	for profile := range scraper.SearchProfiles(context.Background(), "Twitter", maxProfilesNbr) {
+		if profile.Error != nil {
+			t.Error(profile.Error)
+		} else {
+			count++
+			if profile.UserID == "" {
+				t.Error("Expected UserID is empty")
+			} else {
+				if dupcheck[profile.UserID] {
+					t.Errorf("Detect duplicated UserID: %s", profile.UserID)
+				} else {
+					dupcheck[profile.UserID] = true
+				}
+			}
+		}
+	}
+
+	if count != maxProfilesNbr {
+		t.Errorf("Expected profiles count=%v, got: %v", maxProfilesNbr, count)
+	}
+}
func TestGetSearchTweets(t *testing.T) {
	count := 0
-	maxTweetsNbr := 250
+	maxTweetsNbr := 150
	dupcheck := make(map[string]bool)
	for tweet := range SearchTweets(context.Background(), "twitter -filter:retweets", maxTweetsNbr) {
		if tweet.Error != nil {
			t.Error(tweet.Error)
		} else {
			count++
			if tweet.ID == "" {
-				t.Error("Expected tweet ID is not empty")
+				t.Error("Expected tweet ID is empty")
			} else {
				if dupcheck[tweet.ID] {
					t.Errorf("Detect duplicated tweet ID: %s", tweet.ID)
@@ -42,13 +68,13 @@ func TestGetSearchTweets(t *testing.T) {
				}
			}
			if tweet.PermanentURL == "" {
-				t.Error("Expected tweet PermanentURL is not empty")
+				t.Error("Expected tweet PermanentURL is empty")
			}
			if tweet.IsRetweet {
				t.Error("Expected tweet IsRetweet is false")
			}
			if tweet.Text == "" {
-				t.Error("Expected tweet Text is not empty")
+				t.Error("Expected tweet Text is empty")
			}
		}
	}
6 changes: 3 additions & 3 deletions tweets.go
@@ -7,12 +7,12 @@ import (
)

// GetTweets returns channel with tweets for a given user.
-func (s *Scraper) GetTweets(ctx context.Context, user string, maxTweetsNbr int) <-chan *Result {
-	return getTimeline(ctx, user, maxTweetsNbr, s.FetchTweets)
+func (s *Scraper) GetTweets(ctx context.Context, user string, maxTweetsNbr int) <-chan *TweetResult {
+	return getTweetTimeline(ctx, user, maxTweetsNbr, s.FetchTweets)
}

// GetTweets wrapper for default Scraper
-func GetTweets(ctx context.Context, user string, maxTweetsNbr int) <-chan *Result {
+func GetTweets(ctx context.Context, user string, maxTweetsNbr int) <-chan *TweetResult {
	return defaultScraper.GetTweets(ctx, user, maxTweetsNbr)
}

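The renamed channel element types do not appear in this diff either; they presumably live in the timeline files not rendered on this page. Given that the README example reads profile.Error and profile.Name, and the tests read tweet.Error alongside tweet.ID, each result type most likely pairs its payload with a per-item error. A sketch of what those call sites imply, where the struct embedding is an assumption:

```golang
// TweetResult is what GetTweets and SearchTweets channels emit.
type TweetResult struct {
	Tweet       // embedded, so tweet.ID, tweet.Text, etc. work directly
	Error error // set when fetching a page failed
}

// ProfileResult is the SearchProfiles counterpart.
type ProfileResult struct {
	Profile
	Error error
}
```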