forked from gippy/instagram-scraper
-
Notifications
You must be signed in to change notification settings - Fork 0
/
INPUT_SCHEMA.json
138 lines (138 loc) · 7.73 KB
/
INPUT_SCHEMA.json
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
{
"title": "Input schema for Instagram scraper",
"description": "Below you can provide search query which will be used to search Instagram for profiles, hashtags or places. Alternatively you can provide direct page URLs. <br />Then on each page you can scrape page details, posts or comments. <br/> <strong>Important: Proxy is required to use this solution</strong>",
"type": "object",
"schemaVersion": 1,
"properties": {
"search": {
"title": "Search",
"type": "string",
"description": "Here you can provide a search query which will be used to search Instagram for profiles, hashtags or places. <br /><br /><strong>This field is optional, but this field or 'Direct Instagram page URLs' must be provided</strong>",
"editor": "textfield",
"prefill": "Nature"
},
"searchType": {
"title": "Search type",
"type": "string",
"description": "What type of pages to search (you can search hashtags, profiles and places)",
"editor": "select",
"enum": ["user", "hashtag", "place"],
"enumTitles": ["Search users", "Search hashtags", "Search places (only with login)"],
"default": "hashtag"
},
"searchLimit": {
"title": "Search results limit",
"type": "integer",
"description": "How many search results (eq. pages) should be processed.",
"editor": "number",
"default": 10,
"minimum": 1,
"maximum": 100
},
"directUrls": {
"title": "Direct Instagram page URLs",
"type": "array",
"description": "If you already have URL(s) of page(s) you wish to scrape, you can set them here. <br /><br /><strong>This field is optional, but this field or 'Search' must be provided</strong>. Location URLs require login.",
"editor": "stringList",
"placeholderValue": "URL",
"patternValue": "https:\\/\\/www\\.instagram\\.com\\/.+",
"uniqueItems": true
},
"resultsType": {
"title": "What to scrape from each page.",
"type": "string",
"description": "What to scrape from each Instagram page URL or search result. You need to provide correct parent pages, e.g. comments can be scraped only from posts, not from profiles.",
"editor": "select",
"enum": ["posts", "comments", "details"],
"enumTitles": [
"Posts (from profile pages, hashtag or place search)",
"Comments (from post pages)",
"Details (of profiles, posts or search pages - from a profile, post or search pages)"],
"default": "posts"
},
"resultsLimit": {
"title": "Max items",
"type": "integer",
"description": "How many posts or comments to scrape from each Instagram URL or found page, ignored when \"Details\" type is specified. Notice that if you use search via hashtags or place, each found page will have this limit separately. You can limit searchLimit to reduce the number of total results.",
"editor": "number",
"unit": "per page",
"default": 200
},
"maxRequestRetries": {
"title": "Max request retries",
"type": "integer",
"description": "How many times a page will be retried in case of being blocked or other errors.",
"default": 5
},
"scrapePostsUntilDate": {
"title": "Scrape posts until date",
"type": "string",
"description": "Scrapes post from the current date until it reaches a post older than a provided date. The date should be in format YYYY-MM-DD. By default scrapes all posts.",
"editor": "textfield"
},
"scrollWaitSecs": {
"title": "Scroll wait seconds",
"type": "integer",
"description": "How many seconds it will wait every 100 posts/comments when scrolling. If you scrape more than 1000 items, it is important to slow down to not get blocked. The seconds are randomized a bit. If you get blocked anyway, try to increase this number.",
"default": 15
},
"proxy": {
"title": "Proxy configuration",
"type": "object",
"description": "<strong>Proxy is required to run this actor!</strong> Either use Apify proxy, or provide your own proxy servers.",
"prefill": { "useApifyProxy": true, "apifyProxyGroups": [] },
"editor": "proxy"
},
"loginCookies": {
"title": "Cookies",
"type": "array",
"description": "If you want to sign in to Instagram to have access to more data than on anonymous profile, but do not wish to have your credentials in input, then you can use your cookies.",
"editor": "json",
"sectionCaption": "Login using cookies",
"sectionDescription": "Instagram limits access to certain data (likes, post user data, followers and others) if a user is anonymous. You can provide your cookies here (copied from chrome plugin like 'EditThisCookie' to use a normal account. Do not use your own cookies here and rather create a new temporary account. When cookies are used, the run is limited to concurrency 1 and one session, so that the account does not get banned too soon."
},
"likedByLimit": {
"title": "Get post likes",
"type": "integer",
"description": "If this limit is set above 0, then the solution will attempt to load additional details about users who liked the post.",
"editor": "number",
"unit": "likes",
"default": 0,
"sectionCaption": "Additional data loaded from detail pages",
"sectionDescription": "If limits in this section are set above 0, then when the solution enters a page, it attempts to load the specified items until the limit is reached. All of the options below require login credentials."
},
"followingLimit": {
"title": "Get profile following",
"type": "integer",
"description": "If this limit is set above 0, then the solution will attempt to find users who the profile owner is following.",
"editor": "number",
"unit": "users",
"default": 0
},
"followedByLimit": {
"title": "Get profile followed by",
"type": "integer",
"description": "If this limit is set above 0, then the solution will attempt to find users who the profile owner is followed by.",
"editor": "number",
"unit": "users",
"default": 0
},
"expandOwners": {
"title": "Query post owner",
"type": "boolean",
"description": "If this feature is used, then for each post the solution will attempt to query additional details about it's owner. This will drastically slow down the crawling speed and increase the chance of an account ban if credentials are provided.",
"editor": "checkbox",
"sectionCaption": "Experimental features",
"sectionDescription": "Features in this section are highly experimental, can result in a ban of the account if logged in and might cause the solution to fail."
},
"extendOutputFunction": {
"title": "Extend output function",
"type": "string",
"nullable": true,
"description": "Function that takes a JQuery handle ($) as an argument and returns data that will be merged with the default output",
"prefill": "($) => { return {} }",
"editor": "javascript"
}
},
"required": ["proxy"]
}