This repository has been archived by the owner on Apr 15, 2023. It is now read-only.

Comparing changes

base repository: markcda/prtscr-photo-scrapper
base: master
head repository: linkmast3r/prtscr-photo-scrapper
compare: master
These branches can be automatically merged.
  • 9 commits
  • 5 files changed
  • 1 contributor

Commits on Sep 6, 2021

  1. Update README.md (linkmast3r, 46869da)
  2. c9e2be6
  3. added proxies support (linkmast3r, 0822968)
  4. added proxy support uwu (linkmast3r, 357d080)
  5. Update README.md (linkmast3r, d6e28f1)
  6. added proxy support (linkmast3r, d5adb70)
  7. Update README.md (linkmast3r, c8e56fb)
  8. Create requirements.txt (linkmast3r, 8d350cd)
  9. fcfde91
Showing with 234 additions and 58 deletions.
  1. +17 −9 README.md
  2. +108 −0 main.py
  3. +0 −49 prtscr-photo-scrapper.py
  4. +104 −0 prtscr_photo_scrapper.ipynb
  5. +5 −0 requirements.txt
26 changes: 17 additions & 9 deletions README.md
@@ -1,13 +1,21 @@
# prtscr-photo-scrapper
#### !! I am not responsible for sensitive information that may be leaked with this script.
With this script you can scrape screen captures from Lightshot.

A script for downloading random Lightshot screenshots from prnt.sc.

## WARNING!
## Usage:

THIS SOFTWARE IS TO BE USED FOR EDUCATIONAL PURPOSES ONLY. ANY USE FOR ILLEGAL PURPOSES IS UNACCEPTABLE. THE AUTHOR IS NOT RESPONSIBLE FOR ANY ACTIONS RELATED TO THE USE OF THIS SOFTWARE.

## Usage

The script requires the `beautifulsoup4` module: install it with `pip install beautifulsoup4`.

Run the script from a console: `./prtscr-photo-scrapper.py <number-of-downloader-threads{minimum=1}>`. Random Lightshot screenshots will be downloaded into the folder the script was launched from.
### Install the requirements from the file. You will also need Python 3.
```
pip install -r requirements.txt
```
### How to use the script
- Without proxies
```
python3 main.py <number of captures you want to scrape>
```
- With proxies (specifying the proxy file as an argument is optional)
  !! Only socks4 proxies are valid.
```
python3 main.py <number of captures you want to scrape> -proxy <csv file>
```
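The proxy-file layout is not documented; judging from how `scrape_proxy` in `main.py` reads it (first CSV column, prefixed with `socks4://`), it is presumably one `host:port` entry per line. A hypothetical example with placeholder addresses:
```
203.0.113.10:1080
203.0.113.11:4145
```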
108 changes: 108 additions & 0 deletions main.py
@@ -0,0 +1,108 @@
#!/usr/bin/python3

import random
import string
import requests
import os
import csv
import sys
from bs4 import BeautifulSoup


# Create the output directory if it does not already exist.
os.makedirs("output", exist_ok=True)

headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'}


def scrape_proxy(cap_num, proxy_file):

    # Load the proxy list: one socks4 host:port entry per CSV row (first column).
    proxylist = []
    with open(str(proxy_file), 'r') as f:
        reader = csv.reader(f)
        for row in reader:
            proxylist.append(row[0])

    scraped_num = 0
    while cap_num > scraped_num:
        # Try a random 6-character prnt.sc slug through a random proxy.
        slug = ''.join(random.choice(string.ascii_lowercase + string.digits) for _ in range(6))
        url = "https://prnt.sc/" + slug
        proxy = "socks4://" + random.choice(proxylist)
        try:
            # The short timeout keeps dead proxies from stalling the loop.
            response = requests.get(url, proxies={'https': proxy, 'http': proxy}, headers=headers, timeout=0.5)
        except requests.exceptions.RequestException:
            continue  # dead proxy or timeout: retry with a new slug and proxy
        try:
            soup = BeautifulSoup(response.content.decode(), features='lxml')
            ufr = requests.get(soup.img['src'], headers=headers)
            with open(f'output/{slug}.png', 'wb') as f:
                f.write(ufr.content)
            print(f'[+] Received file {slug}.png')
            scraped_num += 1
        except (requests.exceptions.RequestException, TypeError, KeyError):
            pass  # no image on the page or download failed: skip this slug

def scrape(cap_num):

    scraped_num = 0
    while cap_num > scraped_num:
        # Try a random 6-character prnt.sc slug.
        slug = ''.join(random.choice(string.ascii_lowercase + string.digits) for _ in range(6))
        url = "https://prnt.sc/" + slug
        try:
            response = requests.get(url, headers=headers)
        except requests.exceptions.RequestException:
            continue  # network error: retry with a new slug
        try:
            soup = BeautifulSoup(response.content.decode(), features='lxml')
            ufr = requests.get(soup.img['src'], headers=headers)
            with open(f'output/{slug}.png', 'wb') as f:
                f.write(ufr.content)
            print(f'[+] Received file {slug}.png')
            scraped_num += 1
        except (requests.exceptions.RequestException, TypeError, KeyError):
            pass  # no image on the page or download failed: skip this slug



def main():

    if len(sys.argv) == 2:
        print("[*] Downloading captures from " + sys.argv[1] + " links without proxy...")
        scrape(int(sys.argv[1]))

    elif len(sys.argv) in (3, 4):
        if sys.argv[2] == "-proxy":
            n_of_links = int(sys.argv[1])
            if len(sys.argv) == 4:
                proxy_file = str(sys.argv[3])
            else:
                proxy_file = input("[*] Proxy file (only csv) >> ")
            if not proxy_file.endswith(".csv"):
                print("[!] Only .csv files for proxies and only socks4 are valid.")
                sys.exit(1)
            print("[*] Downloading captures from " + sys.argv[1] + " links using proxies from " + proxy_file + "...")
            scrape_proxy(n_of_links, proxy_file)
        else:
            print("[*] Downloading captures from " + sys.argv[1] + " links without proxy...")
            scrape(int(sys.argv[1]))

    else:
        print("[!] Usage: python3 " + sys.argv[0] + " <number of captures you want to download>")
        sys.exit(1)

if __name__ == "__main__":
main()
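Downloads here run sequentially even though each fetch is independent and I/O-bound, so a thread pool is a natural extension. A minimal sketch of how one could drive the `scrape` function above; the `scrape_parallel` helper and its worker split are an illustration, not part of this PR:
```
import concurrent.futures

def scrape_parallel(cap_num, workers=4):
    # Split the requested capture count across workers; each worker
    # runs the sequential scrape() loop on its own share.
    share, extra = divmod(cap_num, workers)
    counts = [share + (1 if i < extra else 0) for i in range(workers)]
    with concurrent.futures.ThreadPoolExecutor(max_workers=workers) as pool:
        pool.map(scrape, [c for c in counts if c > 0])
```
With `workers=4`, calling `scrape_parallel(20)` would have each thread download five captures into the shared `output` directory.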
49 changes: 0 additions & 49 deletions prtscr-photo-scrapper.py

This file was deleted.

104 changes: 104 additions & 0 deletions prtscr_photo_scrapper.ipynb
@@ -0,0 +1,104 @@
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "prtscr-photo-scrapper.ipynb",
"provenance": [],
"collapsed_sections": [],
"toc_visible": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "code",
"metadata": {
"id": "A7RlHt-805P8"
},
"source": [
"!pip install requests\n",
"!pip install python-csv\n",
"!pip install pysocks\n",
"!pip install beautifulsoup4\n",
"!pip install lxml"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "nwDva3WY1Jy8"
},
"source": [
"#!/usr/bin/python3\n",
"\n",
"from google.colab import drive\n",
"drive.mount('/content/drive')\n",
"\n",
"import random\n",
"import concurrent.futures\n",
"import string\n",
"import requests\n",
"import os\n",
"import csv\n",
"import sys\n",
"from bs4 import BeautifulSoup\n",
"\n",
"\n",
"try:\n",
" os.mkdir(\"/content/drive/MyDrive/output_prtscr\")\n",
"except:\n",
" pass\n",
"\n",
"headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'}\n",
"\n",
"def scrape(cap_num):\n",
"\n",
" print(\"Scrapping...\")\n",
" \n",
" scraped_num = 0\n",
" while cap_num > scraped_num:\n",
" try:\n",
" slug = ''.join(random.choice(string.ascii_lowercase + string.digits) for _ in range(6))\n",
" url = \"https://prnt.sc/\" + slug\n",
" try:\n",
" response = requests.get(url, headers=headers, timeout=2)\n",
" except:\n",
" pass\n",
" content = response.content.decode()\n",
" soup = BeautifulSoup(content, features='lxml')\n",
" ufr = requests.get(soup.img['src'], headers=headers)\n",
" f = open(f'/content/drive/MyDrive/output_prtscr/{slug}.png', 'wb')\n",
" f.write(ufr.content)\n",
" f.close()\n",
" print(f'[+] Received file {slug}.png')\n",
" scraped_num += 1\n",
" except requests.exceptions.MissingSchema:\n",
" pass\n",
" except:\n",
" pass\n",
"\n",
"\n",
"\n",
"def main():\n",
" \n",
"\n",
" n_of_links = int(input(\"Number of links for scrapping: \"))\n",
" scrape(n_of_links)\n",
"\n",
"if __name__ == \"__main__\":\n",
" main()"
],
"execution_count": null,
"outputs": []
}
]
}
5 changes: 5 additions & 0 deletions requirements.txt
@@ -0,0 +1,5 @@
requests
python-csv  # note: the csv module main.py uses is in the standard library
pysocks
beautifulsoup4
lxml