This repository has been archived by the owner on Apr 5, 2020. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 8
/
Get-R18ThumbUrls.ps1
136 lines (120 loc) · 4.77 KB
/
Get-R18ThumbUrls.ps1
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
function Get-R18ThumbUrl {
    <#
    .SYNOPSIS
    Collects actress thumbnail image links from a range of R18 listing pages into a csv file.

    .DESCRIPTION
    Requests every listing page from StartPage through EndPage (inclusive), keeps the images whose
    'src' contains '/actjpgs/' and whose 'alt' text is not empty, and appends their 'src' and 'alt'
    values to the csv file at ExportPath.

    .PARAMETER StartPage
    Number of the first listing page to request.

    .PARAMETER EndPage
    Number of the last listing page to request.

    .PARAMETER ExportPath
    Csv file the rows are appended to. Rows are added to an existing file, not written over it.
    #>
    [CmdletBinding()]
    param(
        [Parameter(Mandatory = $true)]
        [int]$StartPage,
        [Parameter(Mandatory = $true)]
        [int]$EndPage,
        [Parameter(Mandatory = $true)]
        [System.IO.FileInfo]$ExportPath
    )

    $PageIndex = $StartPage
    while ($PageIndex -le $EndPage) {
        $PageUri = "https://www.r18.com/videos/vod/movies/actress/letter=a/sort=popular/page=$PageIndex/"
        $Response = Invoke-WebRequest -Uri $PageUri

        # Only actress portraits (served from /actjpgs/) that carry a name in their alt text are kept
        $ActressThumbs = $Response.Images |
            Select-Object -Property src, alt |
            Where-Object { ($_.src -like '*/actjpgs/*') -and ($_.alt -notlike $null) }

        $ActressThumbs | Export-Csv -Path $ExportPath -Force -Append -NoTypeInformation
        Write-Host "Page $PageIndex added to $ExportPath"

        $PageIndex++
    }
}
function Set-NameOrder {
    <#
    .SYNOPSIS
    Cleans the scraped actor names of a thumbnail csv and returns one entry per unique name.

    .DESCRIPTION
    Copies the csv at Path to 'db\r18thumb_original.csv' below $PSScriptRoot, then reads its
    'alt' (name) and 'src' (thumbnail url) columns. For every row the '...' marker and trailing
    whitespace are removed from the name. When $NameOrder equals 'true', a name made of
    space-separated parts has its first part moved to the end ("First Last" -> "Last First").
    Any other $NameOrder value leaves the part order untouched. Only the first row seen for each
    name (compared case-insensitively) is returned.

    $NameOrder is not a parameter; it is read from the calling scope.

    .PARAMETER Path
    Csv file holding the scraped 'src' and 'alt' columns.

    .OUTPUTS
    One object per unique name with the properties Name and ThumbUrl.
    Throws when the csv has no rows or lacks the 'src'/'alt' columns, so that a caller does not
    go on to overwrite the csv with an empty result.
    #>
    [CmdletBinding()]
    param(
        [Parameter(Mandatory = $true)]
        [System.IO.FileInfo]$Path
    )

    # Create backup directory in scriptroot
    $BackupPath = Join-Path -Path $PSScriptRoot -ChildPath "db"
    if (!(Test-Path $BackupPath)) {
        # New-Item emits the created DirectoryInfo; discard it so it does not become part of this
        # function's output (it has a Name property and would end up as a bogus csv row)
        New-Item -ItemType Directory -Path $BackupPath -ErrorAction SilentlyContinue | Out-Null
    }

    # Copy original scraped thumbs to backup directory
    Write-Host "Backing up original scraped csv file to $BackupPath"
    Copy-Item -Path $Path -Destination (Join-Path $BackupPath -ChildPath "r18thumb_original.csv")
    Write-Host "Writing to cleaned names to csv... please wait"

    # @() keeps a single-row csv as a one-element array instead of a bare object, so rows are
    # always addressed by row index and never by character index into a string
    $R18Thumbs = @(Import-Csv -Path $Path)

    $HasExpectedColumns = ($R18Thumbs.Count -gt 0) -and
        ($null -ne $R18Thumbs[0].PSObject.Properties['alt']) -and
        ($null -ne $R18Thumbs[0].PSObject.Properties['src'])
    if (-not $HasExpectedColumns) {
        throw "'$Path' does not contain any rows with the scraped 'src' and 'alt' columns."
    }

    # Anything other than 'true' (case-insensitive) means: keep the scraped part order
    $SwapNames = ($NameOrder -eq 'true')

    # Hashtable literals use case-insensitive keys, matching the case-insensitive duplicate check
    $SeenNames = @{}

    for ($x = 0; $x -lt $R18Thumbs.Count; $x++) {
        $Row = $R18Thumbs[$x]

        # Remove periods from R18 scrape; trim first so a trailing space cannot turn into an
        # empty last name part when the parts are swapped
        $Name = ([string]$Row.alt).Replace('...', '').TrimEnd()

        if ($SwapNames) {
            $Parts = $Name.Split(' ')
            if (($Parts.Count -gt 1) -and ($Parts[1].Length -ne 0)) {
                # $Last receives every part after the first; interpolation joins them with spaces
                $First, $Last = $Parts
                $Name = "$Last $First"
            }
        }

        if (-not $SeenNames.ContainsKey($Name)) {
            $SeenNames[$Name] = $true
            [pscustomobject]@{
                Name     = $Name
                ThumbUrl = $Row.src
            }
        }

        # Progress indicator: one dot per 20 processed rows
        if (($x % 20) -eq 0) { Write-Host '.' -NoNewline }
    }
}
# Removes PowerShell progress bar which speeds up Invoke-WebRequest calls
$ProgressPreference = 'SilentlyContinue'

# Returns the trimmed value of the first 'key=value' line in $Lines that starts with $Key.
# Throws when no such line exists so a missing setting is reported where it is read.
function Get-SortJavSetting {
    param(
        [string[]]$Lines,
        [string]$Key
    )
    $KeyPattern = '^' + [regex]::Escape($Key)
    $SettingLine = $Lines | Where-Object { $_ -match $KeyPattern } | Select-Object -First 1
    if (($null -eq $SettingLine) -or ($SettingLine -notmatch '=')) {
        throw "Setting '$Key' was not found in the settings file."
    }
    # Split on the first '=' only, so a value that itself contains '=' is kept whole
    return ($SettingLine -split '=', 2)[1].Trim()
}

# Check settings file for config options (read the file once, then look up each key)
$SettingsPath = Resolve-Path -Path (Join-Path -Path $PSScriptRoot -ChildPath 'settings_sort_jav.ini')
$SettingsLines = @(Get-Content $SettingsPath)
$NameOrder = Get-SortJavSetting -Lines $SettingsLines -Key 'swap-name-order'
$StartPage = Get-SortJavSetting -Lines $SettingsLines -Key 'r18-start-page'
$EndPage = Get-SortJavSetting -Lines $SettingsLines -Key 'r18-end-page'
$CsvExportPath = Get-SortJavSetting -Lines $SettingsLines -Key 'r18-export-csv-path'

# Create backup directory in scriptroot once, up front, so every later step can rely on it
$BackupPath = Join-Path -Path $PSScriptRoot -ChildPath "db"
if (!(Test-Path $BackupPath)) {
    New-Item -ItemType Directory -Path $BackupPath -ErrorAction SilentlyContinue | Out-Null
}

# Write thumb links csv file
if (!(Test-Path -Path $CsvExportPath)) {
    Get-R18ThumbUrl -StartPage $StartPage -EndPage $EndPage -ExportPath $CsvExportPath
}
else {
    Write-Host "File specified in r18-export-csv-path already exists. Replace?"
    # $Answer instead of $Input: $Input is an automatic variable and should not be assigned to
    $Answer = Read-Host -Prompt '[Y] Yes [N] No (default is "N")'
    if ($Answer -like 'y') {
        # Copy the existing csv to the backup directory before it is replaced
        Copy-Item -Path $CsvExportPath -Destination (Join-Path $BackupPath -ChildPath "r18thumb_original.csv")
        # Get-R18ThumbUrl appends to its export file, so the old file has to be removed first;
        # otherwise the fresh scrape would be added below the previous content
        Remove-Item -Path $CsvExportPath -Force
        Get-R18ThumbUrl -StartPage $StartPage -EndPage $EndPage -ExportPath $CsvExportPath
    }
    else {
        Write-Host "Are you trying to rewrite the original scraped csv?"
        $Answer = Read-Host -Prompt '[Y] Yes [N] No (default is "N")'
        if ($Answer -notlike 'y') {
            Write-Warning "Cancelled by user input. Exiting..."
            return
        }
    }
}

# Write fixed names to original csv file while backing up original to 'db' directory
$ActorCsv = Set-NameOrder -Path $CsvExportPath
if (-not $ActorCsv) {
    # Never replace the scraped csv with an empty file
    Write-Warning "No actor entries were produced from $CsvExportPath. The file was left unchanged."
    return
}

# First csv rewrite - names only
$ActorCsv | Select-Object Name, ThumbUrl | Export-Csv $CsvExportPath -Force -NoTypeInformation
Write-Host "R18 actor thumb urls written to $CsvExportPath"
pause