Skip to content

Commit

Permalink
рабочий вариант.
Browse files Browse the repository at this point in the history
  • Loading branch information
svdvovan committed Jul 25, 2018
1 parent 3328b62 commit c421663
Show file tree
Hide file tree
Showing 6 changed files with 155 additions and 68 deletions.
7 changes: 0 additions & 7 deletions .idea/misc.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions .idea/vcs.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

29 changes: 13 additions & 16 deletions src/TestTest/AllOpenCart.java
Original file line number Diff line number Diff line change
Expand Up @@ -31,22 +31,19 @@ public static void main(String[] args) throws IOException {
int y = 0;
for (Element link1 : links1) {
String addressUrl = (links1.get(y).select("a").attr("_href"));
System.out.println(addressUrl);

// Element table = doc1.getElementsByTag("table").get(8);
// Elements row = table.select("tr");
// Iterator<Element> ite = table.select("td").iterator();
//
//
// for (Element rows : row) {
// String Site = ite.next().select("a").attr("_href");
//
// System.out.println(Site);
// }


// }
//
// System.out.println(addressUrl);

try{
Document doc2 = Jsoup.connect(addressUrl).get();

String Ssil = doc2.select("a").attr("href");
System.out.println(Ssil);}
catch (IOException e){

}



y++;
}
Page++;
Expand Down
6 changes: 6 additions & 0 deletions src/TestTest/TestDrive.java
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,14 @@ public static void main(String[] args) throws IOException {

int Attr=0;
for (Element Units: Unit) {
String Opis = Unit.get(Attr).text() + "|" + Value.get(Attr).text();
System.out.print(Unit.get(Attr).text() + "|" + Value.get(Attr).text() + "\n");

Attr++;

Cell cell1 = row.createCell(1);
cell1.setCellValue(Opis);

}

int Img=0;
Expand Down
62 changes: 62 additions & 0 deletions src/TestTest/TestDrive2.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
package TestTest;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.IOException;

// Original task notes: parse products from several sites into CSV. Required fields: 1 - photo, 2 - description and technical characteristics, 3 - name, 4 - article number, 5 - price, 6 - category (full hierarchy).
// https://master-instrument.ru - everything here except the brands JTC, KING TONY, mactak http://www.jtcrussia.ru http://car-tool.ru

/**
 * Created by SretenskyVD on 28.06.2018.
 *
 * <p>Walks the paginated "servizy_i_nabory" catalogue on bibihouse.ru, opens every product page
 * linked from a listing page and prints the product's characteristics to standard output as
 * {@code label|value} lines.
 */
public class TestDrive2 {

    /** Trust store handed to the JVM so the HTTPS connections to the site can be verified. */
    private static final String TRUST_STORE = "S:/ProjectJava/certTestDrive/bibihouseru.crt.jks";

    /** Base URL of the catalogue section being crawled. */
    private static final String CATALOG_URL = "https://www.bibihouse.ru/catalog/servizy_i_nabory/";

    /** Query-string prefix that selects a listing page by number. */
    private static final String PAGE_PARAM = "?PAGEN_17=";

    /** Number of the last listing page to request (pages 1 through 24 are fetched). */
    private static final int LAST_PAGE = 24;

    /**
     * Fetches listing pages 1..{@link #LAST_PAGE}, prints each listing URL and each product URL,
     * and dumps the characteristics of every product page.
     *
     * @param args unused
     * @throws IOException if any listing or product page cannot be fetched
     */
    public static void main(String[] args) throws IOException {
        System.setProperty("javax.net.ssl.trustStore", TRUST_STORE);

        for (int page = 1; page <= LAST_PAGE; page++) {
            String listingUrl = CATALOG_URL + PAGE_PARAM + page;
            Document listing = Jsoup.connect(listingUrl).get();
            System.out.println(listingUrl);

            for (Element product : listing.getElementsByClass("products__name")) {
                String productUrl = product.select("a[href]").attr("abs:href");
                System.out.println(productUrl);

                // A product entry without a resolvable link yields an empty string;
                // there is nothing to fetch, so move on instead of failing in connect().
                if (productUrl.isEmpty()) {
                    continue;
                }

                printCharacteristics(Jsoup.connect(productUrl).get());
            }
        }
    }

    /**
     * Prints one {@code label|value} line per characteristic label found on a product page.
     *
     * @param productPage parsed product page
     */
    private static void printCharacteristics(Document productPage) {
        Elements labels = productPage.getElementsByClass("characteristics-list__label");
        Elements values = productPage.getElementsByClass("characteristics-list__value");

        for (int i = 0; i < labels.size(); i++) {
            // Labels and values are paired by position; a page with fewer values than labels
            // must not abort the whole crawl, so a missing value is printed as empty.
            String value = i < values.size() ? values.get(i).text() : "";
            System.out.print(labels.get(i).text() + "|" + value + "\n");
        }
    }
}
113 changes: 68 additions & 45 deletions src/TestTest/technoschock.java
Original file line number Diff line number Diff line change
Expand Up @@ -29,25 +29,29 @@ public static void main(String[] args) throws IOException {

System.setProperty("javax.net.ssl.trustStore", "S:/ProjectJava/technoschock/cert/acryl-groupru.crt.jks");

File input = new File("S:/ProjectJava/technoschock/moika.htm");
Document doc1 = Jsoup.parse(input, "UTF-8");
// File input = new File("S:/ProjectJava/technoschock/moika2.htm");
// Document doc1 = Jsoup.parse(input, "UTF-8");




// String Path = "https://technoschock.ru/catalog/moyki_dlya_kukhni/?n=ajaxpages_gmci&n=Y&n=ajaxpages_gmci&PAGEN_1=";
String Path = "https://technoschock.ru/catalog/moyki_dlya_kukhni/?n=ajaxpages_gmci&n=Y&n=ajaxpages_gmci&PAGEN_1=3";
// String Path = "https://technoschock.ru/catalog/roboty_dlya_doma/";
// String Path = "https://technoschock.ru/catalog/moyki_dlya_kukhni/";

///https://technoschock.ru/catalog/moyki_dlya_kukhni/?n=ajaxpages_gmci&n=Y&n=ajaxpages_gmci&PAGEN_1=1
// int Page = 1;
//прокси http://spys.one/
int Page = 4;

// for (int count = 1; count <= 1; count++) {
// Path = "https://technoschock.ru/catalog/moyki_dlya_kukhni/?n=ajaxpages_gmci&n=Y&n=ajaxpages_gmci&PAGEN_1=" + Page;
// System.out.println(Path);
for (int count = 1; count <= 36; count++) {
Path = "https://technoschock.ru/catalog/moyki_dlya_kukhni/?n=ajaxpages_gmci&n=Y&n=ajaxpages_gmci&PAGEN_1=" + Page;
// System.out.println(Path);
System.setProperty("https.proxyHost", "103.19.81.76");
System.setProperty("https.proxyPort", "3128");


// Document doc1 = (Document) Jsoup.connect(Path).get();
Document doc1 = (Document) Jsoup.connect(Path).get();
Elements links1 = doc1.getElementsByClass("more").select("a[class=more]");
String Category = doc1.getElementsByClass("title-category").select("h1").text();
// String Category = doc1.getElementsByClass("title-category").select("h1").text();


int y = 0;
Expand All @@ -57,20 +61,35 @@ public static void main(String[] args) throws IOException {
String addressUrl = (links1.get(y).select("a[href]").attr("abs:href"));
System.out.println(addressUrl);


Document doc2 = Jsoup.connect(addressUrl)
.timeout(3000)
.timeout(50000)
//.ignoreHttpErrors(true)
.ignoreContentType(true)
.followRedirects(true)
.userAgent("Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36")
.userAgent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2214.38 Safari/537.36")
.get();

String Price = doc2.getElementsByClass("autocalc-product-price").text();
Elements Image = doc2.getElementsByClass("thumbnail");
String NameProduct5 = doc2.getElementsByClass("title-category").select("h1").text();
String Kod = doc2.getElementsByClass("list-unstyled product-view").select("span").text();
String Description = doc2.getElementsByClass("tab-pane active").text();
Elements Image = doc2.getElementsByClass("changeimage scrollitem");
// String MainImage = doc2.getElementsByClass("js_picture_glass genimage").select("src").attr("abs:href");
String MainImage = "https://technoschock.ru" + doc2.getElementsByClass("js_picture_glass genimage").attr("src");
String NameProduct5 = doc2.getElementsByTag("h1").text();
String Kod = doc2.getElementsByClass("val").last().text();
// String Description = doc2.getElementsByClass("contentinner").select("br").append("\\n").first().text();
// Element DD = doc2.getElementsByClass("contentinner").first();
Element DD = doc2.getElementsByClass("contentinner").first();
String Description = DD.toString();
// String Description = doc2.getElementsByClass("contentinner").first().text();
String Category = doc2.getElementsByClass("first selected").get(1).text();

System.out.println(Category);

System.out.println(DD);
System.out.println(Kod);
System.out.println(MainImage);
// System.out.println(Image);


String NameProduct4 = NameProduct5.replace("/", "");
String NameProduct3 = NameProduct4.replace("\\", "");
Expand All @@ -83,15 +102,18 @@ public static void main(String[] args) throws IOException {
Cell cell = row.createCell(0);
cell.setCellValue(Kod);

Cell cell1 = row.createCell(3);
Cell cell13 = row.createCell(1);
cell13.setCellValue(Category);

Cell cell1 = row.createCell(2);
cell1.setCellValue(NameProduct);

System.out.println(Price);

Cell cell2 = row.createCell(4);
cell2.setCellValue(Price);
// Cell cell2 = row.createCell(3);
// cell2.setCellValue(Price);

Cell cell5 = row.createCell(5);
Cell cell5 = row.createCell(4);
cell5.setCellValue(Description);


Expand All @@ -111,31 +133,35 @@ public static void main(String[] args) throws IOException {
y2++;
}

System.out.println();

int Img = 0;
int y3 = 27;
int y3 = 40;
for (Element Images : Image) {
String FileName = Image.get(Img).select("img").attr("src");
String FileName1 = "https://technoschock.ru" + Image.get(Img).select("img").attr("data-bigimage");
//Element FileName1 = Image.get(Img);
String FileName = FileName1.toString();

System.out.println(FileName);

Cell cell11 = row.createCell(y3);
cell11.setCellValue(FileName);
y3++;

File f = new File(FileName);

try {
//Копирование фото
// String FILENAME = "F:/Projects/TestCopy/foto/" + Category + "/" + NameProduct + "/" + f.getName();
String FILENAME = "S:/ProjectJava/technoschock/foto/" + Category + "/" + NameProduct + "/" + f.getName();
String SvDPDFURL = FileName;
File file = new File(FILENAME);
// File f = new File(FileName);

URL url = new URL(SvDPDFURL);
FileUtils.copyURLToFile(url, file);
} catch (java.io.FileNotFoundException e) {
System.out.println("не найден путь ");
}
// try {
// //Копирование фото
// // String FILENAME = "F:/Projects/TestCopy/foto/" + Category + "/" + NameProduct + "/" + f.getName();
// String FILENAME = "S:/ProjectJava/technoschock/foto/" + Category + "/" + NameProduct + "/" + f.getName();
// String SvDPDFURL = FileName;
// File file = new File(FILENAME);
//
// URL url = new URL(SvDPDFURL);
// FileUtils.copyURLToFile(url, file);
// } catch (java.io.FileNotFoundException e) {
// System.out.println("не найден путь ");
// }


Img++;
Expand All @@ -147,21 +173,18 @@ public static void main(String[] args) throws IOException {
}



OutputStream fileOut = new FileOutputStream("S:/ProjectJava/technoschock/book.xls", true) ;
try { wb.write(fileOut);
OutputStream fileOut = new FileOutputStream("S:/ProjectJava/technoschock/" + Page + "book.xls", false);
try {
wb.write(fileOut);
fileOut.close();
}


catch(FileNotFoundException e){
} catch (FileNotFoundException e) {
e.printStackTrace();
} catch(IOException e){
} catch (IOException e) {
e.printStackTrace();

}



Page++;
}
}
}

0 comments on commit c421663

Please sign in to comment.