Skip to content

Commit

Permalink
fix: 没有抓取到图片不缓存空值到库
Browse files Browse the repository at this point in the history
  • Loading branch information
wanglu committed Apr 14, 2022
1 parent 3a223a8 commit 1cb2519
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 6 deletions.
6 changes: 3 additions & 3 deletions goscraper/goscraper.go
Original file line number Diff line number Diff line change
Expand Up @@ -122,9 +122,9 @@ func (scraper *Scraper) getDocument() (*Document, error) {
if err != nil {
return nil, err
}
req.Header.Add("User-Agent", "GoScraper")
req.Header.Add("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.88 Safari/537.36")

client := &http.Client{
client := &http.Client{
Transport: &http.Transport{
TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
},
Expand Down Expand Up @@ -204,7 +204,7 @@ func (scraper *Scraper) parseDocument(doc *Document) error {
if cleanStr(attr.Key) == "rel" && cleanStr(attr.Val) == "canonical" {
canonical = true
}
if cleanStr(attr.Key) == "rel" && strings.Contains(cleanStr(attr.Val), "icon") {
if cleanStr(attr.Key) == "rel" && strings.Contains(cleanStr(attr.Val), "icon") {
hasIcon = true
}
if cleanStr(attr.Key) == "href" {
Expand Down
9 changes: 6 additions & 3 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,9 @@ func updateCatelog(data updateCatelogDto, db *sql.DB) {

func getImgFromDB(url1 string, db *sql.DB) Img {
// urlEncoded := url.QueryEscape(url1)
// fmt.Println("url获取时编码", urlEncoded)
sql_get_img := `
SELECT * FROM nav_img
WHERE url = ?;
WHERE url=?;
`
rows, err := db.Query(sql_get_img, url1)
checkErr(err)
Expand All @@ -91,7 +90,11 @@ func updateImg(url1 string, db *sql.DB) {
// 除了更新工具本身之外,也要更新 img 表
// 先看有没有,有的话就不管了,没有的话就创建
// urlEncoded := url.QueryEscape(url1)
// fmt.Println("创建时编码:", urlEncoded)
// fmt.Println("创建时编码:", urlEncoded)
base64ImgValue := getImgBase64FromUrl(url1)
if base64ImgValue == "" {
return
}
sql_get_img := `
SELECT * FROM nav_img
WHERE url = ?;
Expand Down

0 comments on commit 1cb2519

Please sign in to comment.