From 3b9f862944ac6f029ebcea01f38d720940144b68 Mon Sep 17 00:00:00 2001
From: dataabc <chillychen1991@gmail.com>
Date: Mon, 1 Nov 2021 19:01:13 +0800
Subject: [PATCH] =?UTF-8?q?fix:=20=E4=BF=AE=E5=A4=8D=E9=83=A8=E5=88=86?=
 =?UTF-8?q?=E5=BE=AE=E5=8D=9A=E6=97=A0=E6=B3=95=E8=8E=B7=E5=8F=96=E7=9A=84?=
 =?UTF-8?q?=E9=97=AE=E9=A2=98?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Issue #374
---
 setup.py                            | 2 +-
 weibo_spider/parser/album_parser.py | 9 +++++++--
 weibo_spider/parser/photo_parser.py | 8 ++++++--
 3 files changed, 14 insertions(+), 5 deletions(-)
diff --git a/setup.py b/setup.py
index 631503b2..e76d2d0b 100644
--- a/setup.py
+++ b/setup.py
@@ -5,7 +5,7 @@
 
 setuptools.setup(
     name='weibo-spider',
-    version='0.2.7',
+    version='0.2.8',
     author='Chen Lei',
     author_email='chillychen1991@gmail.com',
     description='新浪微博爬虫，用python爬取新浪微博数据。',
diff --git a/weibo_spider/parser/album_parser.py b/weibo_spider/parser/album_parser.py
index c187b9df..546bb672 100644
--- a/weibo_spider/parser/album_parser.py
+++ b/weibo_spider/parser/album_parser.py
@@ -1,5 +1,5 @@
-from .util import handle_html
 from .parser import Parser
+from .util import handle_html
 
 
 class AlbumParser(Parser):
@@ -10,4 +10,9 @@ def __init__(self, cookie, album_url):
 
     def extract_pic_urls(self):
         # <img src="http://wx2.sinaimg.cn/wap180/76102133ly8fwr33wpn8fj20v90v9tbw.jpg" alt="" class="c">
-        return self.selector.xpath('//img[@class="c"]/@src')
\ No newline at end of file
+        """Return the src of each <img> under <div class="c">, minus any query string."""
+        pic_list = self.selector.xpath('//div[@class="c"]//img/@src')
+        # Keep only the text before the first "?" so that a trailing query
+        # string is dropped; a src without "?" comes through unchanged.
+        # partition() never raises, so no separate membership test is needed.
+        return [pic.partition("?")[0] for pic in pic_list]
diff --git a/weibo_spider/parser/photo_parser.py b/weibo_spider/parser/photo_parser.py
index 33551c7e..236e76e2 100644
--- a/weibo_spider/parser/photo_parser.py
+++ b/weibo_spider/parser/photo_parser.py
@@ -1,5 +1,5 @@
-from .util import handle_html
 from .parser import Parser
+from .util import handle_html
 
 
 class PhotoParser(Parser):
@@ -7,9 +7,13 @@ def __init__(self, cookie, user_id):
         self.cookie = cookie
         self.url = "https://weibo.cn/" + str(user_id) + "/photo?tf=6_008"
         self.selector = handle_html(self.cookie, self.url)
+        self.user_id = user_id
 
     def extract_avatar_album_url(self):
         # Finds the href attribute of the table td div element with text 头像相册, e.g.
         # <a href="/album/166564740000001980768563?rl=1"><img width="80" height="80" src="https://tvax1.sinaimg.cn/crop.0.0.1080.1080.180/76102133ly8ga961tpte6j20u00u0q65.jpg?KID=imgbed,tva&amp;Expires=1629227741&amp;ssig=TEUDkMXcS1" alt="头像相册"></a>
         result = self.selector.xpath('//img[@alt="头像相册"]/../@href')
-        return "https://weibo.cn" + result[0]
+        if result:
+            return "https://weibo.cn" + result[0]
+        # Nothing matched the xpath: fall back to a URL built from the user id.
+        return "https://weibo.cn/" + str(self.user_id) + "/avatar?rl=0"