Skip to content

Commit

Permalink
feat: 微博帖子支持保存到数据库中
Browse files Browse the repository at this point in the history
  • Loading branch information
NanmiCoder committed Dec 24, 2023
1 parent c5b64fd commit b1441ab
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 15 deletions.
13 changes: 8 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@
| 小红书 ||||||||||
| 抖音 ||||||||||
| 快手 ||||||||||
| B 站 | ||| || | | ||
| 微博 | ||| || | | ||
| B 站 ||||||||||
| 微博 | ||| || | | ||


## 使用方法
Expand Down Expand Up @@ -66,14 +66,17 @@

# 从配置文件中读取指定的帖子ID列表获取指定帖子的信息与评论信息
python3 main.py --platform xhs --lt qrcode --type detail


# 打开对应APP扫二维码登录

# 其他平台爬虫使用示例, 执行下面的命令查看
python3 main.py --help
```

打开对应APP扫二维码登录

等待爬虫程序执行完毕,数据会保存到 `data/xhs` 目录下
### 数据保存
- 支持保存到关系型数据库(MySQL、PostgreSQL 等)
- 支持保存到 CSV 文件中(data/ 目录下)

## 如何使用 IP 代理
➡️➡️➡️ [IP代理使用方法](docs/代理使用.md)
Expand Down
1 change: 1 addition & 0 deletions models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@
from .douyin import *
from .kuaishou import *
from .xiaohongshu import *
from .weibo import *
20 changes: 10 additions & 10 deletions models/weibo.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,14 +35,14 @@ class WeiboNote(WeiboBaseModel):
note_id = fields.CharField(max_length=64, index=True, description="帖子ID")
content = fields.TextField(null=True, description="帖子正文内容")
create_time = fields.BigIntField(description="帖子发布时间戳", index=True)
create_date_time = fields.BigIntField(description="帖子发布日期时间", index=True)
create_date_time = fields.CharField(description="帖子发布日期时间",max_length=32, index=True)
liked_count = fields.CharField(null=True, max_length=16, description="帖子点赞数")
comments_count = fields.CharField(null=True, max_length=16, description="帖子评论数量")
shared_count = fields.CharField(null=True, max_length=16, description="帖子转发数量")
note_url = fields.CharField(null=True, max_length=512, description="帖子详情URL")

class Meta:
table = "weibo_video"
table = "weibo_note"
table_description = "微博帖子"

def __str__(self):
Expand All @@ -54,7 +54,7 @@ class WeiboComment(WeiboBaseModel):
note_id = fields.CharField(max_length=64, index=True, description="帖子ID")
content = fields.TextField(null=True, description="评论内容")
create_time = fields.BigIntField(description="评论时间戳")
create_date_time = fields.BigIntField(description="评论日期时间", index=True)
create_date_time = fields.CharField(description="评论日期时间", max_length=32, index=True)
comment_like_count = fields.CharField(max_length=16, description="评论点赞数量")
sub_comment_count = fields.CharField(max_length=16, description="评论回复数")

Expand All @@ -75,16 +75,16 @@ async def update_weibo_note(note_item: Dict):
"note_id": note_id,
"content": mblog.get("text"),
"create_time": utils.rfc2822_to_timestamp(mblog.get("created_at")),
"create_date_time": utils.rfc2822_to_china_datetime(mblog.get("created_at")),
"liked_count": mblog.get("attitudes_count", 0),
"comments_count": mblog.get("comments_count", 0),
"shared_count": mblog.get("reposts_count", 0),
"create_date_time": str(utils.rfc2822_to_china_datetime(mblog.get("created_at"))),
"liked_count": str(mblog.get("attitudes_count", 0)),
"comments_count": str(mblog.get("comments_count", 0)),
"shared_count": str(mblog.get("reposts_count", 0)),
"last_modify_ts": utils.get_current_timestamp(),
"note_url": f"https://m.weibo.cn/detail/{note_id}",
"ip_location": mblog.get("region_name", "").replace("发布于 ", ""),

# 用户信息
"user_id": user_info.get("id"),
"user_id": str(user_info.get("id")),
"nickname": user_info.get("screen_name", ""),
"gender": user_info.get("gender", ""),
"profile_url": user_info.get("profile_url", ""),
Expand Down Expand Up @@ -130,7 +130,7 @@ async def update_weibo_video_comment(note_id: str, comment_item: Dict):
local_db_item = {
"comment_id": comment_id,
"create_time": utils.rfc2822_to_timestamp(comment_item.get("created_at")),
"create_date_time": utils.rfc2822_to_china_datetime(comment_item.get("created_at")),
"create_date_time": str(utils.rfc2822_to_china_datetime(comment_item.get("created_at"))),
"note_id": note_id,
"content": content.get("message"),
"sub_comment_count": str(comment_item.get("total_number", 0)),
Expand All @@ -139,7 +139,7 @@ async def update_weibo_video_comment(note_id: str, comment_item: Dict):
"ip_location": comment_item.get("source", "").replace("来自", ""),

# 用户信息
"user_id": user_info.get("id"),
"user_id": str(user_info.get("id")),
"nickname": user_info.get("screen_name", ""),
"gender": user_info.get("gender", ""),
"profile_url": user_info.get("profile_url", ""),
Expand Down

0 comments on commit b1441ab

Please sign in to comment.