Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 19 additions & 18 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

</div>

> [!IMPORTANT]
> [!IMPORTANT]
> **收藏项目**,你将从 GitHub 上无延迟地接收所有发布通知~ ⭐️

<img width="100%" src="https://starify.komoridevs.icu/api/starify?owner=fllesser&repo=nonebot-plugin-parser" alt="starify" />
Expand All @@ -27,14 +27,14 @@
| 平台 | 触发的消息形态 | 视频 | 图集 | 音频 |
| ------- | --------------------------------- | ---- | ---- | ---- |
| B 站 | av 号/BV 号/链接/短链/卡片/小程序 | ✅​ | ✅​ | ✅​ |
| 抖音 | 链接(分享链接,兼容电脑端链接) | ✅​ | ✅​ | ❌️ |
| 微博 | 链接(博文,视频,show, 文章) | ✅​ | ✅​ | ❌️ |
| 小红书 | 链接(含短链)/卡片 | ✅​ | ✅​ | ❌️ |
| 快手 | 链接(包含标准链接和短链) | ✅​ | ✅​ | ❌️ |
| acfun | 链接 | ✅​ | ❌️ | ❌️ |
| youtube | 链接(含短链) | ✅​ | ❌️ | ✅​ |
| tiktok | 链接 | ✅​ | ❌️ | ❌️ |
| twitter | 链接 | ✅​ | ✅​ | ❌️ |
| 抖音 | 链接(分享链接,兼容电脑端链接) | ✅​ | ✅​ | ❌️ |
| 微博 | 链接(博文,视频,show, 文章) | ✅​ | ✅​ | ❌️ |
| 小红书 | 链接(含短链)/卡片 | ✅​ | ✅​ | ❌️ |
| 快手 | 链接(包含标准链接和短链) | ✅​ | ✅​ | ❌️ |
| acfun | 链接 | ✅​ | ❌️ | ❌️ |
| youtube | 链接(含短链) | ✅​ | ❌️ | ✅​ |
| tiktok | 链接 | ✅​ | ❌️ | ❌️ |
| twitter | 链接 | ✅​ | ✅​ | ❌️ |

支持的链接,可参考 [测试链接](https://github.com/fllesser/nonebot-plugin-parser/blob/master/tests/others/test_urls.md)

Expand All @@ -54,7 +54,7 @@

## 💿 安装

> [!Warning]
> [!Warning]
> **如果你已经在使用 nonebot-plugin-resolver[2],请在安装此插件前卸载**

<details>
Expand Down Expand Up @@ -339,18 +339,19 @@ class ExampleParser(BaseParser):

# 4. 视频内容
author = self.create_author(author_name, avatar_url)
video = self.create_video_content(video_url, cover_url, duration)
video = self.create_video(video_url, cover_url, duration)

# 5. 图集内容
image_urls = data.get("images")
images = self.create_image_contents(image_urls)
images = self.create_images(image_urls)

# 6. 返回解析结果
return self.result(
title=title,
text=description,
author=author,
contents=[video, *images],
video=video,
contents=[*images],
timestamp=timestamp,
url=f"https://example.com/video/{video_id}",
)
Expand All @@ -375,7 +376,7 @@ author = self.create_author(

```python
# 方式1:传入 URL,自动下载
video = self.create_video_content(
video = self.create_video(
url_or_task="https://example.com/video.mp4",
cover_url="https://example.com/cover.jpg", # 可选
duration=120.5 # 可选,单位:秒
Expand All @@ -384,7 +385,7 @@ video = self.create_video_content(
# 方式2:传入已创建的下载任务
from nonebot_plugin_parser.download import DOWNLOADER
video_task = DOWNLOADER.download_video(url, ext_headers=self.headers)
video = self.create_video_content(
video = self.create_video(
url_or_task=video_task,
cover_url=cover_url,
duration=duration
Expand All @@ -395,7 +396,7 @@ video = self.create_video_content(

```python
# 并发下载图集内容
images = self.create_image_contents([
images = self.create_images([
"https://example.com/img1.jpg",
"https://example.com/img2.jpg",
])
Expand All @@ -418,5 +419,5 @@ real_url = await self.get_redirect_url(

## 🎉 致谢

[nonebot-plugin-resolver](https://github.com/zhiyu1998/nonebot-plugin-resolver)
[parse-video-py](https://github.com/wujunwei928/parse-video-py)
- [nonebot-plugin-resolver](https://github.com/zhiyu1998/nonebot-plugin-resolver) | 初代解析插件
- [parse-video-py](https://github.com/wujunwei928/parse-video-py) | 借鉴了抖音解析
2 changes: 1 addition & 1 deletion src/nonebot_plugin_parser/parsers/acfun/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ async def _parse(self, searched: re.Match[str]):
video_name=f"acfun_{acid}.mp4",
)

video_content = self.create_video_content(
video_content = self.create_video(
video_task,
cover_url=video_info.coverUrl,
)
Expand Down
10 changes: 5 additions & 5 deletions src/nonebot_plugin_parser/parsers/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ def create_author(

return author

def create_video_content(
def create_video(
self,
url_or_task: str | Task[Path],
cover_url: str | None = None,
Expand Down Expand Up @@ -205,7 +205,7 @@ async def extract_cover():
duration=duration,
)

def create_image_contents(
def create_images(
self,
image_urls: list[str],
):
Expand All @@ -216,20 +216,20 @@ def create_image_contents(
contents.append(ImageContent(PathTask(task)))
return contents

def create_image_content(
def create_image(
self,
url_or_task: str | Task[Path],
alt: str | None = None,
):
"""创建图片内容"""
"""创建单个图片内容"""
if isinstance(url_or_task, str):
path_task = DOWNLOADER.download_img(url_or_task, ext_headers=self.headers)
elif isinstance(url_or_task, Task):
path_task = url_or_task

return ImageContent(PathTask(path_task), alt=alt)

def create_audio_content(
def create_audio(
self,
url_or_task: str | Task[Path],
duration: float = 0.0,
Expand Down
12 changes: 6 additions & 6 deletions src/nonebot_plugin_parser/parsers/bilibili/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ async def download_video():
)
return path

video_content = self.create_video_content(
video_content = self.create_video(
asyncio.create_task(download_video()),
page_info.cover,
page_info.duration,
Expand Down Expand Up @@ -188,7 +188,7 @@ async def _parse_dynamic_info(self, dynamic_info: DynamicInfo):
# 下载图片
author = self.create_author(dynamic_info.name, dynamic_info.avatar)
contents: list[MediaContent] = []
contents.extend(self.create_image_contents(dynamic_info.image_urls))
contents.extend(self.create_images(dynamic_info.image_urls))

repost = None
if dynamic_info.type == "DYNAMIC_TYPE_FORWARD" and dynamic_info.orig is not None:
Expand Down Expand Up @@ -234,7 +234,7 @@ async def _parse_bilibli_api_opus(self, bili_opus: Opus):
if isinstance(node, str):
result.graphics.append(node)
else:
result.graphics.append(self.create_image_content(node.url, alt=node.alt))
result.graphics.append(self.create_image(node.url, alt=node.alt))

return result

Expand All @@ -252,12 +252,12 @@ async def parse_live(self, room_id: int):
# 下载封面
if cover := room_data.cover:
cover_task = self.downloader.download_img(cover, ext_headers=self.headers)
contents.append(self.create_image_content(cover_task))
contents.append(self.create_image(cover_task))

# 下载关键帧
if keyframe := room_data.keyframe:
keyframe_task = self.downloader.download_img(keyframe, ext_headers=self.headers)
contents.append(self.create_image_content(keyframe_task))
contents.append(self.create_image(keyframe_task))

author = self.create_author(room_data.name, room_data.avatar)

Expand Down Expand Up @@ -288,7 +288,7 @@ async def parse_favlist(self, fav_id: int):

graphics: list[str | ImageContent] = []
for fav in favdata.medias:
graphics.append(self.create_image_content(fav.cover, alt=fav.desc))
graphics.append(self.create_image(fav.cover, alt=fav.desc))
graphics.append(fav.desc)

return self.result(
Expand Down
8 changes: 4 additions & 4 deletions src/nonebot_plugin_parser/parsers/douyin/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,10 +92,10 @@ async def parse_video(self, url: str):

# 添加图片内容
if image_urls := video_data.image_urls:
result.contents.extend(self.create_image_contents(image_urls))
result.contents.extend(self.create_images(image_urls))
# 添加视频内容
elif video_url := video_data.video_url:
result.video = self.create_video_content(
result.video = self.create_video(
video_url,
video_data.cover_url,
video_data.duration,
Expand Down Expand Up @@ -130,8 +130,8 @@ async def parse_slides(self, video_id: str):
# 优先取动图
if dynamic_urls := slides_data.dynamic_urls:
for dynamic_url in dynamic_urls:
result.contents.append(self.create_video_content(dynamic_url))
result.contents.append(self.create_video(dynamic_url))
elif image_urls := slides_data.image_urls:
result.contents.extend(self.create_image_contents(image_urls))
result.contents.extend(self.create_images(image_urls))

return result
4 changes: 2 additions & 2 deletions src/nonebot_plugin_parser/parsers/kuaishou/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,10 @@ async def _parse_v_kuaishou(self, searched: re.Match[str]):

# 添加视频内容
if video_url := photo.video_url:
result.video = self.create_video_content(video_url, photo.cover_url, photo.duration)
result.video = self.create_video(video_url, photo.cover_url, photo.duration)

# 添加图片内容
if img_urls := photo.img_urls:
result.contents.extend(self.create_image_contents(img_urls))
result.contents.extend(self.create_images(img_urls))

return result
2 changes: 1 addition & 1 deletion src/nonebot_plugin_parser/parsers/nga.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ async def _parse(self, searched: re.Match[str]):
if paths := re.findall(r"\[img\]\.(.*?)\[\/img\]", line):
for path in paths:
img_url = self.build_img_url(path)
result.graphics.append(self.create_image_content(img_url))
result.graphics.append(self.create_image(img_url))
else:
# 去除其他标签, 仅保留文本
if clean_line := re.sub(r"\[[^\]]*?\]", "", line).strip():
Expand Down
4 changes: 1 addition & 3 deletions src/nonebot_plugin_parser/parsers/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,7 @@ async def safe_get(
@property
async def uri(self) -> str | None:
path = await self.safe_get()
if path is not None:
return path.as_uri()
return None
return path.as_uri() if path else None

def __repr__(self) -> str:
if self._path is not None:
Expand Down
2 changes: 1 addition & 1 deletion src/nonebot_plugin_parser/parsers/tiktok.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ async def _parse(self, searched: re.Match[str]):

# 下载封面和视频
video = YTDLP_DOWNLOADER.download_video(url)
video_content = self.create_video_content(
video_content = self.create_video(
video,
video_info.thumbnail,
duration=video_info.duration,
Expand Down
12 changes: 6 additions & 6 deletions src/nonebot_plugin_parser/parsers/twitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,17 +82,17 @@ def _collect_result(self, data: VxTwitterResponse) -> ParseResult:

for media in data.media_extended:
if media.type == "video":
result.video = self.create_video_content(
result.video = self.create_video(
media.url,
media.thumbnail_url,
duration=media.duration,
)
break
elif media.type == "image":
result.contents.append(self.create_image_content(media.url))
result.contents.append(self.create_image(media.url))
elif media.type == "gif":
result.contents.append(
self.create_video_content(
self.create_video(
media.url,
media.thumbnail_url,
duration=media.duration,
Expand Down Expand Up @@ -161,12 +161,12 @@ def _parse_twitter_html(self, html_content: str) -> ParseResult:
href = str(href)
text = tag.get_text(strip=True)
if "下载 MP4" in text:
result.video = self.create_video_content(href, cover_url)
result.video = self.create_video(href, cover_url)
break
elif "下载图片" in text:
result.contents.append(self.create_image_content(href))
result.contents.append(self.create_image(href))
elif "下载 gif" in text:
result.contents.append(self.create_video_content(href))
result.contents.append(self.create_video(href))

# 3. 提取标题
title_tag = soup.find("h3")
Expand Down
8 changes: 4 additions & 4 deletions src/nonebot_plugin_parser/parsers/weibo/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ async def parse_article(self, _id: str):
elif element.name == "img":
src = element.get("src")
if isinstance(src, str):
graphics.append(self.create_image_content(src))
graphics.append(self.create_image(src))

author = self.create_author(
data.userinfo.screen_name,
Expand Down Expand Up @@ -140,7 +140,7 @@ async def parse_fid(self, fid: str):
play_info.description,
)

video_content = self.create_video_content(
video_content = self.create_video(
play_info.video_url,
play_info.cover_url,
duration=play_info.duration,
Expand Down Expand Up @@ -211,15 +211,15 @@ def _collect_result(self, data: common.WeiboData):

# 添加视频内容
if video_url := data.video_url:
result.video = self.create_video_content(
result.video = self.create_video(
video_url,
data.cover_url,
data.duration,
)

# 添加图片内容
if image_urls := data.image_urls:
result.contents.extend(self.create_image_contents(image_urls))
result.contents.extend(self.create_images(image_urls))

# 转发内容
if data.retweeted_status:
Expand Down
9 changes: 4 additions & 5 deletions src/nonebot_plugin_parser/parsers/xiaohongshu/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,12 +83,11 @@ async def parse_explore(self, url: str, xhs_id: str):

# 添加视频内容
if note_detail.is_video:
video_cover_duration = note_detail.video_cover_duration
result.video = self.create_video_content(*video_cover_duration)
result.video = self.create_video(*note_detail.video_cover_duration)

# 添加图片内容
elif image_urls := note_detail.image_urls:
result.contents.extend(self.create_image_contents(image_urls))
result.contents.extend(self.create_images(image_urls))

return result

Expand Down Expand Up @@ -128,13 +127,13 @@ async def parse_discovery(self, url: str):
else:
cover_url = note_data.image_urls[0]

result.video = self.create_video_content(
result.video = self.create_video(
video_url,
cover_url,
duration,
)
elif img_urls := note_data.image_urls:
result.contents.extend(self.create_image_contents(img_urls))
result.contents.extend(self.create_images(img_urls))

return result

Expand Down
4 changes: 2 additions & 2 deletions src/nonebot_plugin_parser/parsers/youtube/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,13 +39,13 @@ async def parse_video(self, url: str):

if video_info.duration <= pconfig.duration_maximum:
video = YTDLP_DOWNLOADER.download_video(url, self.cookies_file)
result.video = self.create_video_content(
result.video = self.create_video(
video,
video_info.thumbnail,
video_info.duration,
)
else:
result.contents.extend(self.create_image_contents([video_info.thumbnail]))
result.contents.extend(self.create_images([video_info.thumbnail]))

return result

Expand Down
Loading