Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 37 additions & 0 deletions packages/core/src/lib/turndown.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,18 @@ import { createLogger } from './logger'

const logger = createLogger('Turndown')

// ============ 源平台链接清理规则 ============

/** Source-platform domains whose in-site links are removed (the <a> is dropped, only its text is kept). */
export const SOURCE_LINK_REMOVE_DOMAINS = [
'zhida.zhihu.com',
]

/** Redirect-hop rules: domain → name of the query parameter that carries the real URL. */
export const SOURCE_LINK_REDIRECT_RULES: Array<{ domain: string; param: string }> = [
{ domain: 'link.zhihu.com', param: 'target' },
]

// ============ HTML 实体解码工具 ============

/**
Expand Down Expand Up @@ -443,6 +455,31 @@ function addExtensionRules(turndownService: TurndownService): void {
}
})

// Source-platform link cleanup: strip in-site links and unwrap redirect hops.
//
// Matching is done on the parsed hostname rather than with a substring test on
// the raw href, so a URL that merely mentions a listed domain in its path or
// query string (or uses it as a prefix of another host) is left untouched.

/**
 * Parse an href for domain matching. Returns null when it cannot be parsed.
 * The placeholder base lets protocol-relative hrefs ("//host/path") parse;
 * plain relative hrefs resolve to the placeholder host and so never match.
 */
const parseSourcePlatformHref = (href: string): URL | null => {
  try {
    return new URL(href, 'https://relative.invalid')
  } catch {
    return null
  }
}

/** True when the URL's host is `domain` itself or one of its subdomains. */
const isOnSourceDomain = (url: URL, domain: string): boolean => {
  const wanted = domain.toLowerCase()
  return url.hostname === wanted || url.hostname.endsWith('.' + wanted)
}

turndownService.addRule('sourcePlatformLinks', {
  // Claim only <a> elements whose host is covered by a removal or redirect rule.
  filter: function(node) {
    if (node.nodeName !== 'A') return false
    const url = parseSourcePlatformHref((node as Element).getAttribute('href') || '')
    if (!url) return false
    return SOURCE_LINK_REMOVE_DOMAINS.some(d => isOnSourceDomain(url, d))
      || SOURCE_LINK_REDIRECT_RULES.some(r => isOnSourceDomain(url, r.domain))
  },
  // Removal domains collapse to the link text; redirect domains are rewritten
  // to the real target; anything else falls back to a plain Markdown link.
  replacement: function(content, node) {
    const href = (node as Element).getAttribute('href') || ''
    const url = parseSourcePlatformHref(href)
    if (url) {
      if (SOURCE_LINK_REMOVE_DOMAINS.some(d => isOnSourceDomain(url, d))) {
        return content
      }
      const rule = SOURCE_LINK_REDIRECT_RULES.find(r => isOnSourceDomain(url, r.domain))
      const real = rule ? url.searchParams.get(rule.param) : null
      // The target comes from untrusted page HTML: only accept http(s) so a
      // "javascript:" / "data:" value cannot become the link destination.
      if (real && /^https?:\/\//i.test(real)) {
        return '[' + content + '](' + real + ')'
      }
    }
    return '[' + content + '](' + href + ')'
  }
})

// 保留没有表头的表格(作为 HTML)
turndownService.keep(function(node) {
try {
Expand Down
33 changes: 32 additions & 1 deletion packages/extension/src/lib/content-processor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
* 5. Service Worker 只做图片上传 + 调用 API
*/

import { htmlToMarkdownNative, type PreprocessConfig } from '@wechatsync/core'
import { htmlToMarkdownNative, SOURCE_LINK_REMOVE_DOMAINS, SOURCE_LINK_REDIRECT_RULES, type PreprocessConfig } from '@wechatsync/core'
import { createLogger } from './logger'

const logger = createLogger('ContentProcessor')
Expand Down Expand Up @@ -77,6 +77,9 @@ export function preprocessForPlatform(rawHtml: string, config: PreprocessConfig)
// 移除 script 和 noscript(总是执行),style 根据配置决定
removeElements(container, config.keepStyles ? ['script', 'noscript'] : ['script', 'style', 'noscript'])

// 清理源平台链接(固定步骤,在 removeLinks 之前执行)
cleanSourcePlatformLinks(container)

if (config.removeLinks) {
processLinks(container, config.keepLinkDomains)
}
Expand Down Expand Up @@ -235,6 +238,34 @@ function processSvgImages(container: HTMLElement): void {
})
}

/**
 * Clean up links that point back at the source platform.
 *
 * - Links whose host is listed in SOURCE_LINK_REMOVE_DOMAINS are unwrapped:
 *   the <a> element is removed and its child nodes stay in place.
 * - Links whose host matches a SOURCE_LINK_REDIRECT_RULES entry get their
 *   href replaced by the real URL read from that rule's query parameter.
 *
 * Hosts are compared on the parsed hostname (exact match or subdomain), not
 * by substring search over the whole href, so URLs that only mention a listed
 * domain in their path or query string are left alone.
 *
 * @param container Root element whose descendant <a> elements are processed in place.
 */
function cleanSourcePlatformLinks(container: HTMLElement): void {
  // Snapshot the list first: unwrapping mutates the tree while we iterate.
  const links = Array.from(container.querySelectorAll('a'))
  for (const link of links) {
    const url = parseSourceLinkHref(link.getAttribute('href') || '')
    if (!url) continue

    if (SOURCE_LINK_REMOVE_DOMAINS.some(d => isSourceLinkHost(url, d))) {
      const parent = link.parentNode
      if (!parent) continue
      // Hoist every child in front of the anchor, then drop the empty anchor.
      while (link.firstChild) {
        parent.insertBefore(link.firstChild, link)
      }
      parent.removeChild(link)
      continue
    }

    const rule = SOURCE_LINK_REDIRECT_RULES.find(r => isSourceLinkHost(url, r.domain))
    if (!rule) continue
    const real = url.searchParams.get(rule.param)
    // The target comes from untrusted page HTML: only accept http(s) so a
    // "javascript:" / "data:" value cannot become the link destination.
    if (real && /^https?:\/\//i.test(real)) {
      link.setAttribute('href', real)
    }
  }
}

/**
 * Parse an href for domain matching; returns null when it cannot be parsed.
 * The placeholder base lets protocol-relative hrefs ("//host/path") parse;
 * plain relative hrefs resolve to the placeholder host and so never match.
 */
function parseSourceLinkHref(href: string): URL | null {
  try {
    return new URL(href, 'https://relative.invalid')
  } catch {
    return null
  }
}

/** True when the URL's host is `domain` itself or one of its subdomains. */
function isSourceLinkHost(url: URL, domain: string): boolean {
  const wanted = domain.toLowerCase()
  return url.hostname === wanted || url.hostname.endsWith('.' + wanted)
}

/**
* 处理链接
*/
Expand Down