diff --git a/config/options.go b/config/options.go
index 959877bf..9c9ca3d3 100644
--- a/config/options.go
+++ b/config/options.go
@@ -93,6 +93,7 @@ const (
defBoltPathname = "wayback.db"
defPoolingSize = 3
defMaxMediaSize = "512MB"
+ defCohereApiKey = ""
defWaybackTimeout = 300
defWaybackMaxRetries = 2
defWaybackUserAgent = "WaybackArchiver/1.0"
@@ -112,6 +113,10 @@ const (
defDatabaseMinConns = 1
defDatabaseConnectionLifetime = 5
+ defLLMProvider = ""
+ defLLMApiKey = ""
+ defLLMModel = ""
+
maxAttachSizeTelegram = 50000000 // 50MB
maxAttachSizeDiscord = 8000000 // 8MB
maxAttachSizeSlack = 5000000000 // 5GB
@@ -145,6 +150,7 @@ type Options struct {
notion *notion
matrix *matrix
slack *slack
+ llm *llm
services sync.Map
privacyURL string
storageDir string
@@ -269,6 +275,12 @@ type meili struct {
apikey string
}
+type llm struct {
+ provider string // provider name; the parser fills it from WAYBACK_LLM_PROVIDER
+ apikey string // provider API key; the parser fills it from WAYBACK_LLM_APIKEY
+ model string // model identifier; the parser fills it from WAYBACK_LLM_MODEL
+}
+
type omnivore struct {
apikey string
}
@@ -386,6 +398,11 @@ func NewOptions() *Options {
indexing: defMeiliIndexing,
apikey: defMeiliApikey,
},
+ llm: &llm{
+ provider: defLLMProvider,
+ apikey: defLLMApiKey,
+ model: defLLMModel,
+ },
omnivore: &omnivore{
apikey: defOmnivoreApikey,
},
@@ -951,6 +968,21 @@ func (o *Options) MaxMediaSize() uint64 {
return size
}
+// LLMProvider returns the configured LLM provider name, which is empty by default.
+func (o *Options) LLMProvider() string {
+ return o.llm.provider
+}
+
+// LLMApiKey returns the API key configured for the LLM provider, which is empty by default.
+func (o *Options) LLMApiKey() string {
+ return o.llm.apikey
+}
+
+// LLMModel returns the configured LLM model name, which is empty by default.
+func (o *Options) LLMModel() string {
+ return o.llm.model
+}
+
// MaxAttachSize returns max attach size limits for several services.
// scope: telegram
func (o *Options) MaxAttachSize(scope string) int64 {
diff --git a/config/parser.go b/config/parser.go
index 76d233a0..0d3df878 100644
--- a/config/parser.go
+++ b/config/parser.go
@@ -237,6 +237,12 @@ func (p *Parser) parseLines(lines []string) (err error) {
p.opts.meili.indexing = parseString(val, defMeiliIndexing)
case "WAYBACK_MEILI_APIKEY":
p.opts.meili.apikey = parseString(val, defMeiliApikey)
+ case "WAYBACK_LLM_PROVIDER":
+ p.opts.llm.provider = parseString(val, defLLMProvider)
+ case "WAYBACK_LLM_APIKEY":
+ p.opts.llm.apikey = parseString(val, defLLMApiKey)
+ case "WAYBACK_LLM_MODEL":
+ p.opts.llm.model = parseString(val, defLLMModel)
case "WAYBACK_OMNIVORE_APIKEY":
p.opts.omnivore.apikey = parseString(val, defOmnivoreApikey)
case "WAYBACK_PRIVACY_URL":
diff --git a/docs/environment.md b/docs/environment.md
index b7265e05..ef15eb7f 100644
--- a/docs/environment.md
+++ b/docs/environment.md
@@ -102,6 +102,9 @@ Use the `-c` / `--config` option to specify the build definition file to use.
| - | `WAYBACK_ONION_LOCAL_PORT` | `8964` | Local port for Tor Hidden Service, also support for a **reverse proxy**. This is ignored if `WAYBACK_LISTEN_ADDR` is set. |
| - | `WAYBACK_ONION_REMOTE_PORTS` | `80` | Remote ports for Tor Hidden Service, e.g. `WAYBACK_ONION_REMOTE_PORTS=80,81` |
| - | `WAYBACK_ONION_DISABLED` | `false` | Disable onion service |
+| - | `WAYBACK_LLM_PROVIDER` | - | LLM provider used for AI-enhanced summaries: `cohere` or `openrouter`. When unset, the built-in summarizer is used |
+| - | `WAYBACK_LLM_APIKEY` | - | API key for the selected LLM provider |
+| - | `WAYBACK_LLM_MODEL` | - | LLM model. When unset, each provider uses a default: `command-a-03-2025` (cohere), `openrouter/auto` (openrouter) |
| - | `WAYBACK_SLOT` | - | Pinning service for IPFS mode of pinner, see [ipfs-pinner](https://github.com/wabarc/ipfs-pinner#supported-pinning-services) |
| - | `WAYBACK_APIKEY` | - | API key for pinning service |
| - | `WAYBACK_SECRET` | - | API secret for pinning service |
diff --git a/go.mod b/go.mod
index 5ffdd307..8ccca4b3 100644
--- a/go.mod
+++ b/go.mod
@@ -11,6 +11,7 @@ require (
github.com/davecgh/go-spew v1.1.1
github.com/dghubble/go-twitter v0.0.0-20201011215211-4b180d0cc78d
github.com/dghubble/oauth1 v0.7.1
+ github.com/didasy/tldr v0.7.0
github.com/dstotijn/go-notion v0.11.0
github.com/dustin/go-humanize v1.0.0
github.com/gabriel-vasile/mimetype v1.4.2
@@ -66,6 +67,7 @@ require (
github.com/MercuryEngineering/CookieMonster v0.0.0-20180304172713-1584578b3403 // indirect
github.com/SaveTheRbtz/generic-sync-map-go v0.0.0-20230201052002-6c5833b989be // indirect
github.com/VividCortex/ewma v1.2.0 // indirect
+ github.com/alixaxel/pagerank v0.0.0-20160306110729-14bfb4c1d88c // indirect
github.com/andybalholm/brotli v1.1.0 // indirect
github.com/andybalholm/cascadia v1.3.2 // indirect
github.com/benbjohnson/clock v1.3.5 // indirect
@@ -86,7 +88,7 @@ require (
github.com/decred/dcrd/crypto/blake256 v1.0.1 // indirect
github.com/decred/dcrd/dcrec/secp256k1/v4 v4.2.0 // indirect
github.com/dghubble/sling v1.3.0 // indirect
- github.com/dlclark/regexp2 v1.7.0 // indirect
+ github.com/dlclark/regexp2 v1.9.0 // indirect
github.com/dop251/goja v0.0.0-20221115122301-6c0d9883792e // indirect
github.com/fatih/color v1.16.0 // indirect
github.com/fortytw2/leaktest v1.3.0 // indirect
diff --git a/go.sum b/go.sum
index f3e8375c..7ffbe835 100644
--- a/go.sum
+++ b/go.sum
@@ -16,6 +16,8 @@ github.com/VividCortex/ewma v1.2.0/go.mod h1:nz4BbCtbLyFDeC9SUHbtcT5644juEuWfUAU
github.com/aead/siphash v1.0.1/go.mod h1:Nywa3cDsYNNK3gaciGTWPwHt0wlpNV15vwmswBAUSII=
github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
+github.com/alixaxel/pagerank v0.0.0-20160306110729-14bfb4c1d88c h1:UUHM6/UM34ESICar/DWOhLt2rqYabsvfjmupiY9z+iE=
+github.com/alixaxel/pagerank v0.0.0-20160306110729-14bfb4c1d88c/go.mod h1:e7Vic/xXDZAQ8ftWoLnVrXseAAvt54SVYrcirjCKcX0=
github.com/andybalholm/brotli v1.1.0 h1:eLKJA0d02Lf0mVpIDgYnqXcUn0GqVmEFny3VuID1U3M=
github.com/andybalholm/brotli v1.1.0/go.mod h1:sms7XGricyQI9K10gOSf56VKKWS4oLer58Q+mhRPtnY=
github.com/andybalholm/cascadia v1.0.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
@@ -118,9 +120,12 @@ github.com/dghubble/sling v1.3.0 h1:pZHjCJq4zJvc6qVQ5wN1jo5oNZlNE0+8T/h0XeXBUKU=
github.com/dghubble/sling v1.3.0/go.mod h1:XXShWaBWKzNLhu2OxikSNFrlsvowtz4kyRuXUG7oQKY=
github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ=
github.com/dgryski/go-sip13 v0.0.0-20181026042036-e10d5fee7954/go.mod h1:vAd38F8PWV+bWy6jNmig1y/TA+kYO4g3RSRF0IAv0no=
+github.com/didasy/tldr v0.7.0 h1:9kFLpmeGeGPPIRysln8B9USbW+L5zAAlw9ol8gwc2gU=
+github.com/didasy/tldr v0.7.0/go.mod h1:1W7p626SAyEeSkAAzFJLAG/Hr6imK7sxEr+K6x7e7Ao=
github.com/dlclark/regexp2 v1.4.1-0.20201116162257-a2a8dda75c91/go.mod h1:2pZnwuY/m+8K6iRw6wQdMtk+rH5tNGR1i55kozfMjCc=
-github.com/dlclark/regexp2 v1.7.0 h1:7lJfhqlPssTb1WQx4yvTHN0uElPEv52sbaECrAQxjAo=
github.com/dlclark/regexp2 v1.7.0/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
+github.com/dlclark/regexp2 v1.9.0 h1:pTK/l/3qYIKaRXuHnEnIf7Y5NxfRPfpb7dis6/gdlVI=
+github.com/dlclark/regexp2 v1.9.0/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8=
github.com/dop251/goja v0.0.0-20211022113120-dc8c55024d06/go.mod h1:R9ET47fwRVRPZnOGvHxxhuZcbrMCuiqOz3Rlrh4KSnk=
github.com/dop251/goja v0.0.0-20221115122301-6c0d9883792e h1:Uo51nR73BJlci20AE5tXT5qiLSGZy5LHnRlKt7VkcUM=
github.com/dop251/goja v0.0.0-20221115122301-6c0d9883792e/go.mod h1:yRkwfj0CBpOGre+TwBsqPV0IH0Pk73e4PXJOeNDboGs=
@@ -140,6 +145,8 @@ github.com/fortytw2/leaktest v1.3.0 h1:u8491cBMTQ8ft8aeV+adlcytMZylmA5nnwwkRZjI8
github.com/fortytw2/leaktest v1.3.0/go.mod h1:jDsjWgpAGjm2CA7WthBh/CdZYEPF31XHquHwclZch5g=
github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ=
+github.com/fsnotify/fsnotify v1.6.0 h1:n+5WquG0fcWoWp6xPWfHdbskMCQaFnG6PfBrh1Ky4HY=
+github.com/fsnotify/fsnotify v1.6.0/go.mod h1:sl3t1tCWJFWoRz9R8WJCbQihKKwmorjAbSClcnxKAGw=
github.com/gabriel-vasile/mimetype v1.4.2 h1:w5qFW6JKBz9Y393Y4q372O9A7cUSequkh1Q7OhCmWKU=
github.com/gabriel-vasile/mimetype v1.4.2/go.mod h1:zApsH/mKG4w07erKIaJPFiX0Tsq9BFQgN3qGY5GnNgA=
github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04=
@@ -351,6 +358,7 @@ github.com/multiformats/go-varint v0.0.7/go.mod h1:r8PUYw/fD/SjBCiKOoDlGF6QawOEL
github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=
github.com/nbd-wtf/go-nostr v0.17.1-0.20230426111250-32ca737acf77 h1:D7BdjjOD0D8r7RwLmrOTOJKEZ56D9YhLCEETz2Xh0Vo=
github.com/nbd-wtf/go-nostr v0.17.1-0.20230426111250-32ca737acf77/go.mod h1:YCDHJtaFQE76d1ZkcUsTkz3dYNP+bldo5CIQwXPPcbk=
+github.com/nxadm/tail v1.4.4 h1:DQuhQpB1tVlglWS2hLQ5OV6B5r8aGxSrPc5Qo6uTN78=
github.com/nxadm/tail v1.4.4/go.mod h1:kenIhsEOeOJmVchQTgglprH7qJGnHDVpk1VPCcaMI8A=
github.com/oklog/ulid v1.3.1/go.mod h1:CirwcVhetQ6Lv90oh/F+FBtV6XMibvdAFo93nm5qn4U=
github.com/oliamb/cutter v0.2.2 h1:Lfwkya0HHNU1YLnGv2hTkzHfasrSMkgv4Dn+5rmlk3k=
@@ -358,11 +366,14 @@ github.com/oliamb/cutter v0.2.2/go.mod h1:4BenG2/4GuRBDbVm/OPahDVqbrOemzpPiG5mi1
github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
github.com/onsi/ginkgo v1.7.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
github.com/onsi/ginkgo v1.12.1/go.mod h1:zj2OWP4+oCPe1qIXoGWkgMRwljMUYCdkwsT2108oapk=
+github.com/onsi/ginkgo v1.14.0 h1:2mOpI4JVVPBN+WQRa0WKH2eXR+Ey+uK4n7Zj0aYpIQA=
github.com/onsi/ginkgo v1.14.0/go.mod h1:iSB4RoI2tjJc9BBv4NKIKWKya62Rps+oPG/Lv9klQyY=
github.com/onsi/gomega v1.4.1/go.mod h1:C1qb7wdrVGGVU+Z6iS04AVkA3Q65CEZX59MT0QO5uiA=
github.com/onsi/gomega v1.4.3/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY=
github.com/onsi/gomega v1.7.1/go.mod h1:XdKZgCCFLUoM/7CFJVPcG8C1xQ1AJ0vpAezJrB7JYyY=
github.com/onsi/gomega v1.10.1/go.mod h1:iN09h71vgCQne3DLsj+A5owkum+a2tYe+TOCB1ybHNo=
+github.com/onsi/gomega v1.27.6 h1:ENqfyGeS5AX/rlXDd/ETokDz93u0YufY1Pgxuy/PvWE=
+github.com/onsi/gomega v1.27.6/go.mod h1:PIQNjfQwkP3aQAH7lf7j87O/5FiNr+ZR8+ipb+qQlhg=
github.com/orisano/pixelmatch v0.0.0-20220722002657-fb0b55479cde h1:x0TT0RDC7UhAVbbWWBzr41ElhJx5tXPWkIHA2HWPRuw=
github.com/orisano/pixelmatch v0.0.0-20220722002657-fb0b55479cde/go.mod h1:nZgzbfBr3hhjoZnS66nKrHmduYNpc34ny7RK4z5/HM0=
github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic=
@@ -682,6 +693,7 @@ gopkg.in/sourcemap.v1 v1.0.5 h1:inv58fC9f9J3TK2Y2R1NPntXEn3/wjWHkonhIUODNTI=
gopkg.in/sourcemap.v1 v1.0.5/go.mod h1:2RlvNNSMglmRrcvhfuzp4hQHwOtjxlbjX7UPY/GXb78=
gopkg.in/telebot.v3 v3.0.0-20220130115853-f0291132d3c3 h1:ifpOmJCnVni31dBAw99qxgCRfD33ROgv7vYxuhu+iWc=
gopkg.in/telebot.v3 v3.0.0-20220130115853-f0291132d3c3/go.mod h1:7rExV8/0mDDNu9epSrDm/8j22KLaActH1Tbee6YjzWg=
+gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ=
gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw=
gopkg.in/yaml.v2 v2.0.0-20170812160011-eb3733d160e7/go.mod h1:JAlM8MvJe8wmxCU4Bli9HhUf9+ttbYbLASfIpnQbh74=
gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
diff --git a/reduxer/reduxer.go b/reduxer/reduxer.go
index 14cb6e09..dd6c2cb5 100644
--- a/reduxer/reduxer.go
+++ b/reduxer/reduxer.go
@@ -27,6 +27,7 @@ import (
"github.com/wabarc/wayback/config"
"github.com/wabarc/wayback/errors"
"github.com/wabarc/wayback/ingress"
+ "github.com/wabarc/wayback/summary"
"golang.org/x/sync/errgroup"
)
@@ -57,6 +58,7 @@ type bundle struct {
shots *screenshot.Screenshots[screenshot.Path]
artifact Artifact
article readability.Article
+ summary string
}
// Artifact represents the file paths stored on the local disk.
@@ -135,6 +137,11 @@ func (b *bundle) Article() readability.Article {
return b.article
}
+// Summary returns the summary text stored on the bundle for its article.
+func (b *bundle) Summary() string {
+ return b.summary
+}
+
// Do executes secreenshot, print PDF and export html of given URLs
// Returns a set of bundle containing screenshot data and file path
// nolint:gocyclo
@@ -221,11 +228,19 @@ func Do(ctx context.Context, opts *config.Options, urls ...*url.URL) (Reduxer, e
if err = os.WriteFile(fp, helper.String2Byte(article.TextContent), filePerm); err == nil && article.TextContent != "" {
artifact.Txt.Local = fp
}
+
+ // Generate a summary of the extracted text; a failure is only logged and leaves sum empty
+ summarizer := summary.NewSummary(opts)
+ sum, err := summarizer.Summarize(article.TextContent)
+ if err != nil {
+ logger.Error("summarize failed: %v", err)
+ }
+
// Upload files to third-party server
if err = remotely(ctx, artifact); err != nil {
logger.Error("upload files to remote server failed: %v", err)
}
- bundle := &bundle{shots: shot, artifact: *artifact, article: article}
+ bundle := &bundle{shots: shot, artifact: *artifact, article: article, summary: sum}
bs.Store(Src(shot.URL), bundle)
return nil
})
diff --git a/summary/chat.go b/summary/chat.go
new file mode 100644
index 00000000..340be066
--- /dev/null
+++ b/summary/chat.go
@@ -0,0 +1,49 @@
+// Copyright 2026 Wayback Archiver. All rights reserved.
+// Use of this source code is governed by the GNU GPL v3
+// license that can be found in the LICENSE file.
+
+package summary // import "github.com/wabarc/wayback/summary"
+
+const systemPrompt = `You are a digital archivist and information synthesizer, your expertise lies in distilling "noise" from legacy web data into high-signal summaries.
+
+Rules:
+- Summary point must be anchored by specific verbatim quotes
+- Ignore UI elements (navbars, footers) and focus on the core content
+- Be objective, clinical, and precise. Strip away marketing fluff to reveal the underlying data
+- Summary must be in the same language as the source content
+- Do NOT repeat ideas from previous snapshots unless conditions have materially changed
+
+FORMATTING RULES (STRICT):
+- STRICT PROHIBITION: Do not use Markdown bolding (**text**)
+- Use ONLY plain text without any formatting
+- Use simple line breaks to separate points
+- Do NOT use headers or bold labels
+
+The output should be a maximum of 280 plain paragraphs.`
+
+type chatMessage struct { // one chat message; requests in this package send a "system" and a "user" message
+ Role string `json:"role"`
+ Content string `json:"content"` // message text
+}
+
+type chatRequest struct { // JSON request body posted by both Cohere.Summarize and OpenRouter.Summarize
+ Model string `json:"model"`
+ Messages []chatMessage `json:"messages"`
+}
+
+type chatContent struct { // one content part of a reply message; Cohere.Summarize reads Text
+	Type string `json:"type"`
+	Text string `json:"text"`
+}
+
+type chatChoice struct {
+ Message chatMessage `json:"message,omitempty"` // read by OpenRouter.Summarize via Choices[0].Message.Content
+ Role string `json:"role"`
+ Contents []chatContent `json:"content"` // read by Cohere.Summarize via Message.Contents[0].Text
+}
+
+type chatResponse struct { // response body decoded by both provider clients
+ Message chatChoice `json:"message,omitempty"` // read by Cohere.Summarize
+ ID string `json:"id"`
+ Choices []chatChoice `json:"choices,omitempty"` // read by OpenRouter.Summarize
+}
diff --git a/summary/cohere.go b/summary/cohere.go
new file mode 100644
index 00000000..0149273f
--- /dev/null
+++ b/summary/cohere.go
@@ -0,0 +1,96 @@
+// Copyright 2023 Wayback Archiver. All rights reserved.
+// Use of this source code is governed by the GNU GPL v3
+// license that can be found in the LICENSE file.
+
+package summary // import "github.com/wabarc/wayback/summary"
+
+import (
+ "bytes"
+ "encoding/json"
+ "fmt"
+ "net/http"
+ "strings"
+
+ "github.com/wabarc/wayback/config"
+ "github.com/wabarc/wayback/ingress"
+)
+
+// Interface guard
+var _ Summarizer = (*Cohere)(nil)
+
+// Cohere is a Summarizer that posts text to Cohere's chat endpoint.
+type Cohere struct {
+ client *http.Client // HTTP client used to send the chat request
+ apiKey string // sent as the Bearer token in the Authorization header
+ model string // model name placed in the request body
+}
+
+// NewCohere returns a Cohere summarizer configured from opts. A nil http.Client is
+// replaced with ingress.Client(). The API key is opts.LLMApiKey(); the model is
+// opts.LLMModel(), or "command-a-03-2025" when that is empty.
+// It always returns a non-nil instance.
+func NewCohere(c *http.Client, opts *config.Options) *Cohere {
+	coh := &Cohere{
+		client: c,
+		apiKey: opts.LLMApiKey(),
+		model:  opts.LLMModel(),
+	}
+	if coh.client == nil {
+		coh.client = ingress.Client()
+	}
+	if coh.model == "" {
+		coh.model = "command-a-03-2025"
+	}
+	return coh
+}
+
+// Summarize posts the trimmed text, preceded by the package system prompt, to Cohere's v2 chat
+// endpoint and returns the first content part of the reply, or an error if the call fails.
+func (coh *Cohere) Summarize(s string) (string, error) {
+	s = strings.TrimSpace(s)
+	if s == "" {
+		return "", fmt.Errorf("text not found")
+	}
+
+	body := chatRequest{
+		Model: coh.model,
+		Messages: []chatMessage{
+			{Role: "system", Content: systemPrompt},
+			{Role: "user", Content: s},
+		},
+	}
+	buf, err := json.Marshal(body)
+	if err != nil {
+		return "", fmt.Errorf("failed to marshal json: %w", err) // %w keeps the cause inspectable
+	}
+
+	endpoint := "https://api.cohere.ai/v2/chat"
+	req, err := http.NewRequest(http.MethodPost, endpoint, bytes.NewReader(buf))
+	if err != nil {
+		return "", fmt.Errorf("failed to make request: %w", err)
+	}
+	req.Header.Set("Content-Type", "application/json")
+	req.Header.Set("Accept", "application/json")
+	req.Header.Set("Authorization", "Bearer "+coh.apiKey)
+
+	res, err := coh.client.Do(req)
+	if err != nil {
+		return "", err
+	}
+	defer res.Body.Close()
+
+	if res.StatusCode < http.StatusOK || res.StatusCode >= http.StatusMultipleChoices {
+		return "", fmt.Errorf("cohere api error: status %d", res.StatusCode)
+	}
+
+	var cr chatResponse
+	if err := json.NewDecoder(res.Body).Decode(&cr); err != nil {
+		return "", fmt.Errorf("failed to decode body: %w", err)
+	}
+
+	if len(cr.Message.Contents) > 0 && strings.TrimSpace(cr.Message.Contents[0].Text) != "" {
+		return strings.TrimSpace(cr.Message.Contents[0].Text), nil
+	}
+
+	return s, nil // no usable content in the reply: fall back to the trimmed input
+}
diff --git a/summary/cohere_test.go b/summary/cohere_test.go
new file mode 100644
index 00000000..67258b0d
--- /dev/null
+++ b/summary/cohere_test.go
@@ -0,0 +1,194 @@
+// Copyright 2023 Wayback Archiver. All rights reserved.
+// Use of this source code is governed by the GNU GPL v3
+// license that can be found in the LICENSE file.
+
+package summary // import "github.com/wabarc/wayback/summary"
+
+import (
+ "encoding/json"
+ "fmt"
+ "net/http"
+ "os"
+ "strings"
+ "testing"
+
+ "github.com/wabarc/helper"
+ "github.com/wabarc/wayback/config"
+)
+
+var (
+ apiKey = os.Getenv("WAYBACK_LLM_APIKEY") // read once at package init; empty when the variable is unset
+ summarized = "This is a summary of the test input."
+ summarizeResponse = []byte(fmt.Sprintf(`{
+ "summary": "%s"
+}`, summarized))
+
+ handleFunc = func(w http.ResponseWriter, r *http.Request) { // shared mock handler; writes a body only for /v2/chat
+ w.Header().Set("Content-Type", "application/json")
+ switch r.URL.Path {
+ case "/v2/chat":
+ w.Write(summarizeResponse)
+ }
+ }
+)
+
+func TestNewCohere(t *testing.T) { // checks that NewCohere yields a non-nil instance for each case
+ httpClient, mux, server := helper.MockServer()
+ defer server.Close()
+
+ mux.HandleFunc("/", handleFunc)
+
+ tests := []struct {
+ desc string
+ client *http.Client
+ key string
+ expectErr bool // NOTE(review): set in the cases below but never asserted
+ expectNil bool
+ }{
+ {
+ desc: "Valid inputs",
+ client: httpClient,
+ key: "valid_api_key",
+ expectErr: false,
+ expectNil: false,
+ },
+ {
+ desc: "Invalid API key",
+ client: httpClient,
+ key: apiKey, // value of WAYBACK_LLM_APIKEY captured at package init
+ expectErr: true,
+ expectNil: true,
+ },
+ {
+ desc: "Nil http.Client",
+ client: nil,
+ key: apiKey,
+ expectErr: false,
+ expectNil: false,
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.desc, func(t *testing.T) {
+ t.Setenv("WAYBACK_LLM_PROVIDER", "cohere")
+ t.Setenv("WAYBACK_LLM_APIKEY", tt.key)
+
+ parser := config.NewParser()
+ opts, err := parser.ParseEnvironmentVariables()
+ if err != nil {
+ t.Fatalf("Parse environment variables or flags failed, error: %v", err)
+ }
+
+ cohere := NewCohere(tt.client, opts)
+ if !tt.expectNil && cohere == nil { // the only assertion: a nil instance fails unless expectNil is set
+ t.Errorf("Unexpected nil value for Cohere instance")
+ }
+ })
+ }
+}
+
+// TestCohereSummarize drives Cohere.Summarize against a mock /v2/chat handler.
+func TestCohereSummarize(t *testing.T) {
+	tests := []struct {
+		name        string
+		input       string
+		mockStatus  int
+		mockBody    string
+		expected    string
+		expectedErr string
+	}{
+		{
+			name:        "Empty string",
+			input:       "",
+			expected:    "",
+			expectedErr: "text not found",
+		},
+		{
+			name:       "Valid input",
+			input:      "This is a test input for summarization.",
+			mockStatus: 200,
+			mockBody: `{
+				"message":{
+					"role":"assistant","content":[{"type":"text","text":"This is the summary."}]
+				}
+			}`,
+			expected:    "This is the summary.",
+			expectedErr: "",
+		},
+		{
+			name:        "API error status",
+			input:       "Non-empty",
+			mockStatus:  500,
+			mockBody:    `{"error":"server"}`,
+			expected:    "",
+			expectedErr: "cohere api error: status 500",
+		},
+	}
+
+	httpClient, mux, server := helper.MockServer()
+	defer server.Close()
+
+	// Register handler at expected endpoint path used by the client.
+	mux.HandleFunc("/v2/chat", func(w http.ResponseWriter, r *http.Request) {
+		if r.Method != http.MethodPost {
+			http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
+			return
+		}
+		// Messages[0] is the system prompt and Messages[1] the user text; reject anything shorter.
+		var req struct {
+			Messages []struct {
+				Content string `json:"content"`
+			} `json:"messages"`
+		}
+		if err := json.NewDecoder(r.Body).Decode(&req); err != nil || len(req.Messages) < 2 {
+			http.Error(w, "bad request", http.StatusBadRequest)
+			return
+		}
+		switch {
+		case strings.Contains(req.Messages[1].Content, "This is a test input for summarization."):
+			w.WriteHeader(200)
+			w.Write([]byte(`{"message":{"role":"assistant","content":[{"type":"text","text":"This is the summary."}]}}`))
+		case strings.Contains(req.Messages[1].Content, "Non-empty"):
+			w.WriteHeader(500)
+			w.Write([]byte("server error"))
+		default:
+			// default success, in the reply shape Cohere.Summarize decodes
+			w.WriteHeader(200)
+			w.Write([]byte(`{"message":{"role":"assistant","content":[{"type":"text","text":"ok"}]}}`))
+		}
+	})
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			t.Setenv("WAYBACK_LLM_PROVIDER", "cohere")
+			t.Setenv("WAYBACK_LLM_APIKEY", "test-key")
+
+			parser := config.NewParser()
+			opts, err := parser.ParseEnvironmentVariables()
+			if err != nil {
+				t.Fatalf("Parse environment variables or flags failed, error: %v", err)
+			}
+
+			coh := NewCohere(httpClient, opts)
+
+			actual, actualErr := coh.Summarize(tt.input)
+
+			if tt.expectedErr != "" {
+				if actualErr == nil {
+					t.Fatalf("expected error %q, got nil", tt.expectedErr)
+				}
+				if actualErr.Error() != tt.expectedErr {
+					t.Fatalf("unexpected error, got %q expected %q", actualErr.Error(), tt.expectedErr)
+				}
+				return
+			}
+
+			if actualErr != nil {
+				t.Fatalf("unexpected error: %v", actualErr)
+			}
+			if actual != tt.expected {
+				t.Fatalf(`unexpected summary, got "%v" instead of "%v"`, actual, tt.expected)
+			}
+		})
+	}
+}
diff --git a/summary/doc.go b/summary/doc.go
new file mode 100644
index 00000000..77056c34
--- /dev/null
+++ b/summary/doc.go
@@ -0,0 +1,9 @@
+// Copyright 2023 Wayback Archiver. All rights reserved.
+// Use of this source code is governed by the GNU GPL v3
+// license that can be found in the LICENSE file.
+
+/*
+Package summary is designed to provide a comprehensive set of tools for
+automated text summarization.
+*/
+package summary // import "github.com/wabarc/wayback/summary"
diff --git a/summary/legacy.go b/summary/legacy.go
new file mode 100644
index 00000000..e25f70bf
--- /dev/null
+++ b/summary/legacy.go
@@ -0,0 +1,49 @@
+// Copyright 2023 Wayback Archiver. All rights reserved.
+// Use of this source code is governed by the GNU GPL v3
+// license that can be found in the LICENSE file.
+
+package summary // import "github.com/wabarc/wayback/summary"
+
+import (
+ "fmt"
+ "strings"
+
+ "github.com/didasy/tldr"
+)
+
+const maxCharacters = 128 // value assigned to the embedded tldr.Bag's MaxCharacters before each summarization
+
+// Interface guard
+var _ Summarizer = (*Legacy)(nil)
+
+// Legacy implements the Summarizer interface by embedding a *tldr.Bag and
+// calling its Summarize method, so no LLM provider is involved.
+type Legacy struct {
+ *tldr.Bag
+}
+
+// NewLegacy returns a Legacy that embeds a freshly created tldr.Bag (tldr.New).
+func NewLegacy() *Legacy {
+ return &Legacy{tldr.New()}
+}
+
+// Summarize asks the embedded tldr.Bag for one sentence summarizing the trimmed input, with
+// MaxCharacters set to maxCharacters. It returns the trimmed input when no sentence is produced.
+func (l *Legacy) Summarize(s string) (string, error) {
+	s = strings.TrimSpace(s)
+	if s == "" {
+		return "", fmt.Errorf("text not found")
+	}
+
+	l.MaxCharacters = maxCharacters
+	res, err := l.Bag.Summarize(s, 1)
+	if err != nil {
+		return "", fmt.Errorf("summarize failed: %w", err) // %w keeps the cause inspectable
+	}
+
+	if len(res) == 0 {
+		return s, nil
+	}
+
+	return res[0], nil
+}
diff --git a/summary/legacy_test.go b/summary/legacy_test.go
new file mode 100644
index 00000000..2ea661a7
--- /dev/null
+++ b/summary/legacy_test.go
@@ -0,0 +1,55 @@
+// Copyright 2023 Wayback Archiver. All rights reserved.
+// Use of this source code is governed by the GNU GPL v3
+// license that can be found in the LICENSE file.
+
+package summary // import "github.com/wabarc/wayback/summary"
+
+import (
+ "testing"
+)
+
+func TestLegacy(t *testing.T) {
+ // Table of inputs with the summary or error message expected from Legacy.Summarize.
+ tests := []struct {
+ name string
+ input string
+ want string
+ wantErr bool
+ errMessage string // compared against err.Error() only when wantErr is true
+ }{
+ {
+ name: "valid input",
+ input: "This is a test string.",
+ want: "This is a test string.",
+ wantErr: false,
+ errMessage: "",
+ },
+ {
+ name: "empty input",
+ input: "",
+ want: "",
+ wantErr: true,
+ errMessage: "text not found",
+ },
+ }
+
+ local := NewLegacy() // one instance shared by all subtests
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ got, err := local.Summarize(tt.input)
+
+ if (err != nil) != tt.wantErr {
+ t.Fatalf(`Unexpected error status. Got "%v", but wanted error="%v"`, err, tt.wantErr)
+ }
+
+ if tt.wantErr && err.Error() != tt.errMessage {
+ t.Fatalf(`Unexpected error message. Got "%v", but wanted "%v"`, err.Error(), tt.errMessage)
+ }
+
+ if !tt.wantErr && got != tt.want {
+ t.Fatalf(`Unexpected summary. Got "%v", but wanted "%v"`, got, tt.want)
+ }
+ })
+ }
+}
diff --git a/summary/openrouter.go b/summary/openrouter.go
new file mode 100644
index 00000000..56a06c07
--- /dev/null
+++ b/summary/openrouter.go
@@ -0,0 +1,95 @@
+// Copyright 2026 Wayback Archiver. All rights reserved.
+// Use of this source code is governed by the GNU GPL v3
+// license that can be found in the LICENSE file.
+
+package summary // import "github.com/wabarc/wayback/summary"
+
+import (
+ "bytes"
+ "encoding/json"
+ "fmt"
+ "net/http"
+ "strings"
+
+ "github.com/wabarc/wayback/config"
+ "github.com/wabarc/wayback/ingress"
+)
+
+// Interface guard
+var _ Summarizer = (*OpenRouter)(nil)
+
+// OpenRouter is a Summarizer that posts text to OpenRouter's chat completions endpoint.
+type OpenRouter struct {
+ client *http.Client // HTTP client used to send the chat request
+ apiKey string // sent as the Bearer token in the Authorization header
+ model string // model name placed in the request body
+}
+
+// NewOpenRouter returns an OpenRouter summarizer configured from opts. A nil http.Client
+// is replaced with ingress.Client(). The API key is opts.LLMApiKey(); the model is
+// opts.LLMModel(), or "openrouter/auto" when that is empty.
+// It always returns a non-nil instance.
+func NewOpenRouter(c *http.Client, opts *config.Options) *OpenRouter {
+	router := &OpenRouter{
+		client: c,
+		apiKey: opts.LLMApiKey(),
+		model:  opts.LLMModel(),
+	}
+	if router.client == nil {
+		router.client = ingress.Client()
+	}
+	if router.model == "" {
+		router.model = "openrouter/auto"
+	}
+	return router
+}
+
+// Summarize posts the trimmed text, preceded by the package system prompt, to OpenRouter's chat
+// completions endpoint and returns the first choice's message content, or an error on failure.
+func (or *OpenRouter) Summarize(s string) (string, error) {
+	s = strings.TrimSpace(s)
+	if s == "" {
+		return "", fmt.Errorf("text not found")
+	}
+
+	body := chatRequest{
+		Model: or.model,
+		Messages: []chatMessage{
+			{Role: "system", Content: systemPrompt},
+			{Role: "user", Content: s},
+		},
+	}
+	buf, err := json.Marshal(body)
+	if err != nil {
+		return "", fmt.Errorf("failed to marshal json: %w", err) // %w keeps the cause inspectable
+	}
+
+	endpoint := "https://openrouter.ai/api/v1/chat/completions"
+	req, err := http.NewRequest(http.MethodPost, endpoint, bytes.NewReader(buf))
+	if err != nil {
+		return "", fmt.Errorf("failed to make request: %w", err)
+	}
+	req.Header.Set("Content-Type", "application/json")
+	req.Header.Set("Authorization", "Bearer "+or.apiKey)
+
+	res, err := or.client.Do(req)
+	if err != nil {
+		return "", err
+	}
+	defer res.Body.Close()
+
+	if res.StatusCode < http.StatusOK || res.StatusCode >= http.StatusMultipleChoices {
+		return "", fmt.Errorf("openrouter api error: status %d", res.StatusCode)
+	}
+
+	var cr chatResponse
+	if err := json.NewDecoder(res.Body).Decode(&cr); err != nil {
+		return "", fmt.Errorf("failed to decode body: %w", err)
+	}
+
+	if len(cr.Choices) > 0 && strings.TrimSpace(cr.Choices[0].Message.Content) != "" {
+		return strings.TrimSpace(cr.Choices[0].Message.Content), nil
+	}
+
+	return s, nil // no usable content in the reply: fall back to the trimmed input
+}
diff --git a/summary/openrouter_test.go b/summary/openrouter_test.go
new file mode 100644
index 00000000..0b262735
--- /dev/null
+++ b/summary/openrouter_test.go
@@ -0,0 +1,183 @@
+// Copyright 2026 Wayback Archiver. All rights reserved.
+// Use of this source code is governed by the GNU GPL v3
+// license that can be found in the LICENSE file.
+
+package summary // import "github.com/wabarc/wayback/summary"
+
+import (
+ "encoding/json"
+ "net/http"
+ "strings"
+ "testing"
+
+ "github.com/wabarc/helper"
+ "github.com/wabarc/wayback/config"
+)
+
+func TestNewOpenRouter(t *testing.T) {
+	httpClient, mux, server := helper.MockServer()
+	defer server.Close()
+
+	handleFunc := func(w http.ResponseWriter, r *http.Request) {
+		w.Header().Set("Content-Type", "application/json")
+		switch r.URL.Path {
+		case "/api/v1/chat/completions":
+			w.Write(summarizeResponse)
+		}
+	}
+	mux.HandleFunc("/", handleFunc)
+
+	tests := []struct {
+		desc      string
+		client    *http.Client
+		key       string
+		expectErr bool
+		expectNil bool
+	}{
+		{
+			desc:      "Valid inputs",
+			client:    httpClient,
+			key:       "valid_api_key",
+			expectErr: false,
+			expectNil: false,
+		},
+		{
+			desc:      "Invalid API key",
+			client:    httpClient,
+			key:       apiKey,
+			expectErr: true,
+			expectNil: true,
+		},
+		{
+			desc:      "Nil http.Client",
+			client:    nil,
+			key:       apiKey,
+			expectErr: false,
+			expectNil: false,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.desc, func(t *testing.T) {
+			t.Setenv("WAYBACK_LLM_PROVIDER", "openrouter")
+			t.Setenv("WAYBACK_LLM_APIKEY", tt.key)
+
+			parser := config.NewParser()
+			opts, err := parser.ParseEnvironmentVariables()
+			if err != nil {
+				t.Fatalf("Parse environment variables or flags failed, error: %v", err)
+			}
+
+			router := NewOpenRouter(tt.client, opts)
+			if !tt.expectNil && router == nil {
+				t.Errorf("Unexpected nil value for OpenRouter instance")
+			}
+		})
+	}
+}
+
+// TestOpenRouterSummarize drives OpenRouter.Summarize against a mock chat completions handler.
+func TestOpenRouterSummarize(t *testing.T) {
+	tests := []struct {
+		name        string
+		input       string
+		mockStatus  int
+		mockBody    string
+		expected    string
+		expectedErr string
+	}{
+		{
+			name:        "Empty string",
+			input:       "",
+			expected:    "",
+			expectedErr: "text not found",
+		},
+		{
+			name:       "Valid input",
+			input:      "This is a test input for summarization.",
+			mockStatus: 200,
+			mockBody: `{
+				"choices":[
+					{"message":{"role":"assistant","content":"This is the summary."}}
+				]
+			}`,
+			expected:    "This is the summary.",
+			expectedErr: "",
+		},
+		{
+			name:        "API error status",
+			input:       "Non-empty",
+			mockStatus:  500,
+			mockBody:    `{"error":"server"}`,
+			expected:    "",
+			expectedErr: "openrouter api error: status 500",
+		},
+	}
+
+	httpClient, mux, server := helper.MockServer()
+	defer server.Close()
+
+	// Register handler at expected endpoint path used by the client.
+	mux.HandleFunc("/api/v1/chat/completions", func(w http.ResponseWriter, r *http.Request) {
+		if r.Method != http.MethodPost {
+			http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
+			return
+		}
+		// Messages[0] is the system prompt and Messages[1] the user text; reject anything shorter.
+		var req struct {
+			Messages []struct {
+				Content string `json:"content"`
+			} `json:"messages"`
+		}
+		if err := json.NewDecoder(r.Body).Decode(&req); err != nil || len(req.Messages) < 2 {
+			http.Error(w, "bad request", http.StatusBadRequest)
+			return
+		}
+		switch {
+		case strings.Contains(req.Messages[1].Content, "This is a test input for summarization."):
+			w.WriteHeader(200)
+			w.Write([]byte(`{"choices":[{"message":{"role":"assistant","content":"This is the summary."}}]}`))
+		case strings.Contains(req.Messages[1].Content, "Non-empty"):
+			w.WriteHeader(500)
+			w.Write([]byte("server error"))
+		default:
+			// default success, in the reply shape OpenRouter.Summarize decodes
+			w.WriteHeader(200)
+			w.Write([]byte(`{"choices":[{"message":{"role":"assistant","content":"ok"}}]}`))
+		}
+	})
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			t.Setenv("WAYBACK_LLM_PROVIDER", "openrouter")
+			t.Setenv("WAYBACK_LLM_APIKEY", "test-key")
+
+			parser := config.NewParser()
+			opts, err := parser.ParseEnvironmentVariables()
+			if err != nil {
+				t.Fatalf("Parse environment variables or flags failed, error: %v", err)
+			}
+
+			router := NewOpenRouter(httpClient, opts)
+
+			actual, actualErr := router.Summarize(tt.input)
+
+			if tt.expectedErr != "" {
+				if actualErr == nil {
+					t.Fatalf("expected error %q, got nil", tt.expectedErr)
+				}
+				if actualErr.Error() != tt.expectedErr {
+					t.Fatalf("unexpected error, got %q expected %q", actualErr.Error(), tt.expectedErr)
+				}
+				return
+			}
+
+			if actualErr != nil {
+				t.Fatalf("unexpected error: %v", actualErr)
+			}
+			if actual != tt.expected {
+				t.Fatalf(`unexpected summary, got "%v" instead of "%v"`, actual, tt.expected)
+			}
+		})
+	}
+}
diff --git a/summary/summary.go b/summary/summary.go
new file mode 100644
index 00000000..90f4ac31
--- /dev/null
+++ b/summary/summary.go
@@ -0,0 +1,34 @@
+// Copyright 2023 Wayback Archiver. All rights reserved.
+// Use of this source code is governed by the GNU GPL v3
+// license that can be found in the LICENSE file.
+
+package summary // import "github.com/wabarc/wayback/summary"
+
+import (
+ "strings"
+
+ "github.com/wabarc/wayback/config"
+ "github.com/wabarc/wayback/ingress"
+)
+
+// Summarizer is the interface that wraps the basic Summarize method.
+// It is the type that NewSummary returns for every provider.
+// Summarize takes in a string of text and returns its summary or an error.
+type Summarizer interface {
+ Summarize(s string) (string, error)
+}
+
+// NewSummary picks a Summarizer implementation according to opts.LLMProvider().
+// The provider name is lowercased before matching, so "Cohere" and "cohere" are
+// equivalent. Both providers are built with the client returned by
+// ingress.Client(); any other provider value yields the NewLegacy summarizer.
+func NewSummary(opts *config.Options) Summarizer {
+	provider := strings.ToLower(opts.LLMProvider())
+	if provider == "cohere" {
+		return NewCohere(ingress.Client(), opts)
+	}
+	if provider == "openrouter" {
+		return NewOpenRouter(ingress.Client(), opts)
+	}
+	return NewLegacy()
+}
diff --git a/summary/summary_test.go b/summary/summary_test.go
new file mode 100644
index 00000000..0db62241
--- /dev/null
+++ b/summary/summary_test.go
@@ -0,0 +1,74 @@
+// Copyright 2023 Wayback Archiver. All rights reserved.
+// Use of this source code is governed by the GNU GPL v3
+// license that can be found in the LICENSE file.
+
+package summary // import "github.com/wabarc/wayback/summary"
+
+import (
+ "testing"
+
+ "github.com/wabarc/helper"
+ "github.com/wabarc/wayback/config"
+)
+
+func TestSummarize(t *testing.T) {
+ httpClient, mux, server := helper.MockServer()
+ defer server.Close()
+
+ mux.HandleFunc("/", handleFunc)
+
+ t.Setenv("WAYBACK_LLM_PROVIDER", "cohere")
+ t.Setenv("WAYBACK_LLM_APIKEY", "test-key")
+
+ parser := config.NewParser()
+ opts, err := parser.ParseEnvironmentVariables()
+ if err != nil {
+ t.Fatalf("Parse environment variables or flags failed, error: %v", err)
+ }
+
+ coh := NewCohere(httpClient, opts)
+
+ tests := []struct {
+ name string
+ handler Summarizer
+ input string
+ wantErr bool
+ errMessage string
+ }{
+ {
+ name: "Valid Cohere handler",
+ handler: coh,
+ input: "This is a test string.",
+ wantErr: false,
+ errMessage: "",
+ },
+ {
+ name: "Valid Locally handler",
+ handler: NewLegacy(),
+ input: "This is a test string.",
+ wantErr: false,
+ errMessage: "",
+ },
+ {
+ name: "Empty input",
+ handler: coh,
+ input: "",
+ wantErr: true,
+ errMessage: "text not found",
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ _, err := tt.handler.Summarize(tt.input)
+
+ if (err != nil) != tt.wantErr {
+ t.Fatalf(`Unexpected error status. Got "%v", but wanted error="%v"`, err, tt.wantErr)
+ }
+
+ if tt.wantErr && err.Error() != tt.errMessage {
+ t.Fatalf(`Unexpected error message. Got "%v", but wanted "%v"`, err.Error(), tt.errMessage)
+ }
+ })
+ }
+}
diff --git a/template/render/discord.go b/template/render/discord.go
index 809ba788..b4ecfb54 100644
--- a/template/render/discord.go
+++ b/template/render/discord.go
@@ -59,7 +59,7 @@ func (d *Discord) ForPublish() (r *Render) {
tmplBytes.WriteString("\n\n")
}
- if dgst := Digest(d.Cols, d.Data); dgst != "" {
+ if dgst := summaryOrDigest(d.Cols, d.Data); dgst != "" {
tmplBytes.WriteString(dgst)
tmplBytes.WriteString("\n\n")
}
diff --git a/template/render/github.go b/template/render/github.go
index 81988f3a..28834bd9 100644
--- a/template/render/github.go
+++ b/template/render/github.go
@@ -33,7 +33,7 @@ func (gh *GitHub) ForReply() *Render {
func (gh *GitHub) ForPublish() *Render {
var tmplBytes bytes.Buffer
- if dgst := Digest(gh.Cols, gh.Data); dgst != "" {
+ if dgst := summaryOrDigest(gh.Cols, gh.Data); dgst != "" {
tmplBytes.WriteString(dgst)
tmplBytes.WriteString("\n\n")
}
diff --git a/template/render/matrix.go b/template/render/matrix.go
index a810408f..511c8ce7 100644
--- a/template/render/matrix.go
+++ b/template/render/matrix.go
@@ -66,7 +66,7 @@ func (m *Matrix) ForPublish() *Render {
tmplBytes.WriteString(` ›
`)
}
- if dgst := Digest(m.Cols, m.Data); dgst != "" {
+ if dgst := summaryOrDigest(m.Cols, m.Data); dgst != "" {
tmplBytes.WriteString(dgst)
tmplBytes.WriteString(`
`)
}
diff --git a/template/render/render.go b/template/render/render.go
index a45c9c85..71192217 100644
--- a/template/render/render.go
+++ b/template/render/render.go
@@ -157,8 +157,8 @@ func Title(cols []wayback.Collect, rdx reduxer.Reduxer) (title string) {
return
}
-// Digest returns digest of the webpage content. Its maximum length is defined by `maxDigestLen`.
-func Digest(cols []wayback.Collect, rdx reduxer.Reduxer) (dgst string) {
+// digest returns digest of the webpage content. Its maximum length is defined by `maxDigestLen`.
+func digest(cols []wayback.Collect, rdx reduxer.Reduxer) (dgst string) {
if rdx == nil {
return
}
@@ -185,6 +185,42 @@ func Digest(cols []wayback.Collect, rdx reduxer.Reduxer) (dgst string) {
return
}
+// summary concatenates the non-empty bundle summaries found in rdx for the URIs of cols.
+// Each summary longer than `maxDigestLen` runes is cut to that length and suffixed with ` ...`.
+func summary(cols []wayback.Collect, rdx reduxer.Reduxer) (dgst string) {
+	if rdx == nil {
+		return
+	}
+
+	for uri := range deDepURI(cols) {
+		bundle, ok := rdx.Load(reduxer.Src(uri))
+		if !ok {
+			continue
+		}
+		text := bundle.Summary()
+		if text == "" {
+			continue
+		}
+		logger.Debug("extracted summary from article content: %s", text)
+		// Truncate by runes, not bytes, so a multi-byte character is never cut in half.
+		if runes := []rune(text); len(runes) > maxDigestLen {
+			dgst += string(runes[:maxDigestLen]) + ` ...`
+		} else {
+			dgst += string(runes)
+		}
+	}
+
+	return
+}
+
+// summaryOrDigest returns summary(cols, rdx) when it is non-empty, else digest(cols, rdx).
+func summaryOrDigest(cols []wayback.Collect, rdx reduxer.Reduxer) string {
+	if text := summary(cols, rdx); text != "" {
+		return text
+	}
+	return digest(cols, rdx)
+}
+
// writeArtifact writes archived artifact of the webpage.
func writeArtifact(cols []wayback.Collect, rdx reduxer.Reduxer, fn func(art reduxer.Artifact)) {
if rdx == nil {
diff --git a/template/render/slack.go b/template/render/slack.go
index 880c56d4..4027fb08 100644
--- a/template/render/slack.go
+++ b/template/render/slack.go
@@ -61,7 +61,7 @@ func (s *Slack) ForPublish() (r *Render) {
tmplBytes.WriteString(" ›\n\n")
}
- if dgst := Digest(s.Cols, s.Data); dgst != "" {
+ if dgst := summaryOrDigest(s.Cols, s.Data); dgst != "" {
tmplBytes.WriteString(dgst)
tmplBytes.WriteString("\n\n")
}
diff --git a/template/render/telegram.go b/template/render/telegram.go
index c64bb30f..b7c94fea 100644
--- a/template/render/telegram.go
+++ b/template/render/telegram.go
@@ -69,7 +69,7 @@ func (t *Telegram) ForPublish() (r *Render) {
tmplBytes.WriteString("\n\n")
}
- if dgst := Digest(t.Cols, t.Data); dgst != "" {
+ if dgst := summaryOrDigest(t.Cols, t.Data); dgst != "" {
tmplBytes.WriteString(dgst)
tmplBytes.WriteString("\n\n")
}
diff --git a/wayback.1 b/wayback.1
index ae044242..9d6dcc70 100644
--- a/wayback.1
+++ b/wayback.1
@@ -224,6 +224,17 @@ Directory to store binary file, e.g. PDF, html file\&.
.B WAYBACK_MAX_MEDIA_SIZE
Max size to limit download stream media. default 512MB\&.
.TP
+.B WAYBACK_LLM_PROVIDER
+Enables AI-enhanced summary. Provider options: cohere | openrouter\&.
+.TP
+.B WAYBACK_LLM_APIKEY
+LLM API key\&.
+.TP
+.B WAYBACK_LLM_MODEL
+LLM model. Each provider has a sensible default:
+.br
+cohere: command-a-03-2025 | openrouter: openrouter/auto\&.
+.TP
.B WAYBACK_MEDIA_SITES
Extra media websites wish to be supported, separate with comma\&.
.TP
diff --git a/wayback.conf b/wayback.conf
index 57a2bdd4..291c278d 100644
--- a/wayback.conf
+++ b/wayback.conf
@@ -76,6 +76,9 @@ WAYBACK_USERAGENT=WaybackArchiver/1.0
WAYBACK_FALLBACK=off
WAYBACK_PROXY=
WAYBACK_PRIVACY_URL=
+WAYBACK_LLM_PROVIDER=
+WAYBACK_LLM_APIKEY=
+WAYBACK_LLM_MODEL=
# ipfs slot: infura, pinata
# doc: https://github.com/wabarc/ipfs-pinner#supported-pinning-services