diff --git a/CHANGELOG.md b/CHANGELOG.md index f6acdf791..e5dda59e4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,7 @@ The following emojis are used to highlight certain changes: ### Added - `gateway`: `GET`/`HEAD /ipfs/bafkqaaa?format=raw` now always returns `200` with an empty body, so probing clients keep marking the gateway as functional even when its backend cannot serve identity CIDs. `bitswap/network/httpnet` sends this [trustless gateway probe](https://specs.ipfs.tech/http-gateways/trustless-gateway/#dedicated-probe-paths) to check providers, and a failed probe drops the provider. Exported as `gateway.EmptyIdentityCID`. [#1179](https://github.com/ipfs/boxo/pull/1179) +- `path`: added `NewPathFromURI`, which accepts native IPFS URIs (`ipfs://cid`, `ipns://name`, `ipld://cid`, and the schemeless `ipfs:`/`ipns:`/`ipld:` forms) and rewrites them to canonical content paths, so values copied from browsers and other tools parse as-is. `NewPath` stays strict and still rejects URI-shaped input, leaving untrusted parsing such as DNSLink records unchanged. [#1182](https://github.com/ipfs/boxo/pull/1182) ### Changed diff --git a/path/path.go b/path/path.go index 8b67af4d0..fdbdb627e 100644 --- a/path/path.go +++ b/path/path.go @@ -129,6 +129,10 @@ func FromCid(cid cid.Cid) ImmutablePath { // The given string is cleaned through [gopath.Clean], but preserving the final // trailing slash. This function returns an error when the given string is not // a valid content path. +// +// NewPath is strict: it accepts only canonical content paths (/ipfs, /ipns, +// /ipld). To also accept native IPFS URIs such as ipfs://{cid}, use +// [NewPathFromURI]. 
func NewPath(str string) (Path, error) { segments := StringToSegments(str)
diff --git a/path/uri.go b/path/uri.go
new file mode 100644
index 000000000..722e69324
--- /dev/null
+++ b/path/uri.go
@@ -0,0 +1,65 @@
+package path
+
+import "strings"
+
+// NewPathFromURI parses str the way [NewPath] does, but first translates a
+// native IPFS URI into the matching canonical content path:
+//
+//	ipfs://{cid}/sub -> /ipfs/{cid}/sub
+//	ipns://{name}/sub -> /ipns/{name}/sub
+//	ipld://{cid}/sub -> /ipld/{cid}/sub
+//
+// The "//" after the scheme is optional: ipfs:{cid}, ipns:{name}, and
+// ipld:{cid} are rewritten the same way. The scheme is compared ignoring
+// ASCII case, since URI schemes are case-insensitive (RFC 3986); the rest of
+// str is carried over byte-for-byte, so a case-sensitive CIDv0 root or a
+// DNSLink name keeps its spelling. Input that does not start with one of
+// these schemes, including an existing content path, reaches [NewPath]
+// untouched.
+//
+// Call this only at input boundaries where values may have been copied from a
+// browser or another tool. [NewPath] does not perform this rewrite, so callers
+// that must accept canonical content paths only (such as DNSLink records) are
+// not loosened by this helper.
+func NewPathFromURI(str string) (Path, error) {
+	canonical := normalizeURIScheme(str)
+	return NewPath(canonical)
+}
+
+// normalizeURIScheme converts a native IPFS URI (ipfs:, ipns: or ipld:, with or
+// without "//") into its canonical content path. Any other input is returned
+// as-is. See [NewPathFromURI].
+func normalizeURIScheme(str string) string {
+	namespaces := [...]string{IPFSNamespace, IPNSNamespace, IPLDNamespace}
+	for i := range namespaces {
+		scheme := namespaces[i]
+		if !hasURIScheme(str, scheme) {
+			continue
+		}
+		// Skip "<scheme>:" and then one optional "//" authority separator.
+		remainder := strings.TrimPrefix(str[len(scheme)+1:], "//")
+		return "/" + scheme + "/" + remainder
+	}
+	return str
+}
+
+// hasURIScheme reports whether str begins with the scheme ns followed by ":"
+// (e.g. "ipfs:"). ns is matched case-insensitively over ASCII.
+func hasURIScheme(str, ns string) bool {
+	if len(str) <= len(ns) || str[len(ns)] != ':' { // str has no byte at index len(ns), or that byte is not ':'
+		return false
+	}
+	for i := range len(ns) {
+		if toLowerASCII(str[i]) != ns[i] { // only str is folded, so ns must already be lower-case to match
+			return false
+		}
+	}
+	return true
+}
+
+func toLowerASCII(b byte) byte { // maps 'A'-'Z' to 'a'-'z'; any other byte is returned unchanged
+	if b >= 'A' && b <= 'Z' {
+		return b + ('a' - 'A')
+	}
+	return b
+}
diff --git a/path/uri_test.go b/path/uri_test.go
new file mode 100644
index 000000000..fdc212e56
--- /dev/null
+++ b/path/uri_test.go
@@ -0,0 +1,122 @@
+package path
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+)
+
+// TestNewPathFromURI checks that NewPathFromURI rewrites ipfs:, ipns: and ipld:
+// URIs to canonical content paths, still returns errors for unusable input, and
+// that normalizeURIScheme leaves non-URI strings unchanged.
+func TestNewPathFromURI(t *testing.T) {
+	t.Parallel()
+
+	t.Run("Valid URIs", func(t *testing.T) {
+		t.Parallel()
+
+		testCases := []struct {
+			src       string // input handed to NewPathFromURI
+			canonical string // expected p.String()
+			namespace string // expected p.Namespace()
+			mutable   bool   // expected p.Mutable()
+		}{
+			// ipfs:// with CIDv1, with and without a sub-path
+			{"ipfs://bafybeihdwdcefgh4dqkjv67uzcmw7ojee6xedzdetojuzjevtenxquvyku", "/ipfs/bafybeihdwdcefgh4dqkjv67uzcmw7ojee6xedzdetojuzjevtenxquvyku", IPFSNamespace, false},
+			{"ipfs://bafybeihdwdcefgh4dqkjv67uzcmw7ojee6xedzdetojuzjevtenxquvyku/a/b", "/ipfs/bafybeihdwdcefgh4dqkjv67uzcmw7ojee6xedzdetojuzjevtenxquvyku/a/b", IPFSNamespace, false},
+
+			// CIDv0 is base58 and case-sensitive: the root must survive untouched.
+			{"ipfs://QmdfTbBqBPQ7VNxZEYEj14VmRuZBkqFbiwReogJgS1zR1n", "/ipfs/QmdfTbBqBPQ7VNxZEYEj14VmRuZBkqFbiwReogJgS1zR1n", IPFSNamespace, false},
+			{"ipfs://QmdfTbBqBPQ7VNxZEYEj14VmRuZBkqFbiwReogJgS1zR1n/a", "/ipfs/QmdfTbBqBPQ7VNxZEYEj14VmRuZBkqFbiwReogJgS1zR1n/a", IPFSNamespace, false},
+
+			// ipns:// with an IPNS key and with a DNSLink name
+			{"ipns://bafybeihdwdcefgh4dqkjv67uzcmw7ojee6xedzdetojuzjevtenxquvyku", "/ipns/bafybeihdwdcefgh4dqkjv67uzcmw7ojee6xedzdetojuzjevtenxquvyku", IPNSNamespace, true},
+			{"ipns://docs.ipfs.tech/concepts", "/ipns/docs.ipfs.tech/concepts", IPNSNamespace, true},
+
+			// ipld:// maps to the /ipld namespace
+			{"ipld://bafybeihdwdcefgh4dqkjv67uzcmw7ojee6xedzdetojuzjevtenxquvyku/a", "/ipld/bafybeihdwdcefgh4dqkjv67uzcmw7ojee6xedzdetojuzjevtenxquvyku/a", IPLDNamespace, false},
+
+			// Schemeless forms: ipfs:{cid}, ipns:{name}, ipld:{cid}
+			{"ipfs:bafkqaaa", "/ipfs/bafkqaaa", IPFSNamespace, false},
+			{"ipns:docs.ipfs.tech", "/ipns/docs.ipfs.tech", IPNSNamespace, true},
+			{"ipld:bafkqaaa", "/ipld/bafkqaaa", IPLDNamespace, false},
+
+			// Scheme is case-insensitive (RFC 3986); the rest is preserved.
+			{"IPFS://bafkqaaa", "/ipfs/bafkqaaa", IPFSNamespace, false},
+			{"IpNs://docs.ipfs.tech", "/ipns/docs.ipfs.tech", IPNSNamespace, true},
+
+			// Trailing slash is preserved, mirroring /ipfs/cid/ behaviour.
+			{"ipfs://bafkqaaa/", "/ipfs/bafkqaaa/", IPFSNamespace, false},
+
+			// Cleaning still applies after normalization.
+			{"ipfs://bafkqaaa/a/b/../c", "/ipfs/bafkqaaa/a/c", IPFSNamespace, false},
+
+			// A canonical path passes straight through to NewPath.
+			{"/ipfs/bafkqaaa", "/ipfs/bafkqaaa", IPFSNamespace, false},
+			{"/ipns/docs.ipfs.tech", "/ipns/docs.ipfs.tech", IPNSNamespace, true},
+		}
+
+		for _, testCase := range testCases {
+			p, err := NewPathFromURI(testCase.src)
+			assert.NoErrorf(t, err, "input %q", testCase.src)
+			if err != nil {
+				continue // failure already recorded by NoErrorf; skip the checks that read p
+			}
+			assert.Equalf(t, testCase.canonical, p.String(), "input %q", testCase.src)
+			assert.Equalf(t, testCase.namespace, p.Namespace(), "input %q", testCase.src)
+			assert.Equalf(t, testCase.mutable, p.Mutable(), "input %q", testCase.src)
+		}
+	})
+
+	t.Run("Invalid URIs still error", func(t *testing.T) {
+		t.Parallel()
+
+		// Empty-after-scheme reduces to "/ipfs/" etc., which NewPath rejects.
+		for _, src := range []string{"ipfs:", "ipns:", "ipld:", "ipfs://", "ipns://", "IPFS://"} {
+			_, err := NewPathFromURI(src)
+			assert.ErrorIsf(t, err, ErrInsufficientComponents, "input %q", src)
+			// NOTE(review): matching a fresh &ErrInvalidPath{} presumably relies on ErrInvalidPath defining Is — confirm.
+			assert.ErrorIsf(t, err, &ErrInvalidPath{}, "input %q always an ErrInvalidPath", src)
+		}
+
+		// A malformed CID still fails (as an ErrInvalidPath wrapping the CID error).
+		_, err := NewPathFromURI("ipfs://notacid")
+		assert.ErrorIs(t, err, &ErrInvalidPath{})
+	})
+
+	t.Run("Returns ImmutablePath for ipfs:// URIs", func(t *testing.T) {
+		t.Parallel()
+
+		p, err := NewPathFromURI("ipfs://bafybeihdwdcefgh4dqkjv67uzcmw7ojee6xedzdetojuzjevtenxquvyku/a")
+		assert.NoError(t, err)
+		assert.IsType(t, ImmutablePath{}, p)
+	})
+
+	// Inputs that merely start with the letters of a namespace, or whose ":" is
+	// not directly after it, and canonical paths must come back unchanged.
+	t.Run("Does not misfire on non-URI input", func(t *testing.T) {
+		t.Parallel()
+
+		for _, src := range []string{"ipfsfile", "ipns-notes.txt", "ipfsbar:baz", "ipns", "/ipfs/bafkqaaa"} {
+			assert.Equalf(t, src, normalizeURIScheme(src), "input %q must be unchanged", src)
+		}
+	})
+}
+
+// TestNewPathStaysStrict guards the security boundary: NewPath itself must keep
+// rejecting native IPFS URIs, so contexts that parse untrusted strings (such as
+// DNSLink records) are not loosened. Only NewPathFromURI accepts URIs.
+func TestNewPathStaysStrict(t *testing.T) {
+	t.Parallel()
+
+	for _, src := range []string{
+		"ipfs://bafybeihdwdcefgh4dqkjv67uzcmw7ojee6xedzdetojuzjevtenxquvyku",
+		"ipns://attacker.example",
+		"ipfs:bafkqaaa",
+		"ipns:attacker.example",
+	} {
+		_, err := NewPath(src)
+		assert.Errorf(t, err, "NewPath must reject URI %q", src)
+		assert.ErrorIsf(t, err, &ErrInvalidPath{}, "input %q", src)
+	}
+}