Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ Let's look at a few cases:
Seems simple enough. But then we also have these cases:

* `https://en.wikipedia.org/wiki/Link_(The_Legend_of_Zelda)` should include the trailing paren
* `http://example.com/./` should include the dot because the slash makes it a path
* `http://üñîçøðé.com/ä` should also work for Unicode (including Emoji and Punycode)
* `<http://example.com/>` should not include angle brackets

Expand Down
18 changes: 13 additions & 5 deletions src/url.rs
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ fn find_url_end(s: &str, quote: Option<char>, iri_parsing_enabled: bool) -> Opti
let mut curly = 0;
let mut single_quote = false;

let mut previous_can_be_last = true;
let mut previous_is_url_char = true;
let mut end = Some(0);

if !s[0..].starts_with("/") && !s[0..].starts_with("?") {
Expand All @@ -232,9 +232,12 @@ fn find_url_end(s: &str, quote: Option<char>, iri_parsing_enabled: bool) -> Opti
false
}
'/' => {
// This may be part of a URL and at the end, but not if the previous character
// can't be the end of a URL
previous_can_be_last
// A slash can be the end of a URL if the previous character is a valid URL
// character. This means that delimiters like `!` or `.` before a `/` are
// included in the URL, e.g. `/!/`, but non-URL characters (like non-ASCII
// when IRI parsing is disabled) before a `/` are not.
// See https://github.com/robinst/linkify/issues/90
previous_is_url_char
}
'(' => {
round += 1;
Expand Down Expand Up @@ -288,7 +291,12 @@ fn find_url_end(s: &str, quote: Option<char>, iri_parsing_enabled: bool) -> Opti
if can_be_last {
end = Some(i + c.len_utf8());
}
previous_can_be_last = can_be_last;
// Track whether the current character is a valid URL character (even if it can't
// be the last character). Delimiters like `!` are valid URL characters, but
// non-ASCII characters are not valid URL characters when IRI parsing is disabled.
// This matters for `/` above: a slash after a delimiter like `!` should extend
// the URL (e.g. `/!/`), but a slash after a non-URL character should not.
previous_is_url_char = c.is_ascii() || iri_parsing_enabled;
}

end
Expand Down
24 changes: 24 additions & 0 deletions tests/url.rs
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,30 @@ fn delimiter_at_end_no_protocol() {
assert_urls_without_protocol("example.org/;", "|example.org/|;");
}

/// Regression test for https://github.com/robinst/linkify/issues/90.
///
/// Each case feeds a URL in which a delimiter character (`!`, `.`, `,`, `:`,
/// `;`, `?`) is directly followed by a slash, and expects the whole input to
/// appear between the `|` markers of the expected string.
#[test]
fn delimiter_followed_by_slash() {
    // (input, expected) pairs, checked in order.
    let cases: &[(&str, &str)] = &[
        ("http://example.org/!/", "|http://example.org/!/|"),
        ("http://example.org/test!/", "|http://example.org/test!/|"),
        ("http://example.org/./", "|http://example.org/./|"),
        ("http://example.org/,/", "|http://example.org/,/|"),
        ("http://example.org/:/", "|http://example.org/:/|"),
        ("http://example.org/;/", "|http://example.org/;/|"),
        ("http://example.org/?/", "|http://example.org/?/|"),
        // Delimiter followed by slash and more path
        ("http://example.org/!/a", "|http://example.org/!/a|"),
        ("http://example.org/test!/a", "|http://example.org/test!/a|"),
    ];

    for &(input, expected) in cases {
        assert_linked(input, expected);
    }
}

/// Same scenario as the delimiter-before-slash cases, but for inputs without
/// a scheme: a delimiter (`!`, `.`, `,`, `;`) directly followed by a slash is
/// expected to stay inside the `|` markers of the expected string.
#[test]
fn delimiter_followed_by_slash_no_protocol() {
    // (input, expected) pairs, checked in order.
    let cases: &[(&str, &str)] = &[
        ("example.org/!/", "|example.org/!/|"),
        ("example.org/test!/", "|example.org/test!/|"),
        ("example.org/./", "|example.org/./|"),
        ("example.org/,/", "|example.org/,/|"),
        ("example.org/;/", "|example.org/;/|"),
    ];

    for &(input, expected) in cases {
        assert_urls_without_protocol(input, expected);
    }
}

#[test]
fn matching_punctuation() {
assert_linked("http://example.org/a(b)", "|http://example.org/a(b)|");
Expand Down
Loading