diff --git a/README.md b/README.md
index 75b44d4..6a720fa 100644
--- a/README.md
+++ b/README.md
@@ -22,6 +22,7 @@ Let's look at a few cases:
Seems simple enough. But then we also have these cases:
* `https://en.wikipedia.org/wiki/Link_(The_Legend_of_Zelda)` should include the trailing paren
+* `http://example.com/./` should include the dot because the slash after it makes the dot part of the path
* `http://üñîçøðé.com/ä` should also work for Unicode (including Emoji and Punycode)
* `<http://example.com/>` should not include angle brackets
diff --git a/src/url.rs b/src/url.rs
index 3f106e9..c1bfc9b 100644
--- a/src/url.rs
+++ b/src/url.rs
@@ -207,7 +207,7 @@ fn find_url_end(s: &str, quote: Option<char>, iri_parsing_enabled: bool) -> Opti
let mut curly = 0;
let mut single_quote = false;
- let mut previous_can_be_last = true;
+ let mut previous_is_url_char = true;
let mut end = Some(0);
if !s[0..].starts_with("/") && !s[0..].starts_with("?") {
@@ -232,9 +232,12 @@ fn find_url_end(s: &str, quote: Option<char>, iri_parsing_enabled: bool) -> Opti
false
}
'/' => {
- // This may be part of an URL and at the end, but not if the previous character
- // can't be the end of an URL
- previous_can_be_last
+ // A slash can be the end of a URL if the previous character is a valid URL
+ // character. This means that delimiters like `!` or `.` before a `/` are
+ // included in the URL, e.g. `/!/`, but non-URL characters (like non-ASCII
+ // when IRI parsing is disabled) before a `/` are not.
+ // See https://github.com/robinst/linkify/issues/90
+ previous_is_url_char
}
'(' => {
round += 1;
@@ -288,7 +291,12 @@ fn find_url_end(s: &str, quote: Option<char>, iri_parsing_enabled: bool) -> Opti
if can_be_last {
end = Some(i + c.len_utf8());
}
- previous_can_be_last = can_be_last;
+ // Track whether the current character is a valid URL character (even if it can't
+ // be the last character). Delimiters like `!` are valid URL characters, but
+ // non-ASCII characters are not valid URL characters when IRI parsing is disabled.
+ // This matters for `/` above: a slash after a delimiter like `!` should extend
+ // the URL (e.g. `/!/`), but a slash after a non-URL character should not.
+ previous_is_url_char = c.is_ascii() || iri_parsing_enabled;
}
end
diff --git a/tests/url.rs b/tests/url.rs
index 201b90e..41b1650 100644
--- a/tests/url.rs
+++ b/tests/url.rs
@@ -151,6 +151,30 @@ fn delimiter_at_end_no_protocol() {
assert_urls_without_protocol("example.org/;", "|example.org/|;");
}
+#[test]
+fn delimiter_followed_by_slash() {
+ // https://github.com/robinst/linkify/issues/90
+ // Each input places a delimiter character (`!`, `.`, `,`, `:`, `;`, `?`) directly before
+ // a `/`. The `|` markers in the second argument enclose the expected link, which in every
+ // case below is the whole input, i.e. the delimiter and the slash are both kept.
+ assert_linked("http://example.org/!/", "|http://example.org/!/|");
+ assert_linked("http://example.org/test!/", "|http://example.org/test!/|");
+ assert_linked("http://example.org/./", "|http://example.org/./|");
+ assert_linked("http://example.org/,/", "|http://example.org/,/|");
+ assert_linked("http://example.org/:/", "|http://example.org/:/|");
+ assert_linked("http://example.org/;/", "|http://example.org/;/|");
+ assert_linked("http://example.org/?/", "|http://example.org/?/|");
+ // Delimiter followed by slash and more path
+ assert_linked("http://example.org/!/a", "|http://example.org/!/a|");
+ assert_linked("http://example.org/test!/a", "|http://example.org/test!/a|");
+}
+
+#[test]
+fn delimiter_followed_by_slash_no_protocol() {
+ // Inputs here carry no `http://` prefix. As in the expectations below, the `|` markers
+ // enclose the expected link, which is the whole input including the delimiter and slash.
+ // NOTE(review): the `:` and `?` cases asserted for `http://` inputs are not repeated
+ // here -- confirm whether that omission is intentional.
+ assert_urls_without_protocol("example.org/!/", "|example.org/!/|");
+ assert_urls_without_protocol("example.org/test!/", "|example.org/test!/|");
+ assert_urls_without_protocol("example.org/./", "|example.org/./|");
+ assert_urls_without_protocol("example.org/,/", "|example.org/,/|");
+ assert_urls_without_protocol("example.org/;/", "|example.org/;/|");
+}
+
#[test]
fn matching_punctuation() {
assert_linked("http://example.org/a(b)", "|http://example.org/a(b)|");