diff --git a/src/main/java/com/vdurmont/emoji/EmojiParser.java b/src/main/java/com/vdurmont/emoji/EmojiParser.java
index ccb9b929..c55a38af 100644
--- a/src/main/java/com/vdurmont/emoji/EmojiParser.java
+++ b/src/main/java/com/vdurmont/emoji/EmojiParser.java
@@ -3,6 +3,7 @@
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
+import java.util.Collections;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@@ -14,7 +15,8 @@
*/
public class EmojiParser {
private static final Pattern ALIAS_CANDIDATE_PATTERN =
- Pattern.compile("(?<=:)\\+?(\\w|\\||\\-)+(?=:)");
+ Pattern.compile("\\S*?((?<=:)\\+?(\\w|\\||\\-)+(?=:))\\w*");
+ private static final Pattern URL_PATTERN = Pattern.compile("(https?:\\/\\/(?:www\\.|(?!www))[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\\.[^\\s]{2,}|www\\.[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\\.[^\\s]{2,}|https?:\\/\\/(?:www\\.|(?!www))[a-zA-Z0-9]\\.[^\\s]{2,}|www\\.[a-zA-Z0-9]\\.[^\\s]{2,})");
/**
* See {@link #parseToAliases(String, FitzpatrickAction)} with the action
@@ -101,7 +103,6 @@ public String transform(EmojiParser.UnicodeCandidate unicodeCandidate) {
return parseFromUnicode(str, emojiTransformer);
}
-
/**
* Replaces the emoji's aliases (between 2 ':') occurrences and the html
* representations by their unicode.
@@ -116,8 +117,21 @@ public String transform(EmojiParser.UnicodeCandidate unicodeCandidate) {
* their unicode.
*/
public static String parseToUnicode(String input) {
+ return parseToUnicode(input,false); // delegate with shouldIgnoreUrls=false: aliases inside URLs are replaced as well
+ }
+
+ /**
+ * Same as {@link #parseToUnicode(String)}, with optional special handling of URLs.
+ *
+ * @param input the string to parse
+ * @param shouldIgnoreUrls whether aliases found inside URLs should be left untouched
+ *
+ * @return the string with the aliases and html representations replaced by
+ * their unicode; when shouldIgnoreUrls is true, aliases inside URLs are left as-is
+ */
+ public static String parseToUnicode(String input, boolean shouldIgnoreUrls) {
// Get all the potential aliases
- List candidates = getAliasCandidates(input);
+ List candidates = getAliasCandidates(input, shouldIgnoreUrls);
// Replace the aliases by their unicode
String result = input;
@@ -132,10 +146,7 @@ public static String parseToUnicode(String input) {
if (candidate.fitzpatrick != null) {
replacement += candidate.fitzpatrick.unicode;
}
- result = result.replace(
- ":" + candidate.fullString + ":",
- replacement
- );
+ result = replaceFirstFrom(result, candidate.position, Pattern.quote(":" + candidate.fullString + ":"), replacement);
}
}
}
@@ -148,25 +159,42 @@ public static String parseToUnicode(String input) {
return result;
}
-
protected static List getAliasCandidates(String input) {
- List candidates = new ArrayList();
+ return getAliasCandidates(input,false); // delegate with shouldIgnoreUrls=false: words that are URLs are scanned like any other word
+ }
+ protected static List getAliasCandidates(String input, boolean shouldIgnoreUrls) { // collects ":alias:" candidates word by word, optionally skipping words that are URLs
+ List words = Arrays.asList(input.split("\\s")); // split on single whitespace characters; each piece is scanned on its own
- Matcher matcher = ALIAS_CANDIDATE_PATTERN.matcher(input);
- matcher = matcher.useTransparentBounds(true);
- while (matcher.find()) {
- String match = matcher.group();
- if (!match.contains("|")) {
- candidates.add(new AliasCandidate(match, match, null));
- } else {
- String[] splitted = match.split("\\|");
- if (splitted.length == 2 || splitted.length > 2) {
- candidates.add(new AliasCandidate(match, splitted[0], splitted[1]));
+ List candidates = new ArrayList();
+ int position; // offset in input of the current match's full text
+ int nextPosition = 0; // where the next indexOf search in input starts (just past the previous match)
+ for(String word : words) {
+ Matcher matcher = ALIAS_CANDIDATE_PATTERN.matcher(word);
+ matcher = matcher.useTransparentBounds(true);
+
+ while (matcher.find()) {
+ String fullWord = matcher.group(); // whole match, including any non-space prefix the pattern consumed before the alias
+ position = input.indexOf(fullWord, nextPosition); // map the per-word match back to an offset in the original input
+ nextPosition = position + fullWord.length();
+ // Skip this candidate when URLs are to be ignored and the whole word is a URL
+ if (shouldIgnoreUrls && URL_PATTERN.matcher(word).matches()) {
+ continue; // position/nextPosition were already advanced above, so later offsets stay in sync
+ }
+ String match = matcher.group(1); // group 1 is the text between the colons: alias, optionally followed by "|" and a fitzpatrick string
+ if (!match.contains("|")) {
+ candidates.add(new AliasCandidate(match, match, null, position));
} else {
- candidates.add(new AliasCandidate(match, match, null));
+ String[] splitted = match.split("\\|");
+ if (splitted.length == 2 || splitted.length > 2) {
+ candidates.add(new AliasCandidate(match, splitted[0], splitted[1], position)); // parts beyond the second are dropped
+ } else {
+ candidates.add(new AliasCandidate(match, match, null, position)); // fewer than two parts: treat the whole match as the alias
+ }
}
}
}
+
+ Collections.reverse(candidates); // last match first; NOTE(review): presumably so position-based replacement can run from the end without shifting earlier offsets - confirm
return candidates;
}
@@ -406,6 +434,23 @@ protected static List getUnicodeCandidates(String input) {
return candidates;
}
+ /**
+ * Replaces the first match of a regex found at or after a start position.
+ *
+ * @param str original string; the part before {@code from} is kept unchanged
+ * @param from index in {@code str} at which the search starts
+ * @param regex regular expression to search for, passed to {@link String#replaceFirst}
+ * @param replacement replacement passed to {@link String#replaceFirst}, which interprets {@code $} and {@code \}
+ * @return {@code str} with the first match at or after {@code from} replaced
+ */
+ private static String replaceFirstFrom(String str, int from, String regex, String replacement)
+ {
+ String prefix = str.substring(0, from); // head that must not be searched or modified
+ String rest = str.substring(from); // tail in which the first match is replaced
+ rest = rest.replaceFirst(regex, replacement);
+ return prefix+rest;
+ }
+
/**
* Finds the next UnicodeCandidate after a given starting index
*
@@ -517,12 +562,15 @@ protected static class AliasCandidate {
public final String fullString;
public final String alias;
public final Fitzpatrick fitzpatrick;
+ public final int position;
private AliasCandidate(
String fullString,
String alias,
- String fitzpatrickString
+ String fitzpatrickString,
+ int position
) {
+ this.position = position;
this.fullString = fullString;
this.alias = alias;
if (fitzpatrickString == null) {
diff --git a/src/test/java/com/vdurmont/emoji/EmojiParserTest.java b/src/test/java/com/vdurmont/emoji/EmojiParserTest.java
index 7575b77d..a79a2555 100644
--- a/src/test/java/com/vdurmont/emoji/EmojiParserTest.java
+++ b/src/test/java/com/vdurmont/emoji/EmojiParserTest.java
@@ -311,6 +311,22 @@ public void parseToUnicode_with_the_thumbsup_emoji_in_hex_replaces_the_alias_by_
);
}
+ @Test
+ public void parseToUnicode_with_link_replaces_the_alias_by_the_emoji() {
+ // GIVEN: the same alias in plain text (start and end) and twice inside a URL
+ String str = ":smiley: The link is https://www.aaa.com/link_with_emoji_alias_:smiley:__:smiley: hello :smiley:.";
+
+ // WHEN: parsing with shouldIgnoreUrls = true
+ String result = EmojiParser.parseToUnicode(str, true);
+
+ // THEN: only the aliases outside the URL are replaced
+ assertEquals(
+ "😃 The link is https://www.aaa.com/link_with_emoji_alias_:smiley:__:smiley: hello 😃.",
+ result
+ );
+
+ }
+
@Test
public void parseToUnicode_with_a_fitzpatrick_modifier() {
// GIVEN
@@ -400,9 +416,9 @@ public void getAliasCanditates_with_two_aliases() {
// THEN
assertEquals(2, candidates.size());
- assertEquals("candi", candidates.get(0).alias);
+ assertEquals("candi", candidates.get(1).alias);
assertNull(candidates.get(0).fitzpatrick);
- assertEquals("candidate", candidates.get(1).alias);
+ assertEquals("candidate", candidates.get(0).alias);
assertNull(candidates.get(1).fitzpatrick);
}
@@ -416,9 +432,9 @@ public void getAliasCanditates_with_two_aliases_sharing_a_colon() {
// THEN
assertEquals(2, candidates.size());
- assertEquals("candi", candidates.get(0).alias);
+ assertEquals("candi", candidates.get(1).alias);
assertNull(candidates.get(0).fitzpatrick);
- assertEquals("candidate", candidates.get(1).alias);
+ assertEquals("candidate", candidates.get(0).alias);
assertNull(candidates.get(1).fitzpatrick);
}