diff --git a/src/main/java/com/vdurmont/emoji/EmojiParser.java b/src/main/java/com/vdurmont/emoji/EmojiParser.java index ccb9b929..c55a38af 100644 --- a/src/main/java/com/vdurmont/emoji/EmojiParser.java +++ b/src/main/java/com/vdurmont/emoji/EmojiParser.java @@ -3,6 +3,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; +import java.util.Collections; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -14,7 +15,8 @@ */ public class EmojiParser { private static final Pattern ALIAS_CANDIDATE_PATTERN = - Pattern.compile("(?<=:)\\+?(\\w|\\||\\-)+(?=:)"); + Pattern.compile("\\S*?((?<=:)\\+?(\\w|\\||\\-)+(?=:))\\w*"); + private static final Pattern URL_PATTERN = Pattern.compile("(https?:\\/\\/(?:www\\.|(?!www))[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\\.[^\\s]{2,}|www\\.[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\\.[^\\s]{2,}|https?:\\/\\/(?:www\\.|(?!www))[a-zA-Z0-9]\\.[^\\s]{2,}|www\\.[a-zA-Z0-9]\\.[^\\s]{2,})"); /** * See {@link #parseToAliases(String, FitzpatrickAction)} with the action @@ -101,7 +103,6 @@ public String transform(EmojiParser.UnicodeCandidate unicodeCandidate) { return parseFromUnicode(str, emojiTransformer); } - /** * Replaces the emoji's aliases (between 2 ':') occurrences and the html * representations by their unicode.
@@ -116,8 +117,21 @@ public String transform(EmojiParser.UnicodeCandidate unicodeCandidate) { * their unicode. */ public static String parseToUnicode(String input) { + return parseToUnicode(input,false); + } + + /** + * {@link #parseToUnicode(String)} + * + * @param input the string to parse + * @param shouldIgnoreUrls should emojis inside Urls be ignored + * + * @return the string with the aliases and html representations replaced by + * their unicode. It will ignore emojis inside urls depending on shouldIgnoreUrls flag + */ + public static String parseToUnicode(String input, boolean shouldIgnoreUrls) { // Get all the potential aliases - List candidates = getAliasCandidates(input); + List candidates = getAliasCandidates(input, shouldIgnoreUrls); // Replace the aliases by their unicode String result = input; @@ -132,10 +146,7 @@ public static String parseToUnicode(String input) { if (candidate.fitzpatrick != null) { replacement += candidate.fitzpatrick.unicode; } - result = result.replace( - ":" + candidate.fullString + ":", - replacement - ); + result = replaceFirstFrom(result, candidate.position, Pattern.quote(":" + candidate.fullString + ":"), replacement); } } } @@ -148,25 +159,42 @@ public static String parseToUnicode(String input) { return result; } - protected static List getAliasCandidates(String input) { - List candidates = new ArrayList(); + return getAliasCandidates(input,false); + } + protected static List getAliasCandidates(String input, boolean shouldIgnoreUrls) { + List words = Arrays.asList(input.split("\\s")); - Matcher matcher = ALIAS_CANDIDATE_PATTERN.matcher(input); - matcher = matcher.useTransparentBounds(true); - while (matcher.find()) { - String match = matcher.group(); - if (!match.contains("|")) { - candidates.add(new AliasCandidate(match, match, null)); - } else { - String[] splitted = match.split("\\|"); - if (splitted.length == 2 || splitted.length > 2) { - candidates.add(new AliasCandidate(match, splitted[0], splitted[1])); + List 
candidates = new ArrayList(); + int position; + int nextPosition = 0; + for(String word : words) { + Matcher matcher = ALIAS_CANDIDATE_PATTERN.matcher(word); + matcher = matcher.useTransparentBounds(true); + + while (matcher.find()) { + String fullWord = matcher.group(); + position = input.indexOf(fullWord, nextPosition); + nextPosition = position + fullWord.length(); + //Do not render emojis inside URLs + if (shouldIgnoreUrls && URL_PATTERN.matcher(word).matches()) { + continue; + } + String match = matcher.group(1); + if (!match.contains("|")) { + candidates.add(new AliasCandidate(match, match, null, position)); } else { - candidates.add(new AliasCandidate(match, match, null)); + String[] splitted = match.split("\\|"); + if (splitted.length == 2 || splitted.length > 2) { + candidates.add(new AliasCandidate(match, splitted[0], splitted[1], position)); + } else { + candidates.add(new AliasCandidate(match, match, null, position)); + } } } } + + Collections.reverse(candidates); return candidates; } @@ -406,6 +434,23 @@ protected static List getUnicodeCandidates(String input) { return candidates; } + /** + * Replaces the first match of a regex found at or after a start position + * + * @param str original string + * @param from start position; only the text from this index onward is searched + * @param regex regular expression to look for + * @param replacement replacement for the first match (passed to String#replaceFirst) + * @return str with the first match at or after from replaced + */ + private static String replaceFirstFrom(String str, int from, String regex, String replacement) + { + String prefix = str.substring(0, from); + String rest = str.substring(from); + rest = rest.replaceFirst(regex, replacement); + return prefix+rest; + } + /** * Finds the next UnicodeCandidate after a given starting index * @@ -517,12 +562,15 @@ protected static class AliasCandidate { public final String fullString; public final String alias; public final Fitzpatrick fitzpatrick; + public final int position; private AliasCandidate( String fullString, String alias, - String fitzpatrickString + String fitzpatrickString, + int position ) { + this.position = position; this.fullString = fullString; this.alias = alias; 
if (fitzpatrickString == null) { diff --git a/src/test/java/com/vdurmont/emoji/EmojiParserTest.java b/src/test/java/com/vdurmont/emoji/EmojiParserTest.java index 7575b77d..a79a2555 100644 --- a/src/test/java/com/vdurmont/emoji/EmojiParserTest.java +++ b/src/test/java/com/vdurmont/emoji/EmojiParserTest.java @@ -311,6 +311,22 @@ public void parseToUnicode_with_the_thumbsup_emoji_in_hex_replaces_the_alias_by_ ); } + @Test + public void parseToUnicode_with_link_replaces_the_alias_by_the_emoji() { + // GIVEN + String str = ":smiley: The link is https://www.aaa.com/link_with_emoji_alias_:smiley:__:smiley: hello :smiley:."; + + // WHEN + String result = EmojiParser.parseToUnicode(str, true); + + // THEN + assertEquals( + "😃 The link is https://www.aaa.com/link_with_emoji_alias_:smiley:__:smiley: hello 😃.", + result + ); + + } + @Test public void parseToUnicode_with_a_fitzpatrick_modifier() { // GIVEN @@ -400,9 +416,9 @@ public void getAliasCanditates_with_two_aliases() { // THEN assertEquals(2, candidates.size()); - assertEquals("candi", candidates.get(0).alias); + assertEquals("candi", candidates.get(1).alias); assertNull(candidates.get(0).fitzpatrick); - assertEquals("candidate", candidates.get(1).alias); + assertEquals("candidate", candidates.get(0).alias); assertNull(candidates.get(1).fitzpatrick); } @@ -416,9 +432,9 @@ public void getAliasCanditates_with_two_aliases_sharing_a_colon() { // THEN assertEquals(2, candidates.size()); - assertEquals("candi", candidates.get(0).alias); + assertEquals("candi", candidates.get(1).alias); assertNull(candidates.get(0).fitzpatrick); - assertEquals("candidate", candidates.get(1).alias); + assertEquals("candidate", candidates.get(0).alias); assertNull(candidates.get(1).fitzpatrick); }