Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
90 changes: 69 additions & 21 deletions src/main/java/com/vdurmont/emoji/EmojiParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
Expand All @@ -14,7 +15,8 @@
*/
public class EmojiParser {
private static final Pattern ALIAS_CANDIDATE_PATTERN =
Pattern.compile("(?<=:)\\+?(\\w|\\||\\-)+(?=:)");
Pattern.compile("\\S*?((?<=:)\\+?(\\w|\\||\\-)+(?=:))\\w*");
// Recognises a URL-like token. Four alternatives are accepted:
//   1. "http://" or "https://" (optionally followed by "www."), a host label of at
//      least three characters, a '.', then two or more non-whitespace characters;
//   2. the same host/tail shape introduced by a bare "www." instead of a scheme;
//   3. and 4. the two forms above with a single-character host label.
// getAliasCandidates applies it with matches() to a whole whitespace-delimited word.
private static final Pattern URL_PATTERN = Pattern.compile("(https?:\\/\\/(?:www\\.|(?!www))[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\\.[^\\s]{2,}|www\\.[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\\.[^\\s]{2,}|https?:\\/\\/(?:www\\.|(?!www))[a-zA-Z0-9]\\.[^\\s]{2,}|www\\.[a-zA-Z0-9]\\.[^\\s]{2,})");

/**
* See {@link #parseToAliases(String, FitzpatrickAction)} with the action
Expand Down Expand Up @@ -101,7 +103,6 @@ public String transform(EmojiParser.UnicodeCandidate unicodeCandidate) {
return parseFromUnicode(str, emojiTransformer);
}


/**
* Replaces the emoji's aliases (between 2 ':') occurrences and the html
* representations by their unicode.<br>
Expand All @@ -116,8 +117,21 @@ public String transform(EmojiParser.UnicodeCandidate unicodeCandidate) {
* their unicode.
*/
public static String parseToUnicode(String input) {
  // Default behaviour: aliases that sit inside URLs are not skipped.
  final boolean ignoreUrls = false;
  String parsed = parseToUnicode(input, ignoreUrls);
  return parsed;
}

/**
* {@link #parseToUnicode(String)}
*
* @param input the string to parse
* @param shouldIgnoreUrls should emojis inside Urls be ignored
*
* @return the string with the aliases and html representations replaced by
* their unicode. It will ignore emojis inside urls depending on shouldIgnoreUrls flag
*/
public static String parseToUnicode(String input, boolean shouldIgnoreUrls) {
// Get all the potential aliases
List<AliasCandidate> candidates = getAliasCandidates(input);
List<AliasCandidate> candidates = getAliasCandidates(input, shouldIgnoreUrls);

// Replace the aliases by their unicode
String result = input;
Expand All @@ -132,10 +146,7 @@ public static String parseToUnicode(String input) {
if (candidate.fitzpatrick != null) {
replacement += candidate.fitzpatrick.unicode;
}
result = result.replace(
":" + candidate.fullString + ":",
replacement
);
result = replaceFirstFrom(result, candidate.position, Pattern.quote(":" + candidate.fullString + ":"), replacement);
}
}
}
Expand All @@ -148,25 +159,42 @@ public static String parseToUnicode(String input) {

return result;
}

/**
 * Finds the alias candidates of a string without giving URLs any special
 * treatment. Delegates to {@link #getAliasCandidates(String, boolean)} with the
 * flag set to {@code false}.
 *
 * @param input the string to scan
 *
 * @return the candidates produced by the two-argument overload
 */
protected static List<AliasCandidate> getAliasCandidates(String input) {
  return getAliasCandidates(input, false);
}
protected static List<AliasCandidate> getAliasCandidates(String input, boolean shouldIgnoreUrls) {
List<String> words = Arrays.asList(input.split("\\s"));

Matcher matcher = ALIAS_CANDIDATE_PATTERN.matcher(input);
matcher = matcher.useTransparentBounds(true);
while (matcher.find()) {
String match = matcher.group();
if (!match.contains("|")) {
candidates.add(new AliasCandidate(match, match, null));
} else {
String[] splitted = match.split("\\|");
if (splitted.length == 2 || splitted.length > 2) {
candidates.add(new AliasCandidate(match, splitted[0], splitted[1]));
List<AliasCandidate> candidates = new ArrayList<AliasCandidate>();
int position;
int nextPosition = 0;
for(String word : words) {
Matcher matcher = ALIAS_CANDIDATE_PATTERN.matcher(word);
matcher = matcher.useTransparentBounds(true);

while (matcher.find()) {
String fullWord = matcher.group();
position = input.indexOf(fullWord, nextPosition);
nextPosition = position + fullWord.length();
//Do not render emojis inside URLs
if (shouldIgnoreUrls && URL_PATTERN.matcher(word).matches()) {
continue;
}
String match = matcher.group(1);
if (!match.contains("|")) {
candidates.add(new AliasCandidate(match, match, null, position));
} else {
candidates.add(new AliasCandidate(match, match, null));
String[] splitted = match.split("\\|");
if (splitted.length == 2 || splitted.length > 2) {
candidates.add(new AliasCandidate(match, splitted[0], splitted[1], position));
} else {
candidates.add(new AliasCandidate(match, match, null, position));
}
}
}
}

Collections.reverse(candidates);
return candidates;
}

Expand Down Expand Up @@ -406,6 +434,23 @@ protected static List<UnicodeCandidate> getUnicodeCandidates(String input) {
return candidates;
}

/**
 * Replaces the first match of a regular expression found at or after a start
 * position, leaving everything before that position untouched.
 *
 * @param str original string
 * @param from index at which the search starts; the regex is applied only to
 *             {@code str.substring(from)}
 * @param regex regular expression to look for
 * @param replacement text inserted literally in place of the match
 *
 * @return the string with the first match at or after {@code from} replaced,
 *         or a string equal to {@code str} when nothing matches
 *
 * @throws StringIndexOutOfBoundsException if {@code from} is negative or
 *         greater than the length of {@code str}
 */
private static String replaceFirstFrom(String str, int from, String regex, String replacement) {
  String prefix = str.substring(0, from);
  String rest = str.substring(from);
  // replaceFirst interprets '$' and '\' in its replacement argument as group
  // references/escapes; quote it so the replacement is always inserted verbatim.
  return prefix + rest.replaceFirst(regex, Matcher.quoteReplacement(replacement));
}

/**
* Finds the next UnicodeCandidate after a given starting index
*
Expand Down Expand Up @@ -517,12 +562,15 @@ protected static class AliasCandidate {
public final String fullString;
public final String alias;
public final Fitzpatrick fitzpatrick;
public final int position;

private AliasCandidate(
String fullString,
String alias,
String fitzpatrickString
String fitzpatrickString,
int position
) {
this.position = position;
this.fullString = fullString;
this.alias = alias;
if (fitzpatrickString == null) {
Expand Down
24 changes: 20 additions & 4 deletions src/test/java/com/vdurmont/emoji/EmojiParserTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,22 @@ public void parseToUnicode_with_the_thumbsup_emoji_in_hex_replaces_the_alias_by_
);
}

@Test
public void parseToUnicode_with_link_replaces_the_alias_by_the_emoji() {
  // GIVEN a text with the same alias both outside and inside a URL
  String input = ":smiley: The link is https://www.aaa.com/link_with_emoji_alias_:smiley:__:smiley: hello :smiley:.";
  String expected = "😃 The link is https://www.aaa.com/link_with_emoji_alias_:smiley:__:smiley: hello 😃.";

  // WHEN parsing with URL-ignoring switched on
  String actual = EmojiParser.parseToUnicode(input, true);

  // THEN only the aliases outside the URL have been replaced
  assertEquals(expected, actual);
}

@Test
public void parseToUnicode_with_a_fitzpatrick_modifier() {
// GIVEN
Expand Down Expand Up @@ -400,9 +416,9 @@ public void getAliasCanditates_with_two_aliases() {

// THEN
assertEquals(2, candidates.size());
assertEquals("candi", candidates.get(0).alias);
assertEquals("candi", candidates.get(1).alias);
assertNull(candidates.get(0).fitzpatrick);
assertEquals("candidate", candidates.get(1).alias);
assertEquals("candidate", candidates.get(0).alias);
assertNull(candidates.get(1).fitzpatrick);
}

Expand All @@ -416,9 +432,9 @@ public void getAliasCanditates_with_two_aliases_sharing_a_colon() {

// THEN
assertEquals(2, candidates.size());
assertEquals("candi", candidates.get(0).alias);
assertEquals("candi", candidates.get(1).alias);
assertNull(candidates.get(0).fitzpatrick);
assertEquals("candidate", candidates.get(1).alias);
assertEquals("candidate", candidates.get(0).alias);
assertNull(candidates.get(1).fitzpatrick);
}

Expand Down