Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 17 additions & 11 deletions core/src/main/antlr4/org/evomaster/core/parser/RegexEcma262.g4
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ grammar RegexEcma262;
//------ PARSER ------------------------------
// Parser rules have first letter in lower-case

pattern : disjunction;
pattern : disjunction EOF;


disjunction
Expand Down Expand Up @@ -96,13 +96,13 @@ atom


//TODO
//CharacterEscape
// Body of a single-character escape (the part that follows the backslash in
// AtomEscape): currently a hex escape or a unicode escape.
// Declared as a fragment because it is only a building block of AtomEscape;
// a non-fragment rule here would let the lexer emit the escape body as a
// token of its own whenever unescaped text happens to match it.
fragment CharacterEscape
// : ControlEscape
// | 'c' ControlLetter
// | HexEscapeSequence
// | UnicodeEscapeSequence
: HexEscapeSequence
| UnicodeEscapeSequence
//| IdentityEscape
// ;
;

//TODO
//ControlEscape
Expand Down Expand Up @@ -205,7 +205,7 @@ AtomEscape
: '\\' CharacterClassEscape
//TODO
// | '\\' DecimalEscape
// | '\\' CharacterEscape
| '\\' CharacterEscape
;

fragment CharacterClassEscape
Expand Down Expand Up @@ -238,11 +238,17 @@ BaseChar
: ~[0-9,^$\\.*+?()[\]{}|-]
;

//TODO
//HexEscapeSequence
// : 'x' HexDigit HexDigit
// ;
//
// 'u' followed by exactly four hexadecimal digits (body of a unicode escape).
// Kept as a fragment: it is only referenced from CharacterEscape, and as a
// standalone token it would swallow unescaped text such as "uabcd" (the
// lexer prefers the longest match), which no parser rule accepts.
fragment UnicodeEscapeSequence
: 'u' HexDigit HexDigit HexDigit HexDigit
;

// 'x' followed by exactly two hexadecimal digits (body of a hex escape).
// Kept as a fragment: it is only referenced from CharacterEscape, and as a
// standalone token it would swallow unescaped text such as "x10" or "xab"
// (the lexer prefers the longest match), which no parser rule accepts.
fragment HexEscapeSequence
: 'x' HexDigit HexDigit
;

// A single hexadecimal digit, accepted in either letter case.
fragment HexDigit
: [0-9a-fA-F]
;

//TODO
//DecimalIntegerLiteral
Expand Down
28 changes: 17 additions & 11 deletions core/src/main/antlr4/org/evomaster/core/parser/RegexJava.g4
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ grammar RegexJava;
//------ PARSER ------------------------------
// Parser rules have first letter in lower-case

pattern : disjunction;
pattern : disjunction EOF;


disjunction
Expand Down Expand Up @@ -119,13 +119,13 @@ quoteChar
;

//TODO
//CharacterEscape
// Body of a single-character escape (the part that follows the backslash in
// AtomEscape): currently a hex escape or a unicode escape.
// Declared as a fragment because it is only a building block of AtomEscape;
// a non-fragment rule here would let the lexer emit the escape body as a
// token of its own whenever unescaped text happens to match it.
fragment CharacterEscape
// : ControlEscape
// | 'c' ControlLetter
// | HexEscapeSequence
// | UnicodeEscapeSequence
: HexEscapeSequence
| UnicodeEscapeSequence
//| IdentityEscape
// ;
;

//TODO
//ControlEscape
Expand Down Expand Up @@ -230,7 +230,7 @@ AtomEscape
: '\\' CharacterClassEscape
//TODO
// | '\\' DecimalEscape
// | '\\' CharacterEscape
| '\\' CharacterEscape
;

fragment CharacterClassEscape
Expand Down Expand Up @@ -267,11 +267,17 @@ BaseChar
: ~[0-9,^$\\.*+?()[\]{}|-]
;

//TODO
//HexEscapeSequence
// : 'x' HexDigit HexDigit
// ;
//
// 'u' followed by exactly four hexadecimal digits (body of a unicode escape).
// Kept as a fragment: it is only referenced from CharacterEscape, and as a
// standalone token it would swallow unescaped text such as "uabcd" (the
// lexer prefers the longest match), which no parser rule accepts.
fragment UnicodeEscapeSequence
: 'u' HexDigit HexDigit HexDigit HexDigit
;

// 'x' followed by exactly two hexadecimal digits (body of a hex escape).
// Kept as a fragment: it is only referenced from CharacterEscape, and as a
// standalone token it would swallow unescaped text such as "x10" or "xab"
// (the lexer prefers the longest match), which no parser rule accepts.
fragment HexEscapeSequence
: 'x' HexDigit HexDigit
;

// A single hexadecimal digit, accepted in either letter case.
fragment HexDigit
: [0-9a-fA-F]
;

//TODO
//DecimalIntegerLiteral
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package org.evomaster.core.parser

import org.evomaster.core.search.gene.regex.*

/**
 * Textual form of the EOF token found at the end of the parsed pattern text.
 * Its length is used to cut that suffix off before the regex is stored as the gene's source regex.
 */
private const val EOF_TOKEN = "<EOF>"
/**
* Parser Visitor based on the RegexEcma262.g4 grammar file
*/
Expand All @@ -16,7 +17,8 @@ class GeneRegexEcma262Visitor : RegexEcma262BaseVisitor<VisitResult>(){

val disjList = DisjunctionListRxGene(res.genes.map { it as DisjunctionRxGene })

val gene = RegexGene("regex", disjList,"${RegexGene.JAVA_REGEX_PREFIX}$text")
// we remove the <EOF> token from end of the string to store as sourceRegex
val gene = RegexGene("regex", disjList,"${RegexGene.JAVA_REGEX_PREFIX}${text.substring(0,text.length - EOF_TOKEN.length)}")

return VisitResult(gene)
}
Expand Down Expand Up @@ -166,9 +168,22 @@ class GeneRegexEcma262Visitor : RegexEcma262BaseVisitor<VisitResult>(){
return VisitResult(gene)
}

if(ctx.AtomEscape() != null){
val char = ctx.AtomEscape().text[1].toString()
return VisitResult(CharacterClassEscapeRxGene(char))
if(ctx.AtomEscape() != null) {
val txt = ctx.AtomEscape().text
when {
txt[1] == 'x' || txt[1] == 'u' -> {
val hexValue =
txt.subSequence(2, txt.length).toString().toInt(16)
return VisitResult(
PatternCharacterBlockGene(
txt,
hexValue.toChar().toString()
)
)
}

else -> return VisitResult(CharacterClassEscapeRxGene(txt[1].toString()))
}
}

if(ctx.disjunction() != null){
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package org.evomaster.core.parser

import org.evomaster.core.search.gene.regex.*

/**
 * Textual form of the EOF token found at the end of the parsed pattern text.
 * Its length is used to cut that suffix off before the regex is stored as the gene's source regex.
 */
private const val EOF_TOKEN = "<EOF>"
/**
* Created by arcuri82 on 11-Sep-19.
*/
Expand All @@ -16,7 +17,8 @@ class GeneRegexJavaVisitor : RegexJavaBaseVisitor<VisitResult>(){

val disjList = DisjunctionListRxGene(res.genes.map { it as DisjunctionRxGene })

val gene = RegexGene("regex", disjList,"${RegexGene.JAVA_REGEX_PREFIX}$text")
// we remove the <EOF> token from end of the string to store as sourceRegex
val gene = RegexGene("regex", disjList,"${RegexGene.JAVA_REGEX_PREFIX}${text.substring(0, text.length - EOF_TOKEN.length)}")

return VisitResult(gene)
}
Expand Down Expand Up @@ -179,8 +181,20 @@ class GeneRegexJavaVisitor : RegexJavaBaseVisitor<VisitResult>(){
}

if(ctx.AtomEscape() != null){
val char = ctx.AtomEscape().text[1].toString()
return VisitResult(CharacterClassEscapeRxGene(char))
val txt = ctx.AtomEscape().text
when {
txt[1] == 'x' || txt[1] == 'u' -> {
val hexValue =
txt.subSequence(2, txt.length).toString().toInt(16)
return VisitResult(
PatternCharacterBlockGene(
txt,
hexValue.toChar().toString()
)
)
}
else -> return VisitResult(CharacterClassEscapeRxGene(txt[1].toString()))
}
}

if(ctx.disjunction() != null){
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -326,4 +326,14 @@ open class GeneRegexEcma262VisitorTest : RegexTestTemplate(){
// p = 1 / 2^6 = 1 / 64
checkCanSample("^((a|A)(b|B)(c|C)123(e|E)(f|F)(d|D))$", "aBc123EFd", 10_000)
}

@Test
fun testHexEscape(){
checkSameAsJava("""\x00\x0a\xba\xFF""")

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are there any tests for "invalid" hex values?
What happens if "\xF" or "\xX" is tested?

}

/**
 * Runs the Java-equivalence check on a regex made only of four-digit unicode escapes,
 * including the lowest and highest code units.
 */
@Test
fun testUnicodeEscape() {
    val unicodeEscapes = """\u0000\u0a0b\uffff"""
    checkSameAsJava(unicodeEscapes)
}
}
11 changes: 11 additions & 0 deletions core/src/test/kotlin/org/evomaster/core/parser/RegexHandlerTest.kt
Original file line number Diff line number Diff line change
Expand Up @@ -121,4 +121,15 @@ internal class RegexHandlerTest{

}

/**
 * A hex escape followed by a non-hex character must make the JVM regex handler
 * fail with a [ParseCancellationException].
 */
@Test
fun testCreateGeneForJVMInvalidRegex() {
    val invalidHexEscape = "\\xR"

    assertThrows(ParseCancellationException::class.java) {
        RegexHandler.createGeneForJVM(invalidHexEscape)
    }
}

/**
 * A hex escape followed by a non-hex character must make the ECMA-262 regex handler
 * fail with a [ParseCancellationException].
 */
@Test
fun testCreateGeneForEcma262InvalidRegex() {
    val invalidHexEscape = "\\xR"

    assertThrows(ParseCancellationException::class.java) {
        RegexHandler.createGeneForEcma262(invalidHexEscape)
    }
}
}