Skip to content

Commit

Permalink
PIG-5362: Parameter substitution of shell cmd results doesn't handle …
Browse files Browse the repository at this point in the history
…backslash addendum (szita)

git-svn-id: https://svn.apache.org/repos/asf/pig/trunk@1876880 13f79535-47bb-0310-9956-ffa450edef68
  • Loading branch information
szlta committed Apr 23, 2020
1 parent bdd9908 commit 0b2066a
Show file tree
Hide file tree
Showing 3 changed files with 105 additions and 34 deletions.
2 changes: 2 additions & 0 deletions CHANGES.txt
Expand Up @@ -100,6 +100,8 @@ OPTIMIZATIONS

BUG FIXES

PIG-5362: Parameter substitution of shell cmd results doesn't handle backslash addendum (szita)

PIG-5395: Pig build is failing due to maven repo access point change (szita)

PIG-5375: NullPointerException for multi-level self unions with Tez UnionOptimizer (knoguchi)
Expand Down
55 changes: 42 additions & 13 deletions src/org/apache/pig/tools/parameters/ParamLoader.jj
Expand Up @@ -43,16 +43,6 @@ public class ParamLoader {
public void setContext(PreprocessorContext pc) {
this.pc = pc;
}

/**
 * Strips one pair of matching surrounding quotes from a literal.
 *
 * If {@code s} both starts and ends with a single quote, or both starts and
 * ends with a double quote, the text between them is returned. Any other
 * input is returned unchanged; mismatched quotes are left alone.
 *
 * @param s the literal text; must not be null
 * @return {@code s} without its surrounding quote pair, or {@code s} itself
 *         when it is not wrapped in a matching pair
 */
private static String unquote(String s)
{
    // A string shorter than two characters cannot hold both an opening and a
    // closing quote. Without this guard an empty string fails in charAt(0)
    // and a lone quote character fails in substring(1, 0).
    if (s.length() < 2) {
        return s;
    }
    char first = s.charAt(0);
    char last = s.charAt(s.length() - 1);
    if ((first == '\'' || first == '"') && first == last) {
        return s.substring(1, s.length() - 1);
    }
    return s;
}
}

PARSER_END(ParamLoader)
Expand Down Expand Up @@ -81,16 +71,52 @@ TOKEN :
|
<OTHER: ~["\"" , "'" , "`" , "a"-"z" , "A"-"Z" , "_" , "#" , "=" , " " , "\n" , "\t" , "\r"] (~["\n","\r"])* >
|
<LITERAL: ("\"" ((~["\""])*("\\\"")?)* "\"")|("'" ((~["'"])*("\\\'")?)* "'") >
|
<SHELLCMD: "`" (~["`"])* "`" >
|
<EQUALS: "=" >
|
<COMMENT: "#" (~["\n"])* ("\n"|"\r") >
}

// Start of a quoted literal. The opening quote is matched as MORE, so it is
// kept at the front of the accumulated image rather than emitted as a token,
// and the lexer switches to the lexical state for that quote character.
MORE :
{
<DOUBLE_QUOTE_START: ("\"") > : DOUBLE_QUOTE
|
<SINGLE_QUOTE_START: ("'") > : SINGLE_QUOTE
}

// End of a double-quoted literal: an unescaped '"' seen in the DOUBLE_QUOTE
// state completes the token and returns the lexer to DEFAULT.
<DOUBLE_QUOTE> TOKEN :
{
<DOUBLE_QUOTE_LITERAL: ("\"") > {
// Drop the last character (the closing quote just matched) ...
image.deleteCharAt(image.length()-1);
// ... and the first character (the opening quote kept by DOUBLE_QUOTE_START).
image.deleteCharAt(0);
// Publish the unquoted text as the token image, so the parser needs no unquote step.
matchedToken.image = image.toString();
} : DEFAULT
}

// End of a single-quoted literal: an unescaped "'" seen in the SINGLE_QUOTE
// state completes the token and returns the lexer to DEFAULT.
<SINGLE_QUOTE> TOKEN :
{
<SINGLE_QUOTE_LITERAL: ("'") > {
// Drop the last character (the closing quote just matched) ...
image.deleteCharAt(image.length()-1);
// ... and the first character (the opening quote kept by SINGLE_QUOTE_START).
image.deleteCharAt(0);
// Publish the unquoted text as the token image, so the parser needs no unquote step.
matchedToken.image = image.toString();
} : DEFAULT
}

// Escaped double quote inside a double-quoted literal. The two-character
// match (backslash + quote) is longer than the one-character closing quote,
// so the literal is not terminated here. The action replaces the last two
// characters of the image with a bare '"', i.e. it removes the backslash.
<DOUBLE_QUOTE > MORE :
{
<ESCAPED_DOUBLE_QUOTE: ("\\\"")> { image.replace(image.length()-2, image.length(), "\""); }
}

// Escaped single quote inside a single-quoted literal. The two-character
// match (backslash + quote) is longer than the one-character closing quote,
// so the literal is not terminated here. The action replaces the last two
// characters of the image with a bare "'", i.e. it removes the backslash.
<SINGLE_QUOTE > MORE :
{
<ESCAPED_SINGLE_QUOTE: ("\\'")> { image.replace(image.length()-2, image.length(), "'"); }
}

// Catch-all for both quote states: any single character (~[] matches every
// character, newlines included) is appended to the image unchanged. Because
// no action rewrites it, a backslash not followed by the active quote
// character stays in the literal as-is.
<DOUBLE_QUOTE,SINGLE_QUOTE> MORE:
{
< (~[]) >
}


boolean Parse() throws IOException :
Expand All @@ -117,7 +143,10 @@ boolean Parse() throws IOException :
|
val=<SHELLCMD> { pc.processShellCmd(id.image , val.image);}
|
val=<LITERAL> { s = unquote(val.image); pc.processOrdLine(id.image , s); }
val=<SINGLE_QUOTE_LITERAL> { pc.processOrdLine(id.image , val.image); }
|
val=<DOUBLE_QUOTE_LITERAL> { pc.processOrdLine(id.image , val.image); }

)
)
|
Expand Down
82 changes: 61 additions & 21 deletions src/org/apache/pig/tools/parameters/PigFileParser.jj
Expand Up @@ -223,25 +223,22 @@ TOKEN_MGR_DECLS : {
}: DEFAULT
}

TOKEN :
< DEFAULT, IN_DECLARE > TOKEN :
{
<NEWLINE: "\n" | "\r">
<NEWLINE: "\n" | "\r"> : DEFAULT
|
<SPACE: " " | "\t">
|
<WS: "\n" | "\r" | " " | "\t">
}

// comments(single line and multi-line)
TOKEN :
{
<COMMENT: "--"(~["\r","\n"])*
|
// comments(single line and multi-line)
<COMMENT: "--"(~["\r","\n"])*
| "#!" (~["\r","\n"])*
| "/*" (~["*"])* "*" ("*" | (~["*","/"] (~["*"])* "*"))* "/"
>
>
}

TOKEN:
< DEFAULT, IN_DECLARE >TOKEN:
{
<#LETTER : ["a"-"z", "A"-"Z"] >
|
Expand All @@ -254,22 +251,31 @@ TOKEN:

TOKEN :
{
<DECLARE: "%declare" >
<DECLARE: "%declare" > : IN_DECLARE
|
<PIGDEFAULT: "%default" >
<PIGDEFAULT: "%default" > : IN_DECLARE
|
<REGISTER: "register"> : IN_REGISTER
|
<LITERAL: ("\"" ((~["\""])*("\\\"")?)* "\"")|("'" ((~["'"])*("\\\'")?)* "'") >
}


TOKEN :
< DEFAULT, IN_DECLARE > TOKEN:
{
<REGISTER: "register"> : IN_REGISTER
|
<IDENTIFIER: (<SPECIALCHAR>)*<LETTER>(<DIGIT> | <LETTER> | <SPECIALCHAR>)*>
|
<LITERAL: ("\"" ((~["\""])*("\\\"")?)* "\"")|("'" ((~["'"])*("\\\'")?)* "'") >
|
<SHELLCMD: "`" (~["`"])* "`" >
<SHELLCMD: "`" (~["`"])* "`" > : DEFAULT
}

// Start of a quoted literal, recognised only in the IN_DECLARE state. The
// opening quote is matched as MORE, so it is kept at the front of the
// accumulated image rather than emitted as a token, and the lexer switches
// to the lexical state for that quote character.
< IN_DECLARE > MORE :
{
<DOUBLE_QUOTE_START: ("\"") > : DOUBLE_QUOTE
|
<SINGLE_QUOTE_START: ("'") > : SINGLE_QUOTE
}

< DEFAULT, IN_DECLARE > TOKEN:
{
// see others() rule for use of OTHER and NOT_OTHER_CHAR
// others() is supposed to match 'everything else'. To ensure that others()
// don't swallow other(all the ones above) tokens, it uses two tokens OTHER and NOT_OTHER_CHAR
Expand All @@ -281,6 +287,39 @@ TOKEN :
<NOT_OTHER_CHAR: ["\"" , "'" , "`" , "a"-"z" , "A"-"Z" , "_" , "#" , "=" , " " , "\n" , "\t" , "\r", "%", "/", "-", "$"] >
}

// End of a double-quoted literal: an unescaped '"' seen in the DOUBLE_QUOTE
// state completes the token and returns the lexer to DEFAULT.
< DOUBLE_QUOTE > TOKEN :
{
<DOUBLE_QUOTE_LITERAL: ("\"") > {
// Drop the last character (the closing quote just matched) ...
image.deleteCharAt(image.length()-1);
// ... and the first character (the opening quote kept by DOUBLE_QUOTE_START).
image.deleteCharAt(0);
// Publish the unquoted text as the token image, so the parser needs no unquote step.
matchedToken.image = image.toString();
} : DEFAULT
}

// End of a single-quoted literal: an unescaped "'" seen in the SINGLE_QUOTE
// state completes the token and returns the lexer to DEFAULT.
< SINGLE_QUOTE > TOKEN :
{
<SINGLE_QUOTE_LITERAL: ("'") > {
// Drop the last character (the closing quote just matched) ...
image.deleteCharAt(image.length()-1);
// ... and the first character (the opening quote kept by SINGLE_QUOTE_START).
image.deleteCharAt(0);
// Publish the unquoted text as the token image, so the parser needs no unquote step.
matchedToken.image = image.toString();
} : DEFAULT
}

// Escaped double quote inside a double-quoted literal. The two-character
// match (backslash + quote) is longer than the one-character closing quote,
// so the literal is not terminated here. The action replaces the last two
// characters of the image with a bare '"', i.e. it removes the backslash.
< DOUBLE_QUOTE > MORE :
{
<ESCAPED_DOUBLE_QUOTE: ("\\\"")> { image.replace(image.length()-2, image.length(), "\""); }
}

// Escaped single quote inside a single-quoted literal. The two-character
// match (backslash + quote) is longer than the one-character closing quote,
// so the literal is not terminated here. The action replaces the last two
// characters of the image with a bare "'", i.e. it removes the backslash.
< SINGLE_QUOTE > MORE :
{
<ESCAPED_SINGLE_QUOTE: ("\\'")> { image.replace(image.length()-2, image.length(), "'"); }
}

// Catch-all for both quote states: any single character (~[] matches every
// character, newlines included) is appended to the image unchanged. Because
// no action rewrites it, a backslash not followed by the active quote
// character stays in the literal as-is.
< DOUBLE_QUOTE, SINGLE_QUOTE > MORE:
{
< (~[]) >
}

// In the IN_REGISTER state, spaces, tabs and line breaks are matched as MORE:
// they do not form a token of their own but are carried into the image of the
// next token matched in this state.
<IN_REGISTER> MORE : { " " | "\t" | "\r" | "\n"}

<IN_REGISTER> TOKEN: {
Expand Down Expand Up @@ -373,8 +412,9 @@ void param_value(boolean overwrite) throws IOException:
|
val=<SHELLCMD> { pc.processShellCmd(id.image , val.image, overwrite); }
|
val=<LITERAL> { s = unquote(val.image); pc.processOrdLine(id.image, s, overwrite); }

val=<SINGLE_QUOTE_LITERAL> { pc.processOrdLine(id.image, val.image, overwrite); }
|
val = <DOUBLE_QUOTE_LITERAL> { pc.processOrdLine(id.image, val.image, overwrite); }
)
}

Expand Down

0 comments on commit 0b2066a

Please sign in to comment.