Skip to content

Commit 75417dc

Browse files
committed
Fan-out of changes to make the code more generic with respect to the grammar start rule
1 parent 3cdf3c0 commit 75417dc

File tree

10 files changed

+445
-51
lines changed

10 files changed

+445
-51
lines changed
Lines changed: 347 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,347 @@
1+
/Library/Java/JavaVirtualMachines/jdk1.8.0_60.jdk/Contents/Home/bin/java -Didea.launcher.port=7540 "-Didea.launcher.bin.path=/Applications/IntelliJ IDEA.app/Contents/bin" -Dfile.encoding=UTF-8 -classpath "/Library/Java/JavaVirtualMachines/jdk1.8.0_60.jdk/Contents/Home/lib/ant-javafx.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_60.jdk/Contents/Home/lib/dt.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_60.jdk/Contents/Home/lib/javafx-mx.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_60.jdk/Contents/Home/lib/jconsole.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_60.jdk/Contents/Home/lib/packager.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_60.jdk/Contents/Home/lib/sa-jdi.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_60.jdk/Contents/Home/lib/tools.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_60.jdk/Contents/Home/jre/lib/charsets.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_60.jdk/Contents/Home/jre/lib/deploy.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_60.jdk/Contents/Home/jre/lib/javaws.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_60.jdk/Contents/Home/jre/lib/jce.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_60.jdk/Contents/Home/jre/lib/jfr.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_60.jdk/Contents/Home/jre/lib/jfxswt.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_60.jdk/Contents/Home/jre/lib/jsse.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_60.jdk/Contents/Home/jre/lib/management-agent.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_60.jdk/Contents/Home/jre/lib/plugin.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_60.jdk/Contents/Home/jre/lib/resources.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_60.jdk/Contents/Home/jre/lib/rt.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_60.jdk/Contents/Home/jre/lib/ext/cldrdata.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_60.jdk/Contents/Home/jre/lib/ext/dnsns.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_60.jdk/Contents/Home/jre/lib/ext/jaccess.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_60.jdk/Content
s/Home/jre/lib/ext/jfxrt.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_60.jdk/Contents/Home/jre/lib/ext/localedata.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_60.jdk/Contents/Home/jre/lib/ext/nashorn.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_60.jdk/Contents/Home/jre/lib/ext/sunec.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_60.jdk/Contents/Home/jre/lib/ext/sunjce_provider.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_60.jdk/Contents/Home/jre/lib/ext/sunpkcs11.jar:/Library/Java/JavaVirtualMachines/jdk1.8.0_60.jdk/Contents/Home/jre/lib/ext/zipfs.jar:/Users/parrt/antlr/code/codebuff/java/target/classes:/Users/parrt/.m2/repository/org/antlr/antlr4-runtime/4.5.3/antlr4-runtime-4.5.3.jar:/Users/parrt/.m2/repository/org/apache/commons/commons-lang3/3.4/commons-lang3-3.4.jar:/Applications/IntelliJ IDEA.app/Contents/lib/idea_rt.jar" com.intellij.rt.execution.application.AppMain org.antlr.codebuff.Tool -antlr grammars grammars/org/antlr/codebuff/ANTLRv4Parser.g4
2+
3 files
3+
3233 feature vectors
4+
5+
6+
Incorrect_WS / All_WS: 119 / 487 = 24.4%
7+
misclassified: 0
8+
Diff is 0.17840805649757385
9+
/*
10+
* [The "BSD license"]
11+
* Copyright (c) 2013 Terence Parr
12+
* Copyright (c) 2013 Sam Harwell
13+
* All rights reserved.
14+
*
15+
* Redistribution and use in source and binary forms, with or without
16+
* modification, are permitted provided that the following conditions
17+
* are met:
18+
*
19+
* 1. Redistributions of source code must retain the above copyright
20+
* notice, this list of conditions and the following disclaimer.
21+
* 2. Redistributions in binary form must reproduce the above copyright
22+
* notice, this list of conditions and the following disclaimer in the
23+
* documentation and/or other materials provided with the distribution.
24+
* 3. The name of the author may not be used to endorse or promote products
25+
* derived from this software without specific prior written permission.
26+
*
27+
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
28+
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
29+
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
30+
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
31+
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
32+
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
33+
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
34+
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
35+
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
36+
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37+
*/
38+
39+
/** A grammar for ANTLR v4 written in ANTLR v4.
40+
*/
41+
parser grammar ANTLRv4Parser;
42+
43+
options {
44+
tokenVocab=ANTLRv4Lexer;
45+
}
46+
47+
48+
grammarSpec
49+
: DOC_COMMENT? grammarType id SEMI prequelConstruct* rules modeSpec* EOF
50+
;
51+
52+
grammarType
53+
: ( LEXER GRAMMAR| PARSER GRAMMAR|GRAMMAR)
54+
;
55+
56+
prequelConstruct
57+
: optionsSpec
58+
| delegateGrammars
59+
| tokensSpec
60+
| channelsSpec
61+
| action
62+
;
63+
64+
optionsSpec
65+
: OPTIONS (option SEMI)* RBRACE
66+
;
67+
68+
option
69+
: id ASSIGN optionValue
70+
;
71+
72+
optionValue
73+
: id (DOT id)*
74+
| STRING_LITERAL
75+
| ACTION
76+
| INT
77+
;
78+
79+
delegateGrammars
80+
: IMPORT delegateGrammar (COMMA delegateGrammar)* SEMI
81+
;
82+
83+
delegateGrammar
84+
: id ASSIGN id
85+
| id
86+
;
87+
88+
tokensSpec
89+
: TOKENS idList RBRACE
90+
;
91+
92+
channelsSpec
93+
: CHANNELS idList? RBRACE
94+
;
95+
96+
idList
97+
: id (COMMA id)* COMMA?
98+
;
99+
100+
/** Match stuff like @parser::members {int i;} */
101+
action
102+
: AT (actionScopeName COLONCOLON)?
103+
id ACTION
104+
;
105+
106+
/** Sometimes the scope names will collide with keywords; allow them as
107+
* ids for action scopes.
108+
*/
109+
actionScopeName
110+
: id
111+
| LEXER
112+
| PARSER
113+
;
114+
115+
modeSpec
116+
: MODE id SEMI lexerRule*
117+
;
118+
119+
rules
120+
: ruleSpec*
121+
;
122+
123+
ruleSpec
124+
: parserRuleSpec
125+
| lexerRule
126+
;
127+
128+
parserRuleSpec
129+
: DOC_COMMENT? ruleModifiers? RULE_REF ARG_ACTION? ruleReturns? throwsSpec? localsSpec? rulePrequel* COLON ruleBlock SEMI exceptionGroup
130+
;
131+
132+
exceptionGroup
133+
: exceptionHandler* finallyClause?
134+
;
135+
136+
exceptionHandler
137+
: CATCH ARG_ACTION ACTION
138+
;
139+
140+
finallyClause
141+
: FINALLY ACTION
142+
;
143+
144+
rulePrequel
145+
: optionsSpec
146+
| ruleAction
147+
;
148+
149+
ruleReturns
150+
: RETURNS ARG_ACTION
151+
;
152+
153+
throwsSpec
154+
: THROWS id (COMMA id)*
155+
;
156+
157+
localsSpec
158+
: LOCALS ARG_ACTION
159+
;
160+
161+
/** Match stuff like @init {int i;} */
162+
ruleAction
163+
: AT id ACTION
164+
;
165+
166+
ruleModifiers
167+
: ruleModifier+
168+
;
169+
170+
ruleModifier
171+
: PUBLIC
172+
| PRIVATE
173+
| PROTECTED
174+
| FRAGMENT
175+
;
176+
177+
ruleBlock
178+
: ruleAltList
179+
;
180+
181+
ruleAltList
182+
: labeledAlt (OR labeledAlt)*
183+
;
184+
185+
labeledAlt
186+
: alternative (POUND id)?
187+
;
188+
189+
lexerRule
190+
: DOC_COMMENT? FRAGMENT? TOKEN_REF COLON lexerRuleBlock SEMI
191+
;
192+
193+
lexerRuleBlock
194+
: lexerAltList
195+
;
196+
197+
lexerAltList
198+
: lexerAlt (OR lexerAlt)*
199+
;
200+
201+
lexerAlt
202+
: lexerElements lexerCommands?
203+
|
204+
;
205+
206+
lexerElements
207+
: lexerElement+
208+
;
209+
210+
lexerElement
211+
: labeledLexerElement ebnfSuffix?
212+
| lexerAtom ebnfSuffix?
213+
| lexerBlock ebnfSuffix?
214+
| ACTION QUESTION?
215+
;
216+
217+
labeledLexerElement
218+
: id (ASSIGN|PLUS_ASSIGN)
219+
(lexerAtom| block)
220+
;
221+
222+
lexerBlock
223+
: LPAREN lexerAltList RPAREN
224+
;
225+
226+
lexerCommands
227+
: RARROW lexerCommand (COMMA lexerCommand)*
228+
;
229+
230+
lexerCommand
231+
: lexerCommandName LPAREN lexerCommandExpr RPAREN
232+
| lexerCommandName
233+
;
234+
235+
lexerCommandName
236+
: id
237+
| MODE
238+
;
239+
240+
lexerCommandExpr
241+
: id
242+
| INT
243+
;
244+
245+
altList
246+
: alternative (OR alternative)*
247+
;
248+
249+
alternative
250+
: elementOptions? element*
251+
;
252+
253+
element
254+
: labeledElement (ebnfSuffix
255+
|
256+
)
257+
| atom (ebnfSuffix
258+
|
259+
)
260+
| ebnf
261+
| ACTION QUESTION?
262+
;
263+
264+
labeledElement
265+
: id (ASSIGN|PLUS_ASSIGN)
266+
(atom| block)
267+
;
268+
269+
ebnf
270+
: block blockSuffix?
271+
;
272+
273+
blockSuffix
274+
: ebnfSuffix
275+
;
276+
277+
ebnfSuffix
278+
: QUESTION QUESTION?
279+
| STAR QUESTION?
280+
| PLUS QUESTION?
281+
;
282+
283+
lexerAtom
284+
: range
285+
| terminal
286+
| RULE_REF
287+
| notSet
288+
| LEXER_CHAR_SET
289+
| DOT elementOptions?
290+
;
291+
292+
atom
293+
: range
294+
| terminal
295+
| ruleref
296+
| notSet
297+
| DOT elementOptions?
298+
;
299+
300+
notSet
301+
: NOT setElement
302+
| NOT blockSet
303+
;
304+
305+
blockSet
306+
: LPAREN setElement (OR setElement)* RPAREN
307+
;
308+
309+
setElement
310+
: TOKEN_REF elementOptions?
311+
| STRING_LITERAL elementOptions?
312+
| range
313+
| LEXER_CHAR_SET
314+
;
315+
316+
block
317+
: LPAREN (optionsSpec? ruleAction* COLON)?
318+
altList RPAREN
319+
;
320+
321+
ruleref
322+
: RULE_REF ARG_ACTION? elementOptions?
323+
;
324+
325+
range
326+
: STRING_LITERAL RANGE STRING_LITERAL
327+
;
328+
329+
terminal
330+
: TOKEN_REF elementOptions?
331+
| STRING_LITERAL elementOptions?
332+
;
333+
334+
elementOptions
335+
: LT elementOption (COMMA elementOption)* GT
336+
;
337+
338+
elementOption
339+
: id
340+
| id ASSIGN (id|STRING_LITERAL)
341+
;
342+
343+
id
344+
: RULE_REF
345+
| TOKEN_REF
346+
;
347+

java/grammars/org/antlr/codebuff/ANTLRv4Lexer.g4

Lines changed: 2 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -75,17 +75,8 @@ tokens {
7575

7676
@Override
7777
public Token emit() {
78-
if (_type == /*ID*/ 999999) { // ################### just testing
79-
String firstChar = _input.getText(Interval.of(_tokenStartCharIndex, _tokenStartCharIndex));
80-
if (Character.isUpperCase(firstChar.charAt(0))) {
81-
_type = TOKEN_REF;
82-
} else {
83-
_type = RULE_REF;
84-
}
85-
86-
if (_currentRuleType == Token.INVALID_TYPE) { // if outside of rule def
87-
_currentRuleType = _type; // set to inside lexer or parser rule
88-
}
78+
if (_type == TOKEN_REF || _type == RULE_REF ) {
79+
_currentRuleType = _type;
8980
}
9081
else if (_type == SEMI) { // exit rule def
9182
_currentRuleType = Token.INVALID_TYPE;

java/pom.xml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@
5151

5252
<build>
5353
<sourceDirectory>src</sourceDirectory>
54+
<testSourceDirectory>test</testSourceDirectory>
5455
<plugins>
5556
<plugin>
5657
<groupId>org.antlr</groupId>

java/src/org/antlr/codebuff/CollectFeatures.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -479,7 +479,7 @@ public static int[] getNodeFeatures(Map<Token, TerminalNode> tokenToNodeMap,
479479
ParserRuleContext ancestorParent = null;
480480
ParserRuleContext ancestorParent2 = null;
481481
if ( earliestLeftAncestor==null ) { // just use regular parent then
482-
ancestorParent = getParent(node);
482+
ancestorParent = (ParserRuleContext)node.getParent();
483483
ancestorParent2 = ancestorParent.getParent(); // get immediate parent for context
484484
}
485485
else {
@@ -544,7 +544,7 @@ public static TerminalNode getMatchingLeftSymbol(InputDocument doc,
544544
int curTokensParentRuleIndex = parent.getRuleIndex();
545545
Token curToken = node.getSymbol();
546546
if (ruleToPairsBag != null) {
547-
String ruleName = JavaParser.ruleNames[curTokensParentRuleIndex];
547+
String ruleName = doc.parser.getRuleNames()[curTokensParentRuleIndex];
548548
List<Pair<Integer, Integer>> pairs = ruleToPairsBag.get(ruleName);
549549
if ( pairs!=null ) {
550550
// Find appropriate pair given current token

0 commit comments

Comments
 (0)