Skip to content

Commit 0a56ec1

Browse files
committed
Merge pull request #17 from antlr/indent-relative-parent
add context up the parse tree. Indent is part of alignment decision
2 parents 87590aa + fa5120e commit 0a56ec1

7 files changed

Lines changed: 203 additions & 73 deletions

File tree

java/codebuff.iml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
</content>
1515
<orderEntry type="inheritedJdk" />
1616
<orderEntry type="sourceFolder" forTests="false" />
17-
<orderEntry type="library" name="Maven: org.antlr:antlr4-runtime:4.5.1" level="project" />
17+
<orderEntry type="library" name="Maven: org.antlr:antlr4-runtime:4.5.3-SNAPSHOT" level="project" />
1818
<orderEntry type="library" scope="TEST" name="Maven: junit:junit:4.11" level="project" />
1919
<orderEntry type="library" scope="TEST" name="Maven: org.hamcrest:hamcrest-core:1.3" level="project" />
2020
<orderEntry type="library" name="Maven: org.apache.commons:commons-lang3:3.4" level="project" />

java/codebuff.ipr

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,17 @@
225225
<root url="file://$PROJECT_DIR$/../../antlr4/runtime/Java/src" />
226226
</SOURCES>
227227
</library>
228+
<library name="Maven: org.antlr:antlr4-runtime:4.5.3-SNAPSHOT">
229+
<CLASSES>
230+
<root url="jar://$MAVEN_REPOSITORY$/org/antlr/antlr4-runtime/4.5.3-SNAPSHOT/antlr4-runtime-4.5.3-SNAPSHOT.jar!/" />
231+
</CLASSES>
232+
<JAVADOC>
233+
<root url="jar://$MAVEN_REPOSITORY$/org/antlr/antlr4-runtime/4.5.3-SNAPSHOT/antlr4-runtime-4.5.3-SNAPSHOT-javadoc.jar!/" />
234+
</JAVADOC>
235+
<SOURCES>
236+
<root url="jar://$MAVEN_REPOSITORY$/org/antlr/antlr4-runtime/4.5.3-SNAPSHOT/antlr4-runtime-4.5.3-SNAPSHOT-sources.jar!/" />
237+
</SOURCES>
238+
</library>
228239
<library name="Maven: org.apache.commons:commons-lang3:3.4">
229240
<CLASSES>
230241
<root url="jar://$MAVEN_REPOSITORY$/org/apache/commons/commons-lang3/3.4/commons-lang3-3.4.jar!/" />

java/pom.xml

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434
<dependency>
3535
<groupId>org.antlr</groupId>
3636
<artifactId>antlr4-runtime</artifactId>
37-
<version>4.5.1</version>
37+
<version>4.5.3-SNAPSHOT</version>
3838
</dependency>
3939
<dependency>
4040
<groupId>junit</groupId>
@@ -47,13 +47,6 @@
4747
<artifactId>commons-lang3</artifactId>
4848
<version>3.4</version>
4949
</dependency>
50-
<!--
51-
<dependency>
52-
<groupId>nz.ac.waikato.cms.weka</groupId>
53-
<artifactId>weka-stable</artifactId>
54-
<version>3.6.6</version>
55-
</dependency>
56-
-->
5750
</dependencies>
5851

5952
<build>
@@ -62,13 +55,13 @@
6255
<plugin>
6356
<groupId>org.antlr</groupId>
6457
<artifactId>antlr4-maven-plugin</artifactId>
65-
<version>4.5.1</version>
58+
<version>4.5.3-SNAPSHOT</version>
6659
<executions>
6760
<execution>
6861
<configuration>
6962
<sourceDirectory>grammars</sourceDirectory>
7063
<options>
71-
64+
<contextSuperClass>org.antlr.codebuff.misc.RuleNodeWithAlt</contextSuperClass>
7265
</options>
7366
</configuration>
7467
<id>antlr</id>

java/src/org/antlr/codebuff/CollectFeatures.java

Lines changed: 169 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import org.antlr.v4.runtime.ParserRuleContext;
77
import org.antlr.v4.runtime.Token;
88
import org.antlr.v4.runtime.Vocabulary;
9+
import org.antlr.v4.runtime.atn.ATN;
910
import org.antlr.v4.runtime.misc.Pair;
1011
import org.antlr.v4.runtime.tree.ErrorNode;
1112
import org.antlr.v4.runtime.tree.ParseTreeListener;
@@ -38,7 +39,21 @@ public class CollectFeatures {
3839
public static final int CAT_ALIGN_WITH_LIST_FIRST_ELEMENT = 3;
3940
public static final int CAT_ALIGN_WITH_PAIR = 4;
4041

41-
public static final int CAT_INDENT = 100;
42+
/* We want to identify indentation from a parent's start token but that
43+
parent could be a number of levels up the tree. The next category
44+
values indicate indentation from the current token's left ancestor's
45+
parent, then its parent, and so on. For category value:
46+
47+
CAT_INDENT_FROM_ANCESTOR_FIRST_TOKEN + i
48+
49+
current token is indented from start token of node i levels up
50+
from ancestor.
51+
*/
52+
public static final int CAT_INDENT_FROM_ANCESTOR_FIRST_TOKEN = 100; // left ancestor's first token is really current token
53+
54+
public static final int CAT_INDENT = 200;
55+
56+
// indexes into feature vector
4257

4358
public static final int INDEX_PREV2_TYPE = 0;
4459
public static final int INDEX_PREV_TYPE = 1;
@@ -52,12 +67,16 @@ public class CollectFeatures {
5267
public static final int INDEX_RULE = 9; // what rule are we in?
5368
public static final int INDEX_EARLIEST_RIGHT_ANCESTOR = 10;
5469
public static final int INDEX_EARLIEST_LEFT_ANCESTOR = 11;
55-
public static final int INDEX_NEXT_TYPE = 12;
56-
public static final int INDEX_INFO_FILE = 13;
57-
public static final int INDEX_INFO_LINE = 14;
58-
public static final int INDEX_INFO_CHARPOS = 15;
70+
public static final int INDEX_ANCESTORS_PARENT4_RULE = 12;
71+
public static final int INDEX_ANCESTORS_PARENT3_RULE = 13;
72+
public static final int INDEX_ANCESTORS_PARENT2_RULE = 14;
73+
public static final int INDEX_ANCESTORS_PARENT_RULE = 15;
74+
public static final int INDEX_NEXT_TYPE = 16;
75+
public static final int INDEX_INFO_FILE = 17;
76+
public static final int INDEX_INFO_LINE = 18;
77+
public static final int INDEX_INFO_CHARPOS = 19;
5978

60-
public static final int NUM_FEATURES = 15;
79+
public static final int NUM_FEATURES = 20;
6180

6281
public static FeatureMetaData[] FEATURES_INJECT_NL = {
6382
new FeatureMetaData(FeatureType.TOKEN, new String[] {"", "LT(-2)"}, 1),
@@ -72,6 +91,10 @@ public class CollectFeatures {
7291
new FeatureMetaData(FeatureType.RULE, new String[] {"LT(1)", "rule"}, 2),
7392
new FeatureMetaData(FeatureType.RULE, new String[] {"LT(1)", "right ancestor"}, 3),
7493
new FeatureMetaData(FeatureType.RULE, new String[] {"LT(1)", "left ancestor"}, 3),
94+
new FeatureMetaData(FeatureType.RULE, new String[] {"ancestor's", "parent^4"}, 1),
95+
new FeatureMetaData(FeatureType.RULE, new String[] {"ancestor's", "parent^3"}, 1),
96+
new FeatureMetaData(FeatureType.RULE, new String[] {"ancestor's", "parent^2"}, 1),
97+
new FeatureMetaData(FeatureType.RULE, new String[] {"ancestor's", "parent"}, 1),
7598
FeatureMetaData.UNUSED,
7699
new FeatureMetaData(FeatureType.INFO_FILE, new String[] {"", "file"}, 0),
77100
new FeatureMetaData(FeatureType.INFO_LINE, new String[] {"", "line"}, 0),
@@ -92,6 +115,10 @@ public class CollectFeatures {
92115
new FeatureMetaData(FeatureType.RULE, new String[] {"LT(1)", "rule"}, 2),
93116
new FeatureMetaData(FeatureType.RULE, new String[] {"LT(1)", "right ancestor"}, 3),
94117
new FeatureMetaData(FeatureType.RULE, new String[] {"LT(1)", "left ancestor"}, 3),
118+
new FeatureMetaData(FeatureType.RULE, new String[] {"ancestor's", "parent^4"}, 1),
119+
new FeatureMetaData(FeatureType.RULE, new String[] {"ancestor's", "parent^3"}, 1),
120+
new FeatureMetaData(FeatureType.RULE, new String[] {"ancestor's", "parent^2"}, 1),
121+
new FeatureMetaData(FeatureType.RULE, new String[] {"ancestor's", "parent"}, 1),
95122
new FeatureMetaData(FeatureType.TOKEN, new String[] {"", "LT(2)"}, 1),
96123
new FeatureMetaData(FeatureType.INFO_FILE, new String[] {"", "file"}, 0),
97124
new FeatureMetaData(FeatureType.INFO_LINE, new String[] {"", "line"}, 0),
@@ -111,6 +138,10 @@ public class CollectFeatures {
111138
new FeatureMetaData(FeatureType.RULE, new String[] {"LT(1)", "rule"}, 2),
112139
new FeatureMetaData(FeatureType.RULE, new String[] {"LT(1)", "right ancestor"}, 3),
113140
new FeatureMetaData(FeatureType.RULE, new String[] {"LT(1)", "left ancestor"}, 3),
141+
new FeatureMetaData(FeatureType.RULE, new String[] {"ancestor's", "parent^4"}, 1),
142+
new FeatureMetaData(FeatureType.RULE, new String[] {"ancestor's", "parent^3"}, 1),
143+
new FeatureMetaData(FeatureType.RULE, new String[] {"ancestor's", "parent^2"}, 1),
144+
new FeatureMetaData(FeatureType.RULE, new String[] {"ancestor's", "parent"}, 1),
114145
new FeatureMetaData(FeatureType.TOKEN, new String[] {"", "LT(2)"}, 1),
115146
new FeatureMetaData(FeatureType.INFO_FILE, new String[] {"", "file"}, 0),
116147
new FeatureMetaData(FeatureType.INFO_LINE, new String[] {"", "line"}, 0),
@@ -130,6 +161,10 @@ public class CollectFeatures {
130161
new FeatureMetaData(FeatureType.RULE, new String[] {"LT(1)", "rule"}, 2),
131162
new FeatureMetaData(FeatureType.RULE, new String[] {"LT(1)", "right ancestor"}, 3),
132163
new FeatureMetaData(FeatureType.RULE, new String[] {"LT(1)", "left ancestor"}, 3),
164+
new FeatureMetaData(FeatureType.RULE, new String[] {"ancestor's", "parent^4"}, 1),
165+
new FeatureMetaData(FeatureType.RULE, new String[] {"ancestor's", "parent^3"}, 1),
166+
new FeatureMetaData(FeatureType.RULE, new String[] {"ancestor's", "parent^2"}, 1),
167+
new FeatureMetaData(FeatureType.RULE, new String[] {"ancestor's", "parent"}, 1),
133168
new FeatureMetaData(FeatureType.TOKEN, new String[] {"", "LT(2)"}, 1),
134169
new FeatureMetaData(FeatureType.INFO_FILE, new String[] {"", "file"}, 0),
135170
new FeatureMetaData(FeatureType.INFO_LINE, new String[] {"", "line"}, 0),
@@ -257,8 +292,19 @@ else if ( earliestAncestorsParentStart!=null &&
257292
{
258293
aligned = CAT_ALIGN_WITH_ANCESTORS_PARENT_FIRST_TOKEN;
259294
}
260-
else if ( columnDelta>0 ) {
261-
aligned = CAT_INDENT; // indent standard amount
295+
else if ( columnDelta!=0 ) {
296+
ParserRuleContext earliestLeftAncestor = earliestAncestorStartingWithToken(parent, curToken);
297+
ParserRuleContext ancestorParent = getParent(earliestLeftAncestor);
298+
int indentedFromPos = curToken.getCharPositionInLine()-Formatter.INDENT_LEVEL;
299+
ParserRuleContext indentParent =
300+
earliestAncestorStartingAtCharPos(ancestorParent, indentedFromPos);
301+
if ( indentParent!=null ) {
302+
int deltaFromLeftAncestor = getDeltaToAncestor(earliestLeftAncestor, indentParent);
303+
aligned = CAT_INDENT_FROM_ANCESTOR_FIRST_TOKEN+deltaFromLeftAncestor;
304+
}
305+
else {
306+
aligned = CAT_INDENT; // indent standard amount
307+
}
262308
}
263309

264310
return aligned;
@@ -405,6 +451,55 @@ public static ParserRuleContext earliestAncestorEndingWithToken(ParserRuleContex
405451
return prev;
406452
}
407453

454+
/** Walk upwards from node until we find p.start at char position and p.start
455+
* is first token on a line; return null if there is no such ancestor p.
456+
*/
457+
public ParserRuleContext earliestAncestorStartingAtCharPos(ParserRuleContext node, int charpos) {
458+
ParserRuleContext p = node;
459+
while ( p!=null ) {
460+
if ( isFirstOnLine(p.getStart()) && p.getStart().getCharPositionInLine()==charpos ) {
461+
return p;
462+
}
463+
p = p.getParent();
464+
}
465+
return null;
466+
}
467+
468+
/** Return the number of hops to get to ancestor from node or -1 if we
469+
* don't find ancestor on path to root.
470+
*/
471+
public static int getDeltaToAncestor(ParserRuleContext node, ParserRuleContext ancestor) {
472+
int n = 0;
473+
ParserRuleContext p = node;
474+
while ( p!=null && p!=ancestor ) {
475+
n++;
476+
p = p.getParent();
477+
}
478+
if ( p==null ) return -1;
479+
return n;
480+
}
481+
482+
public static ParserRuleContext getAncestor(ParserRuleContext node, int delta) {
483+
System.out.print(node.getText()+" "+JavaParser.ruleNames[node.getRuleIndex()]+"+"+delta);
484+
int n = 0;
485+
ParserRuleContext p = node;
486+
while ( p!=null && n!=delta ) {
487+
n++;
488+
p = p.getParent();
489+
}
490+
System.out.println(" is "+JavaParser.ruleNames[p.getRuleIndex()]+":"+p.getAltNumber());
491+
return p;
492+
}
493+
494+
public boolean isFirstOnLine(Token t) {
495+
tokens.seek(t.getTokenIndex()); // LT(1)
496+
Token prevToken = tokens.LT(-1);
497+
if ( prevToken==null ) {
498+
return true; // if we are first token, must be first on line
499+
}
500+
return t.getLine()>prevToken.getLine();
501+
}
502+
408503
public static ParserRuleContext deepestCommonAncestor(ParserRuleContext t1, ParserRuleContext t2) {
409504
if ( t1==t2 ) return t1;
410505
List<? extends Tree> t1_ancestors = Trees.getAncestors(t1);
@@ -443,29 +538,37 @@ public static int[] getNodeFeatures(Map<Token, TerminalNode> tokenToNodeMap,
443538
TerminalNode prevTerminalNode = tokenToNodeMap.get(prevToken);
444539
ParserRuleContext parent = (ParserRuleContext)prevTerminalNode.getParent();
445540
int prevTokenRuleIndex = parent.getRuleIndex();
541+
int prevTokenRuleAltNum = parent.getAltNumber();
446542
ParserRuleContext prevEarliestRightAncestor = earliestAncestorEndingWithToken(parent, prevToken);
447543
int prevEarliestAncestorRuleIndex = -1;
544+
int prevEarliestAncestorRuleAltNum = 0;
448545
int prevEarliestAncestorWidth = -1;
449546
if ( prevEarliestRightAncestor!=null ) {
450547
prevEarliestAncestorRuleIndex = prevEarliestRightAncestor.getRuleIndex();
548+
prevEarliestAncestorRuleAltNum = prevEarliestRightAncestor.getAltNumber();
451549
prevEarliestAncestorWidth = prevEarliestRightAncestor.stop.getStopIndex()-prevEarliestRightAncestor.start.getStartIndex()+1;
452550
}
453551

454552
// Get context information for current token
455553
parent = (ParserRuleContext)node.getParent();
456554
int curTokensParentRuleIndex = parent.getRuleIndex();
555+
int curTokensParentRuleAltNumber = parent.getAltNumber();
457556
ParserRuleContext earliestLeftAncestor = earliestAncestorStartingWithToken(parent, curToken);
458557
int earliestAncestorWidth = -1;
459558
int earliestLeftAncestorRuleIndex = -1;
559+
int earliestLeftAncestorRuleAlt = 0;
460560
if ( earliestLeftAncestor!=null ) {
461561
earliestLeftAncestorRuleIndex = earliestLeftAncestor.getRuleIndex();
562+
earliestLeftAncestorRuleAlt = earliestLeftAncestor.getAltNumber();
462563
earliestAncestorWidth = earliestLeftAncestor.stop.getStopIndex()-earliestLeftAncestor.start.getStartIndex()+1;
463564
}
464565

465566
ParserRuleContext earliestRightAncestor = earliestAncestorEndingWithToken(parent, curToken);
466567
int earliestRightAncestorRuleIndex = -1;
568+
int earliestRightAncestorRuleAlt = 0;
467569
if ( earliestRightAncestor!=null ) {
468570
earliestRightAncestorRuleIndex = earliestRightAncestor.getRuleIndex();
571+
earliestRightAncestorRuleAlt = earliestRightAncestor.getAltNumber();
469572
}
470573
int prevTokenEndCharPos = window.get(1).getCharPositionInLine() + window.get(1).getText().length();
471574

@@ -479,22 +582,40 @@ public static int[] getNodeFeatures(Map<Token, TerminalNode> tokenToNodeMap,
479582
// TODO: I don't think we can detect first element of list
480583
boolean startOfList = isFirstSiblingOfList(tokenToNodeMap, curToken);
481584

585+
// Get some context from parse tree
586+
ParserRuleContext ancestorParent = null;
587+
ParserRuleContext ancestorParent2 = null;
588+
if ( earliestLeftAncestor==null ) { // just use regular parent then
589+
ancestorParent = getParent(node);
590+
ancestorParent2 = ancestorParent.getParent(); // get immediate parent for context
591+
}
592+
else {
593+
ancestorParent = getParent(earliestLeftAncestor); // get parent but skip chain rules
594+
ancestorParent2 = ancestorParent.getParent(); // get immediate parent for context
595+
}
596+
ParserRuleContext ancestorParent3 = ancestorParent2!=null ? ancestorParent2.getParent() : null;
597+
ParserRuleContext ancestorParent4 = ancestorParent3!=null ? ancestorParent3.getParent() : null;
598+
482599
boolean curTokenStartsNewLine = window.get(2).getLine()>window.get(1).getLine();
483600
int[] features = {
484601
window.get(0).getType(),
485-
486602
window.get(1).getType(),
487-
prevTokenRuleIndex,
603+
rulealt(prevTokenRuleIndex,prevTokenRuleAltNum),
488604
prevTokenEndCharPos,
489-
prevEarliestAncestorRuleIndex,
605+
rulealt(prevEarliestAncestorRuleIndex,prevEarliestAncestorRuleAltNum),
490606

491607
window.get(2).getType(), // LT(1)
492608
startOfList ? 1 : 0,
493609
matchingSymbolOnDiffLine,
494610
curTokenStartsNewLine ? 1 : 0,
495-
curTokensParentRuleIndex,
496-
earliestRightAncestorRuleIndex,
497-
earliestLeftAncestorRuleIndex,
611+
rulealt(curTokensParentRuleIndex,curTokensParentRuleAltNumber),
612+
rulealt(earliestRightAncestorRuleIndex,earliestRightAncestorRuleAlt),
613+
rulealt(earliestLeftAncestorRuleIndex,earliestLeftAncestorRuleAlt),
614+
ancestorParent4!=null ? rulealt(ancestorParent4.getRuleIndex(),ancestorParent4.getAltNumber()) : -1,
615+
ancestorParent3!=null ? rulealt(ancestorParent3.getRuleIndex(),ancestorParent3.getAltNumber()) : -1,
616+
ancestorParent2!=null ? rulealt(ancestorParent2.getRuleIndex(),ancestorParent2.getAltNumber()) : -1,
617+
rulealt(ancestorParent.getRuleIndex(),ancestorParent.getAltNumber()), // always at least token's parent exists
618+
498619
window.get(3).getType(),
499620

500621
// info
@@ -657,7 +778,9 @@ public static String _toString(FeatureMetaData[] FEATURES, InputDocument doc, in
657778
break;
658779
case RULE :
659780
if ( features[i]>=0 ) {
660-
String ruleName = ruleNames[features[i]];
781+
String ruleName = ruleNames[unrulealt(features[i])[0]];
782+
int ruleAltNum = unrulealt(features[i])[1];
783+
ruleName += ":"+ruleAltNum;
661784
abbrev = StringUtils.abbreviateMiddle(ruleName, "*", displayWidth);
662785
buf.append(String.format("%"+displayWidth+"s", abbrev));
663786
}
@@ -782,11 +905,30 @@ public static List<Token> getRealTokens(CommonTokenStream tokens) {
782905
return real;
783906
}
784907

908+
public static ParserRuleContext getParent(TerminalNode p) {
909+
return getParent((ParserRuleContext)p.getParent());
910+
}
911+
785912
/** Same as p.getParent() except we scan through chain rule nodes */
786913
public static ParserRuleContext getParent(ParserRuleContext p) {
787914
if ( p==null ) return null;
788915
ParserRuleContext lastValidParent = p.getParent();
789-
// now try to walk chain rules starting with the parent of the usual parent
916+
if ( lastValidParent==null ) return null; // must have hit the root
917+
918+
return parentClosure(p.getParent());
919+
//
920+
// // now try to walk chain rules starting with the parent of the usual parent
921+
// ParserRuleContext q = lastValidParent.getParent();
922+
// while ( q!=null && q.getChildCount()==1 ) { // while is a chain rule
923+
// lastValidParent = q;
924+
// q = q.getParent();
925+
// }
926+
// return lastValidParent;
927+
}
928+
929+
// try to walk chain rules starting with the parent of the usual parent
930+
public static ParserRuleContext parentClosure(ParserRuleContext p) {
931+
ParserRuleContext lastValidParent = p;
790932
ParserRuleContext q = lastValidParent.getParent();
791933
while ( q!=null && q.getChildCount()==1 ) { // while is a chain rule
792934
lastValidParent = q;
@@ -795,4 +937,15 @@ public static ParserRuleContext getParent(ParserRuleContext p) {
795937
return lastValidParent;
796938
}
797939

940+
/** Pack a rule index and an alternative number into the same 32-bit integer. */
941+
public static int rulealt(int rule, int alt) {
942+
if ( rule==-1 ) return -1;
943+
return rule<<16 | alt;
944+
}
945+
946+
/** Return {rule index, rule alt number} */
947+
public static int[] unrulealt(int ra) {
948+
if ( ra==-1 ) return new int[] {-1, ATN.INVALID_ALT_NUMBER};
949+
return new int[] {(ra>>16)&0xFFFF,ra&0xFFFF};
950+
}
798951
}

0 commit comments

Comments
 (0)