Skip to content

Commit 2cfacfd

Browse files
committed
The inject ws/nl features should not use "is first on line"; that is the value being predicted. Newline prediction is much better now.
Add the parent of the left ancestor and that parent's parent as features; indent-level prediction is much better. Center the INT feature output and make the column a bit wider to fit the header name.
1 parent be70dd1 commit 2cfacfd

2 files changed

Lines changed: 51 additions & 14 deletions

File tree

java/src/org/antlr/codebuff/CollectFeatures.java

Lines changed: 50 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -73,12 +73,16 @@ public class CollectFeatures {
7373
public static final int INDEX_MATCHING_TOKEN_DIFF_LINE = 3;
7474
public static final int INDEX_FIRST_ON_LINE = 4; // a \n right before this token?
7575
public static final int INDEX_EARLIEST_LEFT_ANCESTOR = 5;
76+
public static final int INDEX_ANCESTORS_PARENT_RULE = 6;
77+
public static final int INDEX_ANCESTORS_PARENT_CHILD_INDEX = 7;
78+
public static final int INDEX_ANCESTORS_PARENT2_RULE = 8;
79+
public static final int INDEX_ANCESTORS_PARENT2_CHILD_INDEX = 9;
7680

77-
public static final int INDEX_INFO_FILE = 6;
78-
public static final int INDEX_INFO_LINE = 7;
79-
public static final int INDEX_INFO_CHARPOS = 8;
81+
public static final int INDEX_INFO_FILE = 10;
82+
public static final int INDEX_INFO_LINE = 11;
83+
public static final int INDEX_INFO_CHARPOS = 12;
8084

81-
public static final int NUM_FEATURES = 9;
85+
public static final int NUM_FEATURES = 13;
8286

8387
// public static final int INDEX_RULE = 8; // what rule are we in?
8488
// public static final int INDEX_EARLIEST_RIGHT_ANCESTOR = 9;
@@ -98,8 +102,12 @@ public class CollectFeatures {
98102
new FeatureMetaData(FeatureType.RULE, new String[] {"LT(-1)", "right ancestor"}, 1),
99103
new FeatureMetaData(FeatureType.TOKEN, new String[] {"", "LT(1)"}, 1),
100104
new FeatureMetaData(FeatureType.BOOL, new String[]{"Pair", "dif\\n"}, 1),
101-
new FeatureMetaData(FeatureType.BOOL, new String[]{"Strt", "line"}, 1),
105+
FeatureMetaData.UNUSED,
102106
new FeatureMetaData(FeatureType.RULE, new String[] {"LT(1)", "left ancestor"}, 1),
107+
FeatureMetaData.UNUSED,
108+
FeatureMetaData.UNUSED,
109+
FeatureMetaData.UNUSED,
110+
FeatureMetaData.UNUSED,
103111
// these 6 features seem to predict newline really well. whitespace ok too
104112
new FeatureMetaData(FeatureType.INFO_FILE, new String[] {"", "file"}, 0),
105113
new FeatureMetaData(FeatureType.INFO_LINE, new String[] {"", "line"}, 0),
@@ -110,9 +118,13 @@ public class CollectFeatures {
110118
new FeatureMetaData(FeatureType.TOKEN, new String[] {"", "LT(-1)"}, 1),
111119
new FeatureMetaData(FeatureType.RULE, new String[] {"LT(-1)", "right ancestor"}, 1),
112120
new FeatureMetaData(FeatureType.TOKEN, new String[] {"", "LT(1)"}, 1),
113-
new FeatureMetaData(FeatureType.BOOL, new String[]{"Pair", "dif\\n"}, 1),
114-
new FeatureMetaData(FeatureType.BOOL, new String[]{"Strt", "line"}, 1),
121+
new FeatureMetaData(FeatureType.BOOL, new String[] {"Pair", "dif\\n"}, 1),
122+
new FeatureMetaData(FeatureType.BOOL, new String[] {"Strt", "line"}, 1),
115123
new FeatureMetaData(FeatureType.RULE, new String[] {"LT(1)", "left ancestor"}, 1),
124+
new FeatureMetaData(FeatureType.RULE, new String[] {"", "parent"}, 1),
125+
new FeatureMetaData(FeatureType.INT, new String[] {"parent", "child index"}, 1),
126+
new FeatureMetaData(FeatureType.RULE, new String[] {"", "parent^2"}, 1),
127+
new FeatureMetaData(FeatureType.INT, new String[] {"parent^2", "child index"}, 1),
116128
new FeatureMetaData(FeatureType.INFO_FILE, new String[] {"", "file"}, 0),
117129
new FeatureMetaData(FeatureType.INFO_LINE, new String[] {"", "line"}, 0),
118130
new FeatureMetaData(FeatureType.INFO_CHARPOS, new String[] {"char", "pos"}, 0)
@@ -122,9 +134,13 @@ public class CollectFeatures {
122134
new FeatureMetaData(FeatureType.TOKEN, new String[] {"", "LT(-1)"}, 1),
123135
new FeatureMetaData(FeatureType.RULE, new String[] {"LT(-1)", "right ancestor"}, 1),
124136
new FeatureMetaData(FeatureType.TOKEN, new String[] {"", "LT(1)"}, 1),
125-
new FeatureMetaData(FeatureType.BOOL, new String[]{"Pair", "dif\\n"}, 1),
126-
new FeatureMetaData(FeatureType.BOOL, new String[]{"Strt", "line"}, 1),
137+
new FeatureMetaData(FeatureType.BOOL, new String[] {"Pair", "dif\\n"}, 1),
138+
new FeatureMetaData(FeatureType.BOOL, new String[] {"Strt", "line"}, 1),
127139
new FeatureMetaData(FeatureType.RULE, new String[] {"LT(1)", "left ancestor"}, 1),
140+
new FeatureMetaData(FeatureType.RULE, new String[] {"", "parent"}, 1),
141+
new FeatureMetaData(FeatureType.INT, new String[] {"parent", "child index"}, 1),
142+
new FeatureMetaData(FeatureType.RULE, new String[] {"", "parent^2"}, 1),
143+
new FeatureMetaData(FeatureType.INT, new String[] {"parent^2", "child index"}, 1),
128144
new FeatureMetaData(FeatureType.INFO_FILE, new String[] {"", "file"}, 0),
129145
new FeatureMetaData(FeatureType.INFO_LINE, new String[] {"", "line"}, 0),
130146
new FeatureMetaData(FeatureType.INFO_CHARPOS, new String[] {"char", "pos"}, 0)
@@ -404,6 +420,9 @@ public static int[] getNodeFeatures(Map<Token, TerminalNode> tokenToNodeMap,
404420
ParserRuleContext earliestLeftAncestor = earliestAncestorStartingWithToken(node, curToken);
405421
int earliestLeftAncestorRuleIndex = earliestLeftAncestor.getRuleIndex();
406422
int earliestLeftAncestorRuleAlt = earliestLeftAncestor.getAltNumber();
423+
ParserRuleContext earliestLeftAncestorParent = earliestLeftAncestor.getParent();
424+
425+
ParserRuleContext earliestLeftAncestorParent2 = earliestLeftAncestorParent!=null ? earliestLeftAncestorParent.getParent() : null;
407426

408427
ParserRuleContext earliestRightAncestor = earliestAncestorEndingWithToken(node, curToken);
409428
int earliestRightAncestorRuleIndex = earliestRightAncestor.getRuleIndex();
@@ -412,8 +431,6 @@ public static int[] getNodeFeatures(Map<Token, TerminalNode> tokenToNodeMap,
412431
int matchingSymbolOnDiffLine = getMatchingSymbolOnDiffLine(doc, node, line);
413432

414433
// Get some context from parse tree
415-
ParserRuleContext ancestorParent = null;
416-
ParserRuleContext ancestorParent2 = null;
417434
// if ( earliestLeftAncestor==null ) { // just use regular parent then
418435
// ancestorParent = getParent(node);
419436
// if ( ancestorParent!=null ) {
@@ -436,6 +453,8 @@ public static int[] getNodeFeatures(Map<Token, TerminalNode> tokenToNodeMap,
436453
// public static final int INDEX_MATCHING_TOKEN_DIFF_LINE = 3;
437454
// public static final int INDEX_FIRST_ON_LINE = 4; // a \n right before this token?
438455
// public static final int INDEX_EARLIEST_LEFT_ANCESTOR = 5;
456+
// new FeatureMetaData(FeatureType.RULE, new String[] {"left ancestor", "parent"}, 1),
457+
// new FeatureMetaData(FeatureType.INT, new String[] {"left ancestor", "child index"}, 1),
439458

440459
boolean curTokenStartsNewLine = tokens.LT(1).getLine()>tokens.LT(-1).getLine();
441460
int[] features = {
@@ -444,7 +463,11 @@ public static int[] getNodeFeatures(Map<Token, TerminalNode> tokenToNodeMap,
444463
tokens.LT(1).getType(),
445464
matchingSymbolOnDiffLine,
446465
curTokenStartsNewLine ? 1 : 0,
447-
rulealt(earliestLeftAncestorRuleIndex,earliestLeftAncestorRuleAlt),
466+
rulealt(earliestLeftAncestor.getRuleIndex(),earliestLeftAncestor.getAltNumber()),
467+
rulealt(earliestLeftAncestorParent.getRuleIndex(), earliestLeftAncestorParent.getAltNumber()),
468+
getChildIndex(earliestLeftAncestor),
469+
earliestLeftAncestorParent2!=null ? rulealt(earliestLeftAncestorParent2.getRuleIndex(), earliestLeftAncestorParent2.getAltNumber()) : 0,
470+
getChildIndex(earliestLeftAncestorParent),
448471

449472
// info
450473
0, // dummy; we don't store file index into feature vector
@@ -600,7 +623,7 @@ public static String _toString(FeatureMetaData[] FEATURES, InputDocument doc, in
600623
case INFO_LINE:
601624
case INFO_CHARPOS:
602625
if ( features[i]>=0 ) {
603-
buf.append(String.format("%"+displayWidth+"s", String.valueOf(features[i])));
626+
buf.append(String.format("%"+displayWidth+"s", StringUtils.center(String.valueOf(features[i]),displayWidth)));
604627
}
605628
else {
606629
buf.append(Tool.sequence(displayWidth, " "));
@@ -716,6 +739,20 @@ public static ParserRuleContext getParent(TerminalNode p) {
716739
return parentClosure((ParserRuleContext)p.getParent());
717740
}
718741

742+
public static int getChildIndex(ParseTree t) {
743+
if ( t==null ) return -1;
744+
ParseTree parent = t.getParent();
745+
if ( parent==null ) {
746+
return -1;
747+
}
748+
for (int i = 0; i<parent.getChildCount(); i++) {
749+
if ( parent.getChild(i)==t ) {
750+
return i;
751+
}
752+
}
753+
return -1;
754+
}
755+
719756
/** Same as p.getParent() except we scan through chain rule nodes */
720757
public static ParserRuleContext getParent(ParserRuleContext p) {
721758
if ( p==null ) return null;

java/src/org/antlr/codebuff/FeatureType.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
package org.antlr.codebuff;
22

33
public enum FeatureType {
4-
TOKEN(12), RULE(14), INT(7), BOOL(5), COL(7),
4+
TOKEN(12), RULE(14), INT(12), BOOL(5), COL(7),
55
INFO_FILE(15), INFO_LINE(4), INFO_CHARPOS(4),
66
UNUSED(0);
77
public int displayWidth;

0 commit comments

Comments
 (0)