Skip to content

Commit 1c722df

Browse files
committed
Wow. Java is now bang on. The key new idea is to use the child index rather than the parent width. All costs are the same, at 1.
1 parent 2cfacfd commit 1c722df

1 file changed

Lines changed: 28 additions & 6 deletions

File tree

java/src/org/antlr/codebuff/CollectFeatures.java

Lines changed: 28 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
import java.util.Map;
2424

2525
public class CollectFeatures {
26-
public static final double MAX_CONTEXT_DIFF_THRESHOLD = 0.20;
26+
public static final double MAX_CONTEXT_DIFF_THRESHOLD = 0.13;
2727
public static final double MAX_CONTEXT_DIFF_THRESHOLD2 = 0.50;
2828

2929
// Feature values for pair on diff lines feature
@@ -77,12 +77,16 @@ public class CollectFeatures {
7777
public static final int INDEX_ANCESTORS_PARENT_CHILD_INDEX = 7;
7878
public static final int INDEX_ANCESTORS_PARENT2_RULE = 8;
7979
public static final int INDEX_ANCESTORS_PARENT2_CHILD_INDEX = 9;
80+
public static final int INDEX_ANCESTORS_PARENT3_RULE = 10;
81+
public static final int INDEX_ANCESTORS_PARENT3_CHILD_INDEX = 11;
82+
public static final int INDEX_ANCESTORS_PARENT4_RULE = 12;
83+
public static final int INDEX_ANCESTORS_PARENT4_CHILD_INDEX = 13;
8084

81-
public static final int INDEX_INFO_FILE = 10;
82-
public static final int INDEX_INFO_LINE = 11;
83-
public static final int INDEX_INFO_CHARPOS = 12;
85+
public static final int INDEX_INFO_FILE = 14;
86+
public static final int INDEX_INFO_LINE = 15;
87+
public static final int INDEX_INFO_CHARPOS = 16;
8488

85-
public static final int NUM_FEATURES = 13;
89+
public static final int NUM_FEATURES = 17;
8690

8791
// public static final int INDEX_RULE = 8; // what rule are we in?
8892
// public static final int INDEX_EARLIEST_RIGHT_ANCESTOR = 9;
@@ -104,11 +108,15 @@ public class CollectFeatures {
104108
new FeatureMetaData(FeatureType.BOOL, new String[]{"Pair", "dif\\n"}, 1),
105109
FeatureMetaData.UNUSED,
106110
new FeatureMetaData(FeatureType.RULE, new String[] {"LT(1)", "left ancestor"}, 1),
111+
// These previous 5 features seem to predict newlines really well; whitespace is OK too.
112+
FeatureMetaData.UNUSED,
113+
FeatureMetaData.UNUSED,
114+
FeatureMetaData.UNUSED,
115+
FeatureMetaData.UNUSED,
107116
FeatureMetaData.UNUSED,
108117
FeatureMetaData.UNUSED,
109118
FeatureMetaData.UNUSED,
110119
FeatureMetaData.UNUSED,
111-
// these 6 features seem to predict newline really well. whitespace ok too
112120
new FeatureMetaData(FeatureType.INFO_FILE, new String[] {"", "file"}, 0),
113121
new FeatureMetaData(FeatureType.INFO_LINE, new String[] {"", "line"}, 0),
114122
new FeatureMetaData(FeatureType.INFO_CHARPOS, new String[] {"char", "pos"}, 0)
@@ -125,6 +133,10 @@ public class CollectFeatures {
125133
new FeatureMetaData(FeatureType.INT, new String[] {"parent", "child index"}, 1),
126134
new FeatureMetaData(FeatureType.RULE, new String[] {"", "parent^2"}, 1),
127135
new FeatureMetaData(FeatureType.INT, new String[] {"parent^2", "child index"}, 1),
136+
new FeatureMetaData(FeatureType.RULE, new String[] {"", "parent^3"}, 1),
137+
new FeatureMetaData(FeatureType.INT, new String[] {"parent^3", "child index"}, 1),
138+
new FeatureMetaData(FeatureType.RULE, new String[] {"", "parent^4"}, 1),
139+
new FeatureMetaData(FeatureType.INT, new String[] {"parent^4", "child index"}, 1),
128140
new FeatureMetaData(FeatureType.INFO_FILE, new String[] {"", "file"}, 0),
129141
new FeatureMetaData(FeatureType.INFO_LINE, new String[] {"", "line"}, 0),
130142
new FeatureMetaData(FeatureType.INFO_CHARPOS, new String[] {"char", "pos"}, 0)
@@ -141,6 +153,10 @@ public class CollectFeatures {
141153
new FeatureMetaData(FeatureType.INT, new String[] {"parent", "child index"}, 1),
142154
new FeatureMetaData(FeatureType.RULE, new String[] {"", "parent^2"}, 1),
143155
new FeatureMetaData(FeatureType.INT, new String[] {"parent^2", "child index"}, 1),
156+
new FeatureMetaData(FeatureType.RULE, new String[] {"", "parent^3"}, 1),
157+
new FeatureMetaData(FeatureType.INT, new String[] {"parent^3", "child index"}, 1),
158+
new FeatureMetaData(FeatureType.RULE, new String[] {"", "parent^4"}, 1),
159+
new FeatureMetaData(FeatureType.INT, new String[] {"parent^4", "child index"}, 1),
144160
new FeatureMetaData(FeatureType.INFO_FILE, new String[] {"", "file"}, 0),
145161
new FeatureMetaData(FeatureType.INFO_LINE, new String[] {"", "line"}, 0),
146162
new FeatureMetaData(FeatureType.INFO_CHARPOS, new String[] {"char", "pos"}, 0)
@@ -423,6 +439,8 @@ public static int[] getNodeFeatures(Map<Token, TerminalNode> tokenToNodeMap,
423439
ParserRuleContext earliestLeftAncestorParent = earliestLeftAncestor.getParent();
424440

425441
ParserRuleContext earliestLeftAncestorParent2 = earliestLeftAncestorParent!=null ? earliestLeftAncestorParent.getParent() : null;
442+
ParserRuleContext earliestLeftAncestorParent3 = earliestLeftAncestorParent2!=null ? earliestLeftAncestorParent2.getParent() : null;
443+
ParserRuleContext earliestLeftAncestorParent4 = earliestLeftAncestorParent3!=null ? earliestLeftAncestorParent3.getParent() : null;
426444

427445
ParserRuleContext earliestRightAncestor = earliestAncestorEndingWithToken(node, curToken);
428446
int earliestRightAncestorRuleIndex = earliestRightAncestor.getRuleIndex();
@@ -468,6 +486,10 @@ public static int[] getNodeFeatures(Map<Token, TerminalNode> tokenToNodeMap,
468486
getChildIndex(earliestLeftAncestor),
469487
earliestLeftAncestorParent2!=null ? rulealt(earliestLeftAncestorParent2.getRuleIndex(), earliestLeftAncestorParent2.getAltNumber()) : 0,
470488
getChildIndex(earliestLeftAncestorParent),
489+
earliestLeftAncestorParent3!=null ? rulealt(earliestLeftAncestorParent3.getRuleIndex(), earliestLeftAncestorParent3.getAltNumber()) : 0,
490+
getChildIndex(earliestLeftAncestorParent2),
491+
earliestLeftAncestorParent4!=null ? rulealt(earliestLeftAncestorParent4.getRuleIndex(), earliestLeftAncestorParent4.getAltNumber()) : 0,
492+
getChildIndex(earliestLeftAncestorParent3),
471493

472494
// info
473495
0, // dummy; we don't store file index into feature vector

0 commit comments

Comments
 (0)