2323import java .util .Map ;
2424
2525public class CollectFeatures {
26- public static final double MAX_CONTEXT_DIFF_THRESHOLD = 0.20 ;
26+ public static final double MAX_CONTEXT_DIFF_THRESHOLD = 0.13 ;
2727 public static final double MAX_CONTEXT_DIFF_THRESHOLD2 = 0.50 ;
2828
2929 // Feature values for pair on diff lines feature
@@ -77,12 +77,16 @@ public class CollectFeatures {
7777 public static final int INDEX_ANCESTORS_PARENT_CHILD_INDEX = 7 ;
7878 public static final int INDEX_ANCESTORS_PARENT2_RULE = 8 ;
7979 public static final int INDEX_ANCESTORS_PARENT2_CHILD_INDEX = 9 ;
80+ public static final int INDEX_ANCESTORS_PARENT3_RULE = 10 ;
81+ public static final int INDEX_ANCESTORS_PARENT3_CHILD_INDEX = 11 ;
82+ public static final int INDEX_ANCESTORS_PARENT4_RULE = 12 ;
83+ public static final int INDEX_ANCESTORS_PARENT4_CHILD_INDEX = 13 ;
8084
81- public static final int INDEX_INFO_FILE = 10 ;
82- public static final int INDEX_INFO_LINE = 11 ;
83- public static final int INDEX_INFO_CHARPOS = 12 ;
85+ public static final int INDEX_INFO_FILE = 14 ;
86+ public static final int INDEX_INFO_LINE = 15 ;
87+ public static final int INDEX_INFO_CHARPOS = 16 ;
8488
85- public static final int NUM_FEATURES = 13 ;
89+ public static final int NUM_FEATURES = 17 ;
8690
8791// public static final int INDEX_RULE = 8; // what rule are we in?
8892// public static final int INDEX_EARLIEST_RIGHT_ANCESTOR = 9;
@@ -104,11 +108,15 @@ public class CollectFeatures {
104108 new FeatureMetaData (FeatureType .BOOL , new String []{"Pair" , "dif\\ n" }, 1 ),
105109 FeatureMetaData .UNUSED ,
106110 new FeatureMetaData (FeatureType .RULE , new String [] {"LT(1)" , "left ancestor" }, 1 ),
111+ // the previous 5 features seem to predict newline really well; whitespace is ok too
112+ FeatureMetaData .UNUSED ,
113+ FeatureMetaData .UNUSED ,
114+ FeatureMetaData .UNUSED ,
115+ FeatureMetaData .UNUSED ,
107116 FeatureMetaData .UNUSED ,
108117 FeatureMetaData .UNUSED ,
109118 FeatureMetaData .UNUSED ,
110119 FeatureMetaData .UNUSED ,
111- // these 6 features seem to predict newline really well. whitespace ok too
112120 new FeatureMetaData (FeatureType .INFO_FILE , new String [] {"" , "file" }, 0 ),
113121 new FeatureMetaData (FeatureType .INFO_LINE , new String [] {"" , "line" }, 0 ),
114122 new FeatureMetaData (FeatureType .INFO_CHARPOS , new String [] {"char" , "pos" }, 0 )
@@ -125,6 +133,10 @@ public class CollectFeatures {
125133 new FeatureMetaData (FeatureType .INT , new String [] {"parent" , "child index" }, 1 ),
126134 new FeatureMetaData (FeatureType .RULE , new String [] {"" , "parent^2" }, 1 ),
127135 new FeatureMetaData (FeatureType .INT , new String [] {"parent^2" , "child index" }, 1 ),
136+ new FeatureMetaData (FeatureType .RULE , new String [] {"" , "parent^3" }, 1 ),
137+ new FeatureMetaData (FeatureType .INT , new String [] {"parent^3" , "child index" }, 1 ),
138+ new FeatureMetaData (FeatureType .RULE , new String [] {"" , "parent^4" }, 1 ),
139+ new FeatureMetaData (FeatureType .INT , new String [] {"parent^4" , "child index" }, 1 ),
128140 new FeatureMetaData (FeatureType .INFO_FILE , new String [] {"" , "file" }, 0 ),
129141 new FeatureMetaData (FeatureType .INFO_LINE , new String [] {"" , "line" }, 0 ),
130142 new FeatureMetaData (FeatureType .INFO_CHARPOS , new String [] {"char" , "pos" }, 0 )
@@ -141,6 +153,10 @@ public class CollectFeatures {
141153 new FeatureMetaData (FeatureType .INT , new String [] {"parent" , "child index" }, 1 ),
142154 new FeatureMetaData (FeatureType .RULE , new String [] {"" , "parent^2" }, 1 ),
143155 new FeatureMetaData (FeatureType .INT , new String [] {"parent^2" , "child index" }, 1 ),
156+ new FeatureMetaData (FeatureType .RULE , new String [] {"" , "parent^3" }, 1 ),
157+ new FeatureMetaData (FeatureType .INT , new String [] {"parent^3" , "child index" }, 1 ),
158+ new FeatureMetaData (FeatureType .RULE , new String [] {"" , "parent^4" }, 1 ),
159+ new FeatureMetaData (FeatureType .INT , new String [] {"parent^4" , "child index" }, 1 ),
144160 new FeatureMetaData (FeatureType .INFO_FILE , new String [] {"" , "file" }, 0 ),
145161 new FeatureMetaData (FeatureType .INFO_LINE , new String [] {"" , "line" }, 0 ),
146162 new FeatureMetaData (FeatureType .INFO_CHARPOS , new String [] {"char" , "pos" }, 0 )
@@ -423,6 +439,8 @@ public static int[] getNodeFeatures(Map<Token, TerminalNode> tokenToNodeMap,
423439 ParserRuleContext earliestLeftAncestorParent = earliestLeftAncestor .getParent ();
424440
425441 ParserRuleContext earliestLeftAncestorParent2 = earliestLeftAncestorParent !=null ? earliestLeftAncestorParent .getParent () : null ;
442+ ParserRuleContext earliestLeftAncestorParent3 = earliestLeftAncestorParent2 !=null ? earliestLeftAncestorParent2 .getParent () : null ;
443+ ParserRuleContext earliestLeftAncestorParent4 = earliestLeftAncestorParent3 !=null ? earliestLeftAncestorParent3 .getParent () : null ;
426444
427445 ParserRuleContext earliestRightAncestor = earliestAncestorEndingWithToken (node , curToken );
428446 int earliestRightAncestorRuleIndex = earliestRightAncestor .getRuleIndex ();
@@ -468,6 +486,10 @@ public static int[] getNodeFeatures(Map<Token, TerminalNode> tokenToNodeMap,
468486 getChildIndex (earliestLeftAncestor ),
469487 earliestLeftAncestorParent2 !=null ? rulealt (earliestLeftAncestorParent2 .getRuleIndex (), earliestLeftAncestorParent2 .getAltNumber ()) : 0 ,
470488 getChildIndex (earliestLeftAncestorParent ),
489+ earliestLeftAncestorParent3 !=null ? rulealt (earliestLeftAncestorParent3 .getRuleIndex (), earliestLeftAncestorParent3 .getAltNumber ()) : 0 ,
490+ getChildIndex (earliestLeftAncestorParent2 ),
491+ earliestLeftAncestorParent4 !=null ? rulealt (earliestLeftAncestorParent4 .getRuleIndex (), earliestLeftAncestorParent4 .getAltNumber ()) : 0 ,
492+ getChildIndex (earliestLeftAncestorParent3 ),
471493
472494 // info
473495 0 , // dummy; we don't store file index into feature vector
0 commit comments