@@ -73,12 +73,16 @@ public class CollectFeatures {
7373 public static final int INDEX_MATCHING_TOKEN_DIFF_LINE = 3 ;
7474 public static final int INDEX_FIRST_ON_LINE = 4 ; // a \n right before this token?
7575 public static final int INDEX_EARLIEST_LEFT_ANCESTOR = 5 ;
76+ public static final int INDEX_ANCESTORS_PARENT_RULE = 6 ;
77+ public static final int INDEX_ANCESTORS_PARENT_CHILD_INDEX = 7 ;
78+ public static final int INDEX_ANCESTORS_PARENT2_RULE = 8 ;
79+ public static final int INDEX_ANCESTORS_PARENT2_CHILD_INDEX = 9 ;
7680
77- public static final int INDEX_INFO_FILE = 6 ;
78- public static final int INDEX_INFO_LINE = 7 ;
79- public static final int INDEX_INFO_CHARPOS = 8 ;
81+ public static final int INDEX_INFO_FILE = 10 ;
82+ public static final int INDEX_INFO_LINE = 11 ;
83+ public static final int INDEX_INFO_CHARPOS = 12 ;
8084
81- public static final int NUM_FEATURES = 9 ;
85+ public static final int NUM_FEATURES = 13 ;
8286
8387// public static final int INDEX_RULE = 8; // what rule are we in?
8488// public static final int INDEX_EARLIEST_RIGHT_ANCESTOR = 9;
@@ -98,8 +102,12 @@ public class CollectFeatures {
98102 new FeatureMetaData (FeatureType .RULE , new String [] {"LT(-1)" , "right ancestor" }, 1 ),
99103 new FeatureMetaData (FeatureType .TOKEN , new String [] {"" , "LT(1)" }, 1 ),
100104 new FeatureMetaData (FeatureType .BOOL , new String []{"Pair" , "dif\\ n" }, 1 ),
101- new FeatureMetaData ( FeatureType . BOOL , new String []{ "Strt" , "line" }, 1 ) ,
105+ FeatureMetaData . UNUSED ,
102106 new FeatureMetaData (FeatureType .RULE , new String [] {"LT(1)" , "left ancestor" }, 1 ),
107+ FeatureMetaData .UNUSED ,
108+ FeatureMetaData .UNUSED ,
109+ FeatureMetaData .UNUSED ,
110+ FeatureMetaData .UNUSED ,
103111 // these 6 features seem to predict newline really well. whitespace ok too
104112 new FeatureMetaData (FeatureType .INFO_FILE , new String [] {"" , "file" }, 0 ),
105113 new FeatureMetaData (FeatureType .INFO_LINE , new String [] {"" , "line" }, 0 ),
@@ -110,9 +118,13 @@ public class CollectFeatures {
110118 new FeatureMetaData (FeatureType .TOKEN , new String [] {"" , "LT(-1)" }, 1 ),
111119 new FeatureMetaData (FeatureType .RULE , new String [] {"LT(-1)" , "right ancestor" }, 1 ),
112120 new FeatureMetaData (FeatureType .TOKEN , new String [] {"" , "LT(1)" }, 1 ),
113- new FeatureMetaData (FeatureType .BOOL , new String []{"Pair" , "dif\\ n" }, 1 ),
114- new FeatureMetaData (FeatureType .BOOL , new String []{"Strt" , "line" }, 1 ),
121+ new FeatureMetaData (FeatureType .BOOL , new String [] {"Pair" , "dif\\ n" }, 1 ),
122+ new FeatureMetaData (FeatureType .BOOL , new String [] {"Strt" , "line" }, 1 ),
115123 new FeatureMetaData (FeatureType .RULE , new String [] {"LT(1)" , "left ancestor" }, 1 ),
124+ new FeatureMetaData (FeatureType .RULE , new String [] {"" , "parent" }, 1 ),
125+ new FeatureMetaData (FeatureType .INT , new String [] {"parent" , "child index" }, 1 ),
126+ new FeatureMetaData (FeatureType .RULE , new String [] {"" , "parent^2" }, 1 ),
127+ new FeatureMetaData (FeatureType .INT , new String [] {"parent^2" , "child index" }, 1 ),
116128 new FeatureMetaData (FeatureType .INFO_FILE , new String [] {"" , "file" }, 0 ),
117129 new FeatureMetaData (FeatureType .INFO_LINE , new String [] {"" , "line" }, 0 ),
118130 new FeatureMetaData (FeatureType .INFO_CHARPOS , new String [] {"char" , "pos" }, 0 )
@@ -122,9 +134,13 @@ public class CollectFeatures {
122134 new FeatureMetaData (FeatureType .TOKEN , new String [] {"" , "LT(-1)" }, 1 ),
123135 new FeatureMetaData (FeatureType .RULE , new String [] {"LT(-1)" , "right ancestor" }, 1 ),
124136 new FeatureMetaData (FeatureType .TOKEN , new String [] {"" , "LT(1)" }, 1 ),
125- new FeatureMetaData (FeatureType .BOOL , new String []{"Pair" , "dif\\ n" }, 1 ),
126- new FeatureMetaData (FeatureType .BOOL , new String []{"Strt" , "line" }, 1 ),
137+ new FeatureMetaData (FeatureType .BOOL , new String [] {"Pair" , "dif\\ n" }, 1 ),
138+ new FeatureMetaData (FeatureType .BOOL , new String [] {"Strt" , "line" }, 1 ),
127139 new FeatureMetaData (FeatureType .RULE , new String [] {"LT(1)" , "left ancestor" }, 1 ),
140+ new FeatureMetaData (FeatureType .RULE , new String [] {"" , "parent" }, 1 ),
141+ new FeatureMetaData (FeatureType .INT , new String [] {"parent" , "child index" }, 1 ),
142+ new FeatureMetaData (FeatureType .RULE , new String [] {"" , "parent^2" }, 1 ),
143+ new FeatureMetaData (FeatureType .INT , new String [] {"parent^2" , "child index" }, 1 ),
128144 new FeatureMetaData (FeatureType .INFO_FILE , new String [] {"" , "file" }, 0 ),
129145 new FeatureMetaData (FeatureType .INFO_LINE , new String [] {"" , "line" }, 0 ),
130146 new FeatureMetaData (FeatureType .INFO_CHARPOS , new String [] {"char" , "pos" }, 0 )
@@ -404,6 +420,9 @@ public static int[] getNodeFeatures(Map<Token, TerminalNode> tokenToNodeMap,
404420 ParserRuleContext earliestLeftAncestor = earliestAncestorStartingWithToken (node , curToken );
405421 int earliestLeftAncestorRuleIndex = earliestLeftAncestor .getRuleIndex ();
406422 int earliestLeftAncestorRuleAlt = earliestLeftAncestor .getAltNumber ();
423+ ParserRuleContext earliestLeftAncestorParent = earliestLeftAncestor .getParent ();
424+
425+ ParserRuleContext earliestLeftAncestorParent2 = earliestLeftAncestorParent !=null ? earliestLeftAncestorParent .getParent () : null ;
407426
408427 ParserRuleContext earliestRightAncestor = earliestAncestorEndingWithToken (node , curToken );
409428 int earliestRightAncestorRuleIndex = earliestRightAncestor .getRuleIndex ();
@@ -412,8 +431,6 @@ public static int[] getNodeFeatures(Map<Token, TerminalNode> tokenToNodeMap,
412431 int matchingSymbolOnDiffLine = getMatchingSymbolOnDiffLine (doc , node , line );
413432
414433 // Get some context from parse tree
415- ParserRuleContext ancestorParent = null ;
416- ParserRuleContext ancestorParent2 = null ;
417434// if ( earliestLeftAncestor==null ) { // just use regular parent then
418435// ancestorParent = getParent(node);
419436// if ( ancestorParent!=null ) {
@@ -436,6 +453,8 @@ public static int[] getNodeFeatures(Map<Token, TerminalNode> tokenToNodeMap,
436453// public static final int INDEX_MATCHING_TOKEN_DIFF_LINE = 3;
437454// public static final int INDEX_FIRST_ON_LINE = 4; // a \n right before this token?
438455// public static final int INDEX_EARLIEST_LEFT_ANCESTOR = 5;
456+ // new FeatureMetaData(FeatureType.RULE, new String[] {"left ancestor", "parent"}, 1),
457+ // new FeatureMetaData(FeatureType.INT, new String[] {"left ancestor", "child index"}, 1),
439458
440459 boolean curTokenStartsNewLine = tokens .LT (1 ).getLine ()>tokens .LT (-1 ).getLine ();
441460 int [] features = {
@@ -444,7 +463,11 @@ public static int[] getNodeFeatures(Map<Token, TerminalNode> tokenToNodeMap,
444463 tokens .LT (1 ).getType (),
445464 matchingSymbolOnDiffLine ,
446465 curTokenStartsNewLine ? 1 : 0 ,
447- rulealt (earliestLeftAncestorRuleIndex ,earliestLeftAncestorRuleAlt ),
466+ rulealt (earliestLeftAncestor .getRuleIndex (),earliestLeftAncestor .getAltNumber ()),
467+ rulealt (earliestLeftAncestorParent .getRuleIndex (), earliestLeftAncestorParent .getAltNumber ()),
468+ getChildIndex (earliestLeftAncestor ),
469+ earliestLeftAncestorParent2 !=null ? rulealt (earliestLeftAncestorParent2 .getRuleIndex (), earliestLeftAncestorParent2 .getAltNumber ()) : 0 ,
470+ getChildIndex (earliestLeftAncestorParent ),
448471
449472 // info
450473 0 , // dummy; we don't store file index into feature vector
@@ -600,7 +623,7 @@ public static String _toString(FeatureMetaData[] FEATURES, InputDocument doc, in
600623 case INFO_LINE :
601624 case INFO_CHARPOS :
602625 if ( features [i ]>=0 ) {
603- buf .append (String .format ("%" +displayWidth +"s" , String .valueOf (features [i ])));
626+ buf .append (String .format ("%" +displayWidth +"s" , StringUtils . center ( String .valueOf (features [i ]), displayWidth )));
604627 }
605628 else {
606629 buf .append (Tool .sequence (displayWidth , " " ));
@@ -716,6 +739,20 @@ public static ParserRuleContext getParent(TerminalNode p) {
716739 return parentClosure ((ParserRuleContext )p .getParent ());
717740 }
718741
742+ public static int getChildIndex (ParseTree t ) {
743+ if ( t ==null ) return -1 ;
744+ ParseTree parent = t .getParent ();
745+ if ( parent ==null ) {
746+ return -1 ;
747+ }
748+ for (int i = 0 ; i <parent .getChildCount (); i ++) {
749+ if ( parent .getChild (i )==t ) {
750+ return i ;
751+ }
752+ }
753+ return -1 ;
754+ }
755+
719756 /** Same as p.getParent() except we scan through chain rule nodes */
720757 public static ParserRuleContext getParent (ParserRuleContext p ) {
721758 if ( p ==null ) return null ;
0 commit comments