66import org .antlr .v4 .runtime .ParserRuleContext ;
77import org .antlr .v4 .runtime .Token ;
88import org .antlr .v4 .runtime .Vocabulary ;
9+ import org .antlr .v4 .runtime .atn .ATN ;
910import org .antlr .v4 .runtime .misc .Pair ;
1011import org .antlr .v4 .runtime .tree .ErrorNode ;
1112import org .antlr .v4 .runtime .tree .ParseTreeListener ;
@@ -38,7 +39,21 @@ public class CollectFeatures {
3839 public static final int CAT_ALIGN_WITH_LIST_FIRST_ELEMENT = 3 ;
3940 public static final int CAT_ALIGN_WITH_PAIR = 4 ;
4041
41- public static final int CAT_INDENT = 100 ;
42+ /* We want to identify indentation from a parent's start token but that
43+ parent could be a number of levels up the tree. The next category
44+ values indicate indentation from the current token's left ancestor's
45+ parent then its parent and so on. For category value:
46+
47+ CAT_INDENT_FROM_ANCESTOR_FIRST_TOKEN + i
48+
49+ current token is indented from start token of node i levels up
50+ from ancestor.
51+ */
52+ public static final int CAT_INDENT_FROM_ANCESTOR_FIRST_TOKEN = 100 ; // left ancestor's first token is really current token
53+
54+ public static final int CAT_INDENT = 200 ;
55+
56+ // indexes into feature vector
4257
4358 public static final int INDEX_PREV2_TYPE = 0 ;
4459 public static final int INDEX_PREV_TYPE = 1 ;
@@ -52,12 +67,16 @@ public class CollectFeatures {
5267 public static final int INDEX_RULE = 9 ; // what rule are we in?
5368 public static final int INDEX_EARLIEST_RIGHT_ANCESTOR = 10 ;
5469 public static final int INDEX_EARLIEST_LEFT_ANCESTOR = 11 ;
55- public static final int INDEX_NEXT_TYPE = 12 ;
56- public static final int INDEX_INFO_FILE = 13 ;
57- public static final int INDEX_INFO_LINE = 14 ;
58- public static final int INDEX_INFO_CHARPOS = 15 ;
70+ public static final int INDEX_ANCESTORS_PARENT4_RULE = 12 ;
71+ public static final int INDEX_ANCESTORS_PARENT3_RULE = 13 ;
72+ public static final int INDEX_ANCESTORS_PARENT2_RULE = 14 ;
73+ public static final int INDEX_ANCESTORS_PARENT_RULE = 15 ;
74+ public static final int INDEX_NEXT_TYPE = 16 ;
75+ public static final int INDEX_INFO_FILE = 17 ;
76+ public static final int INDEX_INFO_LINE = 18 ;
77+ public static final int INDEX_INFO_CHARPOS = 19 ;
5978
60- public static final int NUM_FEATURES = 15 ;
79+ public static final int NUM_FEATURES = 20 ;
6180
6281 public static FeatureMetaData [] FEATURES_INJECT_NL = {
6382 new FeatureMetaData (FeatureType .TOKEN , new String [] {"" , "LT(-2)" }, 1 ),
@@ -72,6 +91,10 @@ public class CollectFeatures {
7291 new FeatureMetaData (FeatureType .RULE , new String [] {"LT(1)" , "rule" }, 2 ),
7392 new FeatureMetaData (FeatureType .RULE , new String [] {"LT(1)" , "right ancestor" }, 3 ),
7493 new FeatureMetaData (FeatureType .RULE , new String [] {"LT(1)" , "left ancestor" }, 3 ),
94+ new FeatureMetaData (FeatureType .RULE , new String [] {"ancestor's" , "parent^4" }, 1 ),
95+ new FeatureMetaData (FeatureType .RULE , new String [] {"ancestor's" , "parent^3" }, 1 ),
96+ new FeatureMetaData (FeatureType .RULE , new String [] {"ancestor's" , "parent^2" }, 1 ),
97+ new FeatureMetaData (FeatureType .RULE , new String [] {"ancestor's" , "parent" }, 1 ),
7598 FeatureMetaData .UNUSED ,
7699 new FeatureMetaData (FeatureType .INFO_FILE , new String [] {"" , "file" }, 0 ),
77100 new FeatureMetaData (FeatureType .INFO_LINE , new String [] {"" , "line" }, 0 ),
@@ -92,6 +115,10 @@ public class CollectFeatures {
92115 new FeatureMetaData (FeatureType .RULE , new String [] {"LT(1)" , "rule" }, 2 ),
93116 new FeatureMetaData (FeatureType .RULE , new String [] {"LT(1)" , "right ancestor" }, 3 ),
94117 new FeatureMetaData (FeatureType .RULE , new String [] {"LT(1)" , "left ancestor" }, 3 ),
118+ new FeatureMetaData (FeatureType .RULE , new String [] {"ancestor's" , "parent^4" }, 1 ),
119+ new FeatureMetaData (FeatureType .RULE , new String [] {"ancestor's" , "parent^3" }, 1 ),
120+ new FeatureMetaData (FeatureType .RULE , new String [] {"ancestor's" , "parent^2" }, 1 ),
121+ new FeatureMetaData (FeatureType .RULE , new String [] {"ancestor's" , "parent" }, 1 ),
95122 new FeatureMetaData (FeatureType .TOKEN , new String [] {"" , "LT(2)" }, 1 ),
96123 new FeatureMetaData (FeatureType .INFO_FILE , new String [] {"" , "file" }, 0 ),
97124 new FeatureMetaData (FeatureType .INFO_LINE , new String [] {"" , "line" }, 0 ),
@@ -111,6 +138,10 @@ public class CollectFeatures {
111138 new FeatureMetaData (FeatureType .RULE , new String [] {"LT(1)" , "rule" }, 2 ),
112139 new FeatureMetaData (FeatureType .RULE , new String [] {"LT(1)" , "right ancestor" }, 3 ),
113140 new FeatureMetaData (FeatureType .RULE , new String [] {"LT(1)" , "left ancestor" }, 3 ),
141+ new FeatureMetaData (FeatureType .RULE , new String [] {"ancestor's" , "parent^4" }, 1 ),
142+ new FeatureMetaData (FeatureType .RULE , new String [] {"ancestor's" , "parent^3" }, 1 ),
143+ new FeatureMetaData (FeatureType .RULE , new String [] {"ancestor's" , "parent^2" }, 1 ),
144+ new FeatureMetaData (FeatureType .RULE , new String [] {"ancestor's" , "parent" }, 1 ),
114145 new FeatureMetaData (FeatureType .TOKEN , new String [] {"" , "LT(2)" }, 1 ),
115146 new FeatureMetaData (FeatureType .INFO_FILE , new String [] {"" , "file" }, 0 ),
116147 new FeatureMetaData (FeatureType .INFO_LINE , new String [] {"" , "line" }, 0 ),
@@ -130,6 +161,10 @@ public class CollectFeatures {
130161 new FeatureMetaData (FeatureType .RULE , new String [] {"LT(1)" , "rule" }, 2 ),
131162 new FeatureMetaData (FeatureType .RULE , new String [] {"LT(1)" , "right ancestor" }, 3 ),
132163 new FeatureMetaData (FeatureType .RULE , new String [] {"LT(1)" , "left ancestor" }, 3 ),
164+ new FeatureMetaData (FeatureType .RULE , new String [] {"ancestor's" , "parent^4" }, 1 ),
165+ new FeatureMetaData (FeatureType .RULE , new String [] {"ancestor's" , "parent^3" }, 1 ),
166+ new FeatureMetaData (FeatureType .RULE , new String [] {"ancestor's" , "parent^2" }, 1 ),
167+ new FeatureMetaData (FeatureType .RULE , new String [] {"ancestor's" , "parent" }, 1 ),
133168 new FeatureMetaData (FeatureType .TOKEN , new String [] {"" , "LT(2)" }, 1 ),
134169 new FeatureMetaData (FeatureType .INFO_FILE , new String [] {"" , "file" }, 0 ),
135170 new FeatureMetaData (FeatureType .INFO_LINE , new String [] {"" , "line" }, 0 ),
@@ -257,8 +292,19 @@ else if ( earliestAncestorsParentStart!=null &&
257292 {
258293 aligned = CAT_ALIGN_WITH_ANCESTORS_PARENT_FIRST_TOKEN ;
259294 }
260- else if ( columnDelta >0 ) {
261- aligned = CAT_INDENT ; // indent standard amount
295+ else if ( columnDelta !=0 ) {
296+ ParserRuleContext earliestLeftAncestor = earliestAncestorStartingWithToken (parent , curToken );
297+ ParserRuleContext ancestorParent = getParent (earliestLeftAncestor );
298+ int indentedFromPos = curToken .getCharPositionInLine ()-Formatter .INDENT_LEVEL ;
299+ ParserRuleContext indentParent =
300+ earliestAncestorStartingAtCharPos (ancestorParent , indentedFromPos );
301+ if ( indentParent !=null ) {
302+ int deltaFromLeftAncestor = getDeltaToAncestor (earliestLeftAncestor , indentParent );
303+ aligned = CAT_INDENT_FROM_ANCESTOR_FIRST_TOKEN +deltaFromLeftAncestor ;
304+ }
305+ else {
306+ aligned = CAT_INDENT ; // indent standard amount
307+ }
262308 }
263309
264310 return aligned ;
@@ -405,6 +451,55 @@ public static ParserRuleContext earliestAncestorEndingWithToken(ParserRuleContex
405451 return prev ;
406452 }
407453
454+ /** Walk upwards from node until we find p.start at char position and p.start
455+ * is first token on a line; return null if there is no such ancestor p.
456+ */
457+ public ParserRuleContext earliestAncestorStartingAtCharPos (ParserRuleContext node , int charpos ) {
458+ ParserRuleContext p = node ;
459+ while ( p !=null ) {
460+ if ( isFirstOnLine (p .getStart ()) && p .getStart ().getCharPositionInLine ()==charpos ) {
461+ return p ;
462+ }
463+ p = p .getParent ();
464+ }
465+ return null ;
466+ }
467+
468+ /** Return the number of hops to get to ancestor from node or -1 if we
469+ * don't find ancestor on path to root.
470+ */
471+ public static int getDeltaToAncestor (ParserRuleContext node , ParserRuleContext ancestor ) {
472+ int n = 0 ;
473+ ParserRuleContext p = node ;
474+ while ( p !=null && p !=ancestor ) {
475+ n ++;
476+ p = p .getParent ();
477+ }
478+ if ( p ==null ) return -1 ;
479+ return n ;
480+ }
481+
482+ public static ParserRuleContext getAncestor (ParserRuleContext node , int delta ) {
483+ System .out .print (node .getText ()+" " +JavaParser .ruleNames [node .getRuleIndex ()]+"+" +delta );
484+ int n = 0 ;
485+ ParserRuleContext p = node ;
486+ while ( p !=null && n !=delta ) {
487+ n ++;
488+ p = p .getParent ();
489+ }
490+ System .out .println (" is " +JavaParser .ruleNames [p .getRuleIndex ()]+":" +p .getAltNumber ());
491+ return p ;
492+ }
493+
494+ public boolean isFirstOnLine (Token t ) {
495+ tokens .seek (t .getTokenIndex ()); // LT(1)
496+ Token prevToken = tokens .LT (-1 );
497+ if ( prevToken ==null ) {
498+ return true ; // if we are first token, must be first on line
499+ }
500+ return t .getLine ()>prevToken .getLine ();
501+ }
502+
408503 public static ParserRuleContext deepestCommonAncestor (ParserRuleContext t1 , ParserRuleContext t2 ) {
409504 if ( t1 ==t2 ) return t1 ;
410505 List <? extends Tree > t1_ancestors = Trees .getAncestors (t1 );
@@ -443,29 +538,37 @@ public static int[] getNodeFeatures(Map<Token, TerminalNode> tokenToNodeMap,
443538 TerminalNode prevTerminalNode = tokenToNodeMap .get (prevToken );
444539 ParserRuleContext parent = (ParserRuleContext )prevTerminalNode .getParent ();
445540 int prevTokenRuleIndex = parent .getRuleIndex ();
541+ int prevTokenRuleAltNum = parent .getAltNumber ();
446542 ParserRuleContext prevEarliestRightAncestor = earliestAncestorEndingWithToken (parent , prevToken );
447543 int prevEarliestAncestorRuleIndex = -1 ;
544+ int prevEarliestAncestorRuleAltNum = 0 ;
448545 int prevEarliestAncestorWidth = -1 ;
449546 if ( prevEarliestRightAncestor !=null ) {
450547 prevEarliestAncestorRuleIndex = prevEarliestRightAncestor .getRuleIndex ();
548+ prevEarliestAncestorRuleAltNum = prevEarliestRightAncestor .getAltNumber ();
451549 prevEarliestAncestorWidth = prevEarliestRightAncestor .stop .getStopIndex ()-prevEarliestRightAncestor .start .getStartIndex ()+1 ;
452550 }
453551
454552 // Get context information for current token
455553 parent = (ParserRuleContext )node .getParent ();
456554 int curTokensParentRuleIndex = parent .getRuleIndex ();
555+ int curTokensParentRuleAltNumber = parent .getAltNumber ();
457556 ParserRuleContext earliestLeftAncestor = earliestAncestorStartingWithToken (parent , curToken );
458557 int earliestAncestorWidth = -1 ;
459558 int earliestLeftAncestorRuleIndex = -1 ;
559+ int earliestLeftAncestorRuleAlt = 0 ;
460560 if ( earliestLeftAncestor !=null ) {
461561 earliestLeftAncestorRuleIndex = earliestLeftAncestor .getRuleIndex ();
562+ earliestLeftAncestorRuleAlt = earliestLeftAncestor .getAltNumber ();
462563 earliestAncestorWidth = earliestLeftAncestor .stop .getStopIndex ()-earliestLeftAncestor .start .getStartIndex ()+1 ;
463564 }
464565
465566 ParserRuleContext earliestRightAncestor = earliestAncestorEndingWithToken (parent , curToken );
466567 int earliestRightAncestorRuleIndex = -1 ;
568+ int earliestRightAncestorRuleAlt = 0 ;
467569 if ( earliestRightAncestor !=null ) {
468570 earliestRightAncestorRuleIndex = earliestRightAncestor .getRuleIndex ();
571+ earliestRightAncestorRuleAlt = earliestRightAncestor .getAltNumber ();
469572 }
470573 int prevTokenEndCharPos = window .get (1 ).getCharPositionInLine () + window .get (1 ).getText ().length ();
471574
@@ -479,22 +582,40 @@ public static int[] getNodeFeatures(Map<Token, TerminalNode> tokenToNodeMap,
479582 // TODO: I don't think we can detect first element of list
480583 boolean startOfList = isFirstSiblingOfList (tokenToNodeMap , curToken );
481584
585+ // Get some context from parse tree
586+ ParserRuleContext ancestorParent = null ;
587+ ParserRuleContext ancestorParent2 = null ;
588+ if ( earliestLeftAncestor ==null ) { // just use regular parent then
589+ ancestorParent = getParent (node );
590+ ancestorParent2 = ancestorParent .getParent (); // get immediate parent for context
591+ }
592+ else {
593+ ancestorParent = getParent (earliestLeftAncestor ); // get parent but skip chain rules
594+ ancestorParent2 = ancestorParent .getParent (); // get immediate parent for context
595+ }
596+ ParserRuleContext ancestorParent3 = ancestorParent2 !=null ? ancestorParent2 .getParent () : null ;
597+ ParserRuleContext ancestorParent4 = ancestorParent3 !=null ? ancestorParent3 .getParent () : null ;
598+
482599 boolean curTokenStartsNewLine = window .get (2 ).getLine ()>window .get (1 ).getLine ();
483600 int [] features = {
484601 window .get (0 ).getType (),
485-
486602 window .get (1 ).getType (),
487- prevTokenRuleIndex ,
603+ rulealt ( prevTokenRuleIndex , prevTokenRuleAltNum ) ,
488604 prevTokenEndCharPos ,
489- prevEarliestAncestorRuleIndex ,
605+ rulealt ( prevEarliestAncestorRuleIndex , prevEarliestAncestorRuleAltNum ) ,
490606
491607 window .get (2 ).getType (), // LT(1)
492608 startOfList ? 1 : 0 ,
493609 matchingSymbolOnDiffLine ,
494610 curTokenStartsNewLine ? 1 : 0 ,
495- curTokensParentRuleIndex ,
496- earliestRightAncestorRuleIndex ,
497- earliestLeftAncestorRuleIndex ,
611+ rulealt (curTokensParentRuleIndex ,curTokensParentRuleAltNumber ),
612+ rulealt (earliestRightAncestorRuleIndex ,earliestRightAncestorRuleAlt ),
613+ rulealt (earliestLeftAncestorRuleIndex ,earliestLeftAncestorRuleAlt ),
614+ ancestorParent4 !=null ? rulealt (ancestorParent4 .getRuleIndex (),ancestorParent4 .getAltNumber ()) : -1 ,
615+ ancestorParent3 !=null ? rulealt (ancestorParent3 .getRuleIndex (),ancestorParent3 .getAltNumber ()) : -1 ,
616+ ancestorParent2 !=null ? rulealt (ancestorParent2 .getRuleIndex (),ancestorParent2 .getAltNumber ()) : -1 ,
617+ rulealt (ancestorParent .getRuleIndex (),ancestorParent .getAltNumber ()), // always at least token's parent exists
618+
498619 window .get (3 ).getType (),
499620
500621 // info
@@ -657,7 +778,9 @@ public static String _toString(FeatureMetaData[] FEATURES, InputDocument doc, in
657778 break ;
658779 case RULE :
659780 if ( features [i ]>=0 ) {
660- String ruleName = ruleNames [features [i ]];
781+ String ruleName = ruleNames [unrulealt (features [i ])[0 ]];
782+ int ruleAltNum = unrulealt (features [i ])[1 ];
783+ ruleName += ":" +ruleAltNum ;
661784 abbrev = StringUtils .abbreviateMiddle (ruleName , "*" , displayWidth );
662785 buf .append (String .format ("%" +displayWidth +"s" , abbrev ));
663786 }
@@ -782,11 +905,30 @@ public static List<Token> getRealTokens(CommonTokenStream tokens) {
782905 return real ;
783906 }
784907
908+ public static ParserRuleContext getParent (TerminalNode p ) {
909+ return getParent ((ParserRuleContext )p .getParent ());
910+ }
911+
785912 /** Same as p.getParent() except we scan through chain rule nodes */
786913 public static ParserRuleContext getParent (ParserRuleContext p ) {
787914 if ( p ==null ) return null ;
788915 ParserRuleContext lastValidParent = p .getParent ();
789- // now try to walk chain rules starting with the parent of the usual parent
916+ if ( lastValidParent ==null ) return null ; // must have hit the root
917+
918+ return parentClosure (p .getParent ());
919+ //
920+ // // now try to walk chain rules starting with the parent of the usual parent
921+ // ParserRuleContext q = lastValidParent.getParent();
922+ // while ( q!=null && q.getChildCount()==1 ) { // while is a chain rule
923+ // lastValidParent = q;
924+ // q = q.getParent();
925+ // }
926+ // return lastValidParent;
927+ }
928+
929+ // try to walk chain rules starting with the parent of the usual parent
930+ public static ParserRuleContext parentClosure (ParserRuleContext p ) {
931+ ParserRuleContext lastValidParent = p ;
790932 ParserRuleContext q = lastValidParent .getParent ();
791933 while ( q !=null && q .getChildCount ()==1 ) { // while is a chain rule
792934 lastValidParent = q ;
@@ -795,4 +937,15 @@ public static ParserRuleContext getParent(ParserRuleContext p) {
795937 return lastValidParent ;
796938 }
797939
940+ /** Pack a rule index and an alternative number into the same 32-bit integer. */
941+ public static int rulealt (int rule , int alt ) {
942+ if ( rule ==-1 ) return -1 ;
943+ return rule <<16 | alt ;
944+ }
945+
946+ /** Return {rule index, rule alt number} */
947+ public static int [] unrulealt (int ra ) {
948+ if ( ra ==-1 ) return new int [] {-1 , ATN .INVALID_ALT_NUMBER };
949+ return new int [] {(ra >>16 )&0xFFFF ,ra &0xFFFF };
950+ }
798951}
0 commit comments