Lump all 2nd and above siblings in list to CHILD_INDEX_LIST_ELEMENT. Formatter needs to recompute "matched token on diff line" after predicting newlines but before predicting alignment.

parrt · parrt · commit 740ecbdd1fb7 · 2016-04-06T17:54:47.000-07:00
diff --git a/java/src/org/antlr/codebuff/CollectFeatures.java b/java/src/org/antlr/codebuff/CollectFeatures.java
@@ -26,6 +26,14 @@ public class CollectFeatures {
 	public static final double MAX_CONTEXT_DIFF_THRESHOLD = 0.13;
 	public static final double MAX_CONTEXT_DIFF_THRESHOLD2 = 0.50;
 
+	/** When computing child indexes, we use this value for any child list
+	 *  element other than the first one.  If a parent has just one X child,
+	 *  we use the actual child index. If parent has two or more X children,
+	 *  and we are not the first X, use CHILD_INDEX_LIST_ELEMENT. If first
+	 *  of two or more X children, use actual child index.
+	 */
+	public static final int CHILD_INDEX_LIST_ELEMENT = 1_111_111_111;
+
 	// Feature values for pair on diff lines feature
 	public static final int NOT_PAIR = -1;
 	public static final int PAIR_ON_SAME_LINE = 0;
@@ -253,7 +261,7 @@ public int getAlignmentCategory(TerminalNode node, Token curToken, int columnDel
 		// at a newline, are we aligned with a prior sibling (in a list) etc...
 		ParserRuleContext earliestLeftAncestor = earliestAncestorStartingWithToken(node, curToken);
 		Pair<ParserRuleContext, Integer> pair =
-			earliestAncestorWithChildStartingAtCharPos(earliestLeftAncestor.getParent(), curToken);
+			earliestAncestorWithChildStartingAtCharPos(earliestLeftAncestor, curToken);
 		if ( pair!=null ) {
 			int deltaFromLeftAncestor = getDeltaToAncestor(earliestLeftAncestor, pair.a);
 			aligned = aligncat(deltaFromLeftAncestor, pair.b);
@@ -482,13 +490,13 @@ public static int[] getNodeFeatures(Map<Token, TerminalNode> tokenToNodeMap,
 			matchingSymbolOnDiffLine,
 			curTokenStartsNewLine ? 1 : 0,
 			rulealt(earliestLeftAncestor.getRuleIndex(),earliestLeftAncestor.getAltNumber()),
-			rulealt(earliestLeftAncestorParent.getRuleIndex(), earliestLeftAncestorParent.getAltNumber()),
+			earliestLeftAncestorParent!=null ? rulealt(earliestLeftAncestorParent.getRuleIndex(), earliestLeftAncestorParent.getAltNumber()) : -1,
 			getChildIndex(earliestLeftAncestor),
-			earliestLeftAncestorParent2!=null ? rulealt(earliestLeftAncestorParent2.getRuleIndex(), earliestLeftAncestorParent2.getAltNumber()) : 0,
+			earliestLeftAncestorParent2!=null ? rulealt(earliestLeftAncestorParent2.getRuleIndex(), earliestLeftAncestorParent2.getAltNumber()) : -1,
 			getChildIndex(earliestLeftAncestorParent),
-			earliestLeftAncestorParent3!=null ? rulealt(earliestLeftAncestorParent3.getRuleIndex(), earliestLeftAncestorParent3.getAltNumber()) : 0,
+			earliestLeftAncestorParent3!=null ? rulealt(earliestLeftAncestorParent3.getRuleIndex(), earliestLeftAncestorParent3.getAltNumber()) : -1,
 			getChildIndex(earliestLeftAncestorParent2),
-			earliestLeftAncestorParent4!=null ? rulealt(earliestLeftAncestorParent4.getRuleIndex(), earliestLeftAncestorParent4.getAltNumber()) : 0,
+			earliestLeftAncestorParent4!=null ? rulealt(earliestLeftAncestorParent4.getRuleIndex(), earliestLeftAncestorParent4.getAltNumber()) : -1,
 			getChildIndex(earliestLeftAncestorParent3),
 
 			// info
@@ -507,6 +515,7 @@ public static int getMatchingSymbolOnDiffLine(InputDocument doc,
 	{
 		TerminalNode matchingLeftNode = getMatchingLeftSymbol(doc, node);
 		if (matchingLeftNode != null) {
+//			System.out.println(node.getPayload()+" matches with "+matchingLeftNode.getSymbol());
 			int matchingLeftTokenLine = matchingLeftNode.getSymbol().getLine();
 			return matchingLeftTokenLine != line ? PAIR_ON_DIFF_LINE : PAIR_ON_SAME_LINE;
 		}
@@ -559,15 +568,6 @@ public static List<Integer> viableLeftTokenTypes(ParserRuleContext node,
 		return newPairs;
 	}
 
-	public static Token findAlignedToken(List<Token> tokens, Token leftEdgeToken) {
-		for (Token t : tokens) {
-			if ( t.getCharPositionInLine() == leftEdgeToken.getCharPositionInLine() ) {
-				return t;
-			}
-		}
-		return null;
-	}
-
 	/** Search backwards from tokIndex into 'tokens' stream and get all on-channel
 	 *  tokens on previous line with respect to token at tokIndex.
 	 *  return empty list if none found. First token in returned list is
@@ -761,12 +761,20 @@ public static ParserRuleContext getParent(TerminalNode p) {
 		return parentClosure((ParserRuleContext)p.getParent());
 	}
 
-	public static int getChildIndex(ParseTree t) {
+	public static int getChildIndex(ParserRuleContext t) {
 		if ( t==null ) return -1;
-		ParseTree parent = t.getParent();
+		ParserRuleContext parent = t.getParent();
 		if ( parent==null ) {
 			return -1;
 		}
+		// we know we have a parent now
+		// check to see if we are 2nd or beyond element in a sibling list
+		List<ParserRuleContext> siblings = parent.getRuleContexts(t.getClass());
+		if ( siblings.size()>1 && siblings.indexOf(t)>0 ) {
+			return CHILD_INDEX_LIST_ELEMENT;
+		}
+		// Either first of sibling list or not in a list.
+		// Figure out which child index t is of parent
 		for (int i = 0; i<parent.getChildCount(); i++) {
 			if ( parent.getChild(i)==t ) {
 				return i;
diff --git a/java/src/org/antlr/codebuff/Formatter.java b/java/src/org/antlr/codebuff/Formatter.java
@@ -19,11 +19,14 @@
 import static org.antlr.codebuff.CollectFeatures.CAT_INDENT_FROM_ANCESTOR_FIRST_TOKEN;
 import static org.antlr.codebuff.CollectFeatures.CAT_INJECT_NL;
 import static org.antlr.codebuff.CollectFeatures.CAT_INJECT_WS;
+import static org.antlr.codebuff.CollectFeatures.CAT_NO_ALIGNMENT;
 import static org.antlr.codebuff.CollectFeatures.FEATURES_ALIGN;
 import static org.antlr.codebuff.CollectFeatures.FEATURES_INJECT_WS;
 import static org.antlr.codebuff.CollectFeatures.INDEX_FIRST_ON_LINE;
+import static org.antlr.codebuff.CollectFeatures.INDEX_MATCHING_TOKEN_DIFF_LINE;
 import static org.antlr.codebuff.CollectFeatures.MAX_CONTEXT_DIFF_THRESHOLD;
 import static org.antlr.codebuff.CollectFeatures.earliestAncestorStartingWithToken;
+import static org.antlr.codebuff.CollectFeatures.getMatchingSymbolOnDiffLine;
 import static org.antlr.codebuff.CollectFeatures.getNodeFeatures;
 import static org.antlr.codebuff.CollectFeatures.getRealTokens;
 import static org.antlr.codebuff.CollectFeatures.getTokensOnPreviousLine;
@@ -111,6 +114,7 @@ public String format() {
 	public void processToken(int indexIntoRealTokens, int tokenIndexInStream) {
 		CommonToken curToken = (CommonToken)tokens.get(tokenIndexInStream);
 		String tokText = curToken.getText();
+		TerminalNode node = tokenToNodeMap.get(curToken);
 
 		emitCommentsToTheLeft(tokenIndexInStream);
 
@@ -129,23 +133,15 @@ else if ( (injectNL_WS&0xFF)==CAT_INJECT_WS ) {
 			ws = CollectFeatures.unwscat(injectNL_WS);
 		}
 
-		// getNodeFeatures() also doesn't know what line curToken is on. If \n, we need to find exemplars that start a line
-		features[INDEX_FIRST_ON_LINE] = newlines>0 ? 1 : 0; // use \n prediction to match exemplars for alignment
-
-		int align = alignClassifier.classify(k, features, corpus.align, MAX_CONTEXT_DIFF_THRESHOLD);
-
-		TokenPositionAnalysis tokenPositionAnalysis =
-			getTokenAnalysis(features, indexIntoRealTokens, tokenIndexInStream, newlines, align, ws);
-		analysis.setSize(tokenIndexInStream+1);
-		analysis.set(tokenIndexInStream, tokenPositionAnalysis);
-
 		if ( ws==0 && cannotJoin(realTokens.get(indexIntoRealTokens-1), curToken) ) { // failsafe!
 			ws = 1;
 		}
 
+		int align = CAT_NO_ALIGNMENT;
+
 		if ( newlines>0 ) {
 			output.append(Tool.newlines(newlines));
-			line++;
+			line+=newlines;
 			charPosInLine = 0;
 
 			List<Token> tokensOnPreviousLine = getTokensOnPreviousLine(tokens, tokenIndexInStream, line);
@@ -154,9 +150,15 @@ else if ( (injectNL_WS&0xFF)==CAT_INJECT_WS ) {
 				firstTokenOnPrevLine = tokensOnPreviousLine.get(0);
 			}
 
-			TerminalNode node = tokenToNodeMap.get(curToken);
 			ParserRuleContext parent = (ParserRuleContext)node.getParent();
 
+			// getNodeFeatures() doesn't know what line curToken is on. If \n, we need to find exemplars that start a line
+			features[INDEX_FIRST_ON_LINE] = newlines>0 ? 1 : 0; // use \n prediction to match exemplars for alignment
+			// if we decide to inject a newline, we better recompute this value before classifying alignment
+			features[INDEX_MATCHING_TOKEN_DIFF_LINE] = getMatchingSymbolOnDiffLine(doc, node, line);
+
+			align = alignClassifier.classify(k, features, corpus.align, MAX_CONTEXT_DIFF_THRESHOLD);
+
 			if ( align==CAT_INDENT ) {
 				if ( firstTokenOnPrevLine!=null ) { // if not on first line, we cannot indent
 					int indentedCol = firstTokenOnPrevLine.getCharPositionInLine()+INDENT_LEVEL;
@@ -204,6 +206,11 @@ else if ( (align&0xFF)==CAT_INDENT_FROM_ANCESTOR_FIRST_TOKEN ) {
 			charPosInLine += ws;
 		}
 
+		TokenPositionAnalysis tokenPositionAnalysis =
+			getTokenAnalysis(features, indexIntoRealTokens, tokenIndexInStream, newlines, align, ws);
+		analysis.setSize(tokenIndexInStream+1);
+		analysis.set(tokenIndexInStream, tokenPositionAnalysis);
+
 		// update Token object with position information now that we are about
 		// to emit it.
 		curToken.setLine(line);