Skip to content

Commit 740ecbd

Browse files
committed
Lump all 2nd and later siblings in a list into CHILD_INDEX_LIST_ELEMENT. Formatter needs to recompute "matched token on diff line" after predicting newlines but before predicting alignment.
1 parent 1c722df commit 740ecbd

File tree

2 files changed

+43
-28
lines changed

2 files changed

+43
-28
lines changed

java/src/org/antlr/codebuff/CollectFeatures.java

Lines changed: 24 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,14 @@ public class CollectFeatures {
2626
public static final double MAX_CONTEXT_DIFF_THRESHOLD = 0.13;
2727
public static final double MAX_CONTEXT_DIFF_THRESHOLD2 = 0.50;
2828

29+
/** When computing child indexes, we use this value for any child list
30+
* element other than the first one. If a parent has just one X child,
31+
* we use the actual child index. If parent has two or more X children,
32+
* and we are not the first X, use CHILD_INDEX_LIST_ELEMENT. If first
33+
* of two or more X children, use actual child index.
34+
*/
35+
public static final int CHILD_INDEX_LIST_ELEMENT = 1_111_111_111;
36+
2937
// Feature values for pair on diff lines feature
3038
public static final int NOT_PAIR = -1;
3139
public static final int PAIR_ON_SAME_LINE = 0;
@@ -253,7 +261,7 @@ public int getAlignmentCategory(TerminalNode node, Token curToken, int columnDel
253261
// at a newline, are we aligned with a prior sibling (in a list) etc...
254262
ParserRuleContext earliestLeftAncestor = earliestAncestorStartingWithToken(node, curToken);
255263
Pair<ParserRuleContext, Integer> pair =
256-
earliestAncestorWithChildStartingAtCharPos(earliestLeftAncestor.getParent(), curToken);
264+
earliestAncestorWithChildStartingAtCharPos(earliestLeftAncestor, curToken);
257265
if ( pair!=null ) {
258266
int deltaFromLeftAncestor = getDeltaToAncestor(earliestLeftAncestor, pair.a);
259267
aligned = aligncat(deltaFromLeftAncestor, pair.b);
@@ -482,13 +490,13 @@ public static int[] getNodeFeatures(Map<Token, TerminalNode> tokenToNodeMap,
482490
matchingSymbolOnDiffLine,
483491
curTokenStartsNewLine ? 1 : 0,
484492
rulealt(earliestLeftAncestor.getRuleIndex(),earliestLeftAncestor.getAltNumber()),
485-
rulealt(earliestLeftAncestorParent.getRuleIndex(), earliestLeftAncestorParent.getAltNumber()),
493+
earliestLeftAncestorParent!=null ? rulealt(earliestLeftAncestorParent.getRuleIndex(), earliestLeftAncestorParent.getAltNumber()) : -1,
486494
getChildIndex(earliestLeftAncestor),
487-
earliestLeftAncestorParent2!=null ? rulealt(earliestLeftAncestorParent2.getRuleIndex(), earliestLeftAncestorParent2.getAltNumber()) : 0,
495+
earliestLeftAncestorParent2!=null ? rulealt(earliestLeftAncestorParent2.getRuleIndex(), earliestLeftAncestorParent2.getAltNumber()) : -1,
488496
getChildIndex(earliestLeftAncestorParent),
489-
earliestLeftAncestorParent3!=null ? rulealt(earliestLeftAncestorParent3.getRuleIndex(), earliestLeftAncestorParent3.getAltNumber()) : 0,
497+
earliestLeftAncestorParent3!=null ? rulealt(earliestLeftAncestorParent3.getRuleIndex(), earliestLeftAncestorParent3.getAltNumber()) : -1,
490498
getChildIndex(earliestLeftAncestorParent2),
491-
earliestLeftAncestorParent4!=null ? rulealt(earliestLeftAncestorParent4.getRuleIndex(), earliestLeftAncestorParent4.getAltNumber()) : 0,
499+
earliestLeftAncestorParent4!=null ? rulealt(earliestLeftAncestorParent4.getRuleIndex(), earliestLeftAncestorParent4.getAltNumber()) : -1,
492500
getChildIndex(earliestLeftAncestorParent3),
493501

494502
// info
@@ -507,6 +515,7 @@ public static int getMatchingSymbolOnDiffLine(InputDocument doc,
507515
{
508516
TerminalNode matchingLeftNode = getMatchingLeftSymbol(doc, node);
509517
if (matchingLeftNode != null) {
518+
// System.out.println(node.getPayload()+" matches with "+matchingLeftNode.getSymbol());
510519
int matchingLeftTokenLine = matchingLeftNode.getSymbol().getLine();
511520
return matchingLeftTokenLine != line ? PAIR_ON_DIFF_LINE : PAIR_ON_SAME_LINE;
512521
}
@@ -559,15 +568,6 @@ public static List<Integer> viableLeftTokenTypes(ParserRuleContext node,
559568
return newPairs;
560569
}
561570

562-
public static Token findAlignedToken(List<Token> tokens, Token leftEdgeToken) {
563-
for (Token t : tokens) {
564-
if ( t.getCharPositionInLine() == leftEdgeToken.getCharPositionInLine() ) {
565-
return t;
566-
}
567-
}
568-
return null;
569-
}
570-
571571
/** Search backwards from tokIndex into 'tokens' stream and get all on-channel
572572
* tokens on previous line with respect to token at tokIndex.
573573
* return empty list if none found. First token in returned list is
@@ -761,12 +761,20 @@ public static ParserRuleContext getParent(TerminalNode p) {
761761
return parentClosure((ParserRuleContext)p.getParent());
762762
}
763763

764-
public static int getChildIndex(ParseTree t) {
764+
public static int getChildIndex(ParserRuleContext t) {
765765
if ( t==null ) return -1;
766-
ParseTree parent = t.getParent();
766+
ParserRuleContext parent = t.getParent();
767767
if ( parent==null ) {
768768
return -1;
769769
}
770+
// we know we have a parent now
771+
// check to see if we are 2nd or beyond element in a sibling list
772+
List<ParserRuleContext> siblings = parent.getRuleContexts(t.getClass());
773+
if ( siblings.size()>1 && siblings.indexOf(t)>0 ) {
774+
return CHILD_INDEX_LIST_ELEMENT;
775+
}
776+
// Either first of sibling list or not in a list.
777+
// Figure out which child index t is of parent
770778
for (int i = 0; i<parent.getChildCount(); i++) {
771779
if ( parent.getChild(i)==t ) {
772780
return i;

java/src/org/antlr/codebuff/Formatter.java

Lines changed: 19 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,14 @@
1919
import static org.antlr.codebuff.CollectFeatures.CAT_INDENT_FROM_ANCESTOR_FIRST_TOKEN;
2020
import static org.antlr.codebuff.CollectFeatures.CAT_INJECT_NL;
2121
import static org.antlr.codebuff.CollectFeatures.CAT_INJECT_WS;
22+
import static org.antlr.codebuff.CollectFeatures.CAT_NO_ALIGNMENT;
2223
import static org.antlr.codebuff.CollectFeatures.FEATURES_ALIGN;
2324
import static org.antlr.codebuff.CollectFeatures.FEATURES_INJECT_WS;
2425
import static org.antlr.codebuff.CollectFeatures.INDEX_FIRST_ON_LINE;
26+
import static org.antlr.codebuff.CollectFeatures.INDEX_MATCHING_TOKEN_DIFF_LINE;
2527
import static org.antlr.codebuff.CollectFeatures.MAX_CONTEXT_DIFF_THRESHOLD;
2628
import static org.antlr.codebuff.CollectFeatures.earliestAncestorStartingWithToken;
29+
import static org.antlr.codebuff.CollectFeatures.getMatchingSymbolOnDiffLine;
2730
import static org.antlr.codebuff.CollectFeatures.getNodeFeatures;
2831
import static org.antlr.codebuff.CollectFeatures.getRealTokens;
2932
import static org.antlr.codebuff.CollectFeatures.getTokensOnPreviousLine;
@@ -111,6 +114,7 @@ public String format() {
111114
public void processToken(int indexIntoRealTokens, int tokenIndexInStream) {
112115
CommonToken curToken = (CommonToken)tokens.get(tokenIndexInStream);
113116
String tokText = curToken.getText();
117+
TerminalNode node = tokenToNodeMap.get(curToken);
114118

115119
emitCommentsToTheLeft(tokenIndexInStream);
116120

@@ -129,23 +133,15 @@ else if ( (injectNL_WS&0xFF)==CAT_INJECT_WS ) {
129133
ws = CollectFeatures.unwscat(injectNL_WS);
130134
}
131135

132-
// getNodeFeatures() also doesn't know what line curToken is on. If \n, we need to find exemplars that start a line
133-
features[INDEX_FIRST_ON_LINE] = newlines>0 ? 1 : 0; // use \n prediction to match exemplars for alignment
134-
135-
int align = alignClassifier.classify(k, features, corpus.align, MAX_CONTEXT_DIFF_THRESHOLD);
136-
137-
TokenPositionAnalysis tokenPositionAnalysis =
138-
getTokenAnalysis(features, indexIntoRealTokens, tokenIndexInStream, newlines, align, ws);
139-
analysis.setSize(tokenIndexInStream+1);
140-
analysis.set(tokenIndexInStream, tokenPositionAnalysis);
141-
142136
if ( ws==0 && cannotJoin(realTokens.get(indexIntoRealTokens-1), curToken) ) { // failsafe!
143137
ws = 1;
144138
}
145139

140+
int align = CAT_NO_ALIGNMENT;
141+
146142
if ( newlines>0 ) {
147143
output.append(Tool.newlines(newlines));
148-
line++;
144+
line+=newlines;
149145
charPosInLine = 0;
150146

151147
List<Token> tokensOnPreviousLine = getTokensOnPreviousLine(tokens, tokenIndexInStream, line);
@@ -154,9 +150,15 @@ else if ( (injectNL_WS&0xFF)==CAT_INJECT_WS ) {
154150
firstTokenOnPrevLine = tokensOnPreviousLine.get(0);
155151
}
156152

157-
TerminalNode node = tokenToNodeMap.get(curToken);
158153
ParserRuleContext parent = (ParserRuleContext)node.getParent();
159154

155+
// getNodeFeatures() doesn't know what line curToken is on. If \n, we need to find exemplars that start a line
156+
features[INDEX_FIRST_ON_LINE] = newlines>0 ? 1 : 0; // use \n prediction to match exemplars for alignment
157+
// if we decide to inject a newline, we better recompute this value before classifying alignment
158+
features[INDEX_MATCHING_TOKEN_DIFF_LINE] = getMatchingSymbolOnDiffLine(doc, node, line);
159+
160+
align = alignClassifier.classify(k, features, corpus.align, MAX_CONTEXT_DIFF_THRESHOLD);
161+
160162
if ( align==CAT_INDENT ) {
161163
if ( firstTokenOnPrevLine!=null ) { // if not on first line, we cannot indent
162164
int indentedCol = firstTokenOnPrevLine.getCharPositionInLine()+INDENT_LEVEL;
@@ -204,6 +206,11 @@ else if ( (align&0xFF)==CAT_INDENT_FROM_ANCESTOR_FIRST_TOKEN ) {
204206
charPosInLine += ws;
205207
}
206208

209+
TokenPositionAnalysis tokenPositionAnalysis =
210+
getTokenAnalysis(features, indexIntoRealTokens, tokenIndexInStream, newlines, align, ws);
211+
analysis.setSize(tokenIndexInStream+1);
212+
analysis.set(tokenIndexInStream, tokenPositionAnalysis);
213+
207214
// update Token object with position information now that we are about
208215
// to emit it.
209216
curToken.setLine(line);

0 commit comments

Comments
 (0)