Skip to content

Commit 19221dd

Browse files
committed
add failsafe on alignment classification so that if we get no votes at all, we try again with a wider range of acceptable mismatches in context.
1 parent e1a5793 commit 19221dd

3 files changed

Lines changed: 79 additions & 88 deletions

File tree

java/src/org/antlr/codebuff/CollectFeatures.java

Lines changed: 1 addition & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525

2626
public class CollectFeatures {
2727
public static final double MAX_CONTEXT_DIFF_THRESHOLD = 0.20;
28+
public static final double MAX_CONTEXT_DIFF_THRESHOLD2 = 0.50;
2829

2930
// Feature values for pair on diff lines feature
3031
public static final int NOT_PAIR = -1;
@@ -316,32 +317,6 @@ public static int getPrecedingNL(CommonTokenStream tokens, int i) {
316317
return precedingNL;
317318
}
318319

319-
public static boolean isAlignedWithFirstSiblingOfList(Map<Token, TerminalNode> tokenToNodeMap,
320-
CommonTokenStream tokens,
321-
Token curToken)
322-
{
323-
TerminalNode node = tokenToNodeMap.get(curToken);
324-
ParserRuleContext parent = (ParserRuleContext)node.getParent();
325-
ParserRuleContext earliestAncestor = earliestAncestorStartingWithToken(parent, curToken);
326-
boolean aligned = false;
327-
328-
// at a newline, are we aligned with a prior sibling (in a list)?
329-
int precedingNL = getPrecedingNL(tokens, curToken.getTokenIndex());
330-
if ( precedingNL>0 && earliestAncestor!=null ) {
331-
ParserRuleContext commonAncestor = earliestAncestor.getParent();
332-
List<ParserRuleContext> siblings = commonAncestor.getRuleContexts(earliestAncestor.getClass());
333-
if ( siblings.size()>1 ) {
334-
ParserRuleContext firstSibling = siblings.get(0);
335-
Token firstSiblingStartToken = firstSibling.getStart();
336-
if ( firstSiblingStartToken!=curToken && // can't align with yourself
337-
firstSiblingStartToken.getCharPositionInLine()==curToken.getCharPositionInLine() ) {
338-
aligned = true;
339-
}
340-
}
341-
}
342-
return aligned;
343-
}
344-
345320
/** Walk upwards from node while p.start == token; return null if there is
346321
* no ancestor starting at token.
347322
*/

java/src/org/antlr/codebuff/Formatter.java

Lines changed: 55 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
package org.antlr.codebuff;
22

3+
import org.antlr.codebuff.misc.HashBag;
34
import org.antlr.v4.runtime.CommonToken;
45
import org.antlr.v4.runtime.CommonTokenStream;
56
import org.antlr.v4.runtime.ParserRuleContext;
@@ -23,6 +24,7 @@
2324
import static org.antlr.codebuff.CollectFeatures.INDEX_FIRST_ON_LINE;
2425
import static org.antlr.codebuff.CollectFeatures.INDEX_PREV_END_COLUMN;
2526
import static org.antlr.codebuff.CollectFeatures.MAX_CONTEXT_DIFF_THRESHOLD;
27+
import static org.antlr.codebuff.CollectFeatures.MAX_CONTEXT_DIFF_THRESHOLD2;
2628
import static org.antlr.codebuff.CollectFeatures.earliestAncestorStartingWithToken;
2729
import static org.antlr.codebuff.CollectFeatures.getNodeFeatures;
2830
import static org.antlr.codebuff.CollectFeatures.getRealTokens;
@@ -124,7 +126,14 @@ public void processToken(int indexIntoRealTokens, int tokenIndexInStream) {
124126
features[INDEX_FIRST_ON_LINE] = injectNewline; // use \n prediction to match exemplars for alignment
125127

126128
int align = alignClassifier.classify(k, features, corpus.align, MAX_CONTEXT_DIFF_THRESHOLD);
127-
//indentClassifier.classify(k, features, corpus.indent, CollectFeatures.MAX_CONTEXT_DIFF_THRESHOLD);
129+
if ( align==CAT_NO_ALIGNMENT ) {
130+
HashBag<Integer> votes = alignClassifier.votes(k, features, corpus.align, MAX_CONTEXT_DIFF_THRESHOLD);
131+
if ( votes.size()==0 ) {
132+
// try with less strict match threshold to get some indication of alignment
133+
align = alignClassifier.classify(k, features, corpus.align, MAX_CONTEXT_DIFF_THRESHOLD2);
134+
}
135+
}
136+
128137
int ws = wsClassifier.classify(k, features, corpus.injectWS, MAX_CONTEXT_DIFF_THRESHOLD);
129138

130139
TokenPositionAnalysis tokenPositionAnalysis =
@@ -150,57 +159,51 @@ public void processToken(int indexIntoRealTokens, int tokenIndexInStream) {
150159
TerminalNode node = tokenToNodeMap.get(curToken);
151160
ParserRuleContext parent = (ParserRuleContext)node.getParent();
152161

153-
switch ( align ) {
154-
case CAT_INDENT :
155-
if ( firstTokenOnPrevLine!=null ) { // if not on first line, we can indent indent
156-
int indentedCol = firstTokenOnPrevLine.getCharPositionInLine() + INDENT_LEVEL;
157-
charPosInLine = indentedCol;
158-
output.append(Tool.spaces(indentedCol));
159-
}
160-
break;
161-
case CAT_NO_ALIGNMENT :
162-
break;
163-
164-
default :
165-
if ( (align&0xFF)==CAT_ALIGN_WITH_ANCESTOR_CHILD ) {
166-
int[] deltaChild = CollectFeatures.unaligncat(align);
167-
int deltaFromAncestor = deltaChild[0];
168-
int childIndex = deltaChild[1];
169-
ParserRuleContext earliestLeftAncestor = earliestAncestorStartingWithToken(parent, curToken);
170-
if ( earliestLeftAncestor==null ) {
171-
earliestLeftAncestor = parent;
172-
}
173-
ParserRuleContext ancestor = CollectFeatures.getAncestor(earliestLeftAncestor, deltaFromAncestor);
174-
ParseTree child = ancestor.getChild(childIndex);
175-
Token start = null;
176-
if ( child instanceof ParserRuleContext ) {
177-
start = ((ParserRuleContext) child).getStart();
178-
}
179-
else if ( child instanceof TerminalNode ){
180-
start = ((TerminalNode)child).getSymbol();
181-
}
182-
else {
183-
// uh oh.
184-
System.err.println("Whoops. Tried access invalid child");
185-
}
186-
if ( start!=null ) {
187-
int indentCol = start.getCharPositionInLine();
188-
charPosInLine = indentCol;
189-
output.append(Tool.spaces(indentCol));
190-
}
191-
}
192-
else if ( (align&0xFF)==CAT_INDENT_FROM_ANCESTOR_FIRST_TOKEN ) {
193-
int deltaFromAncestor = CollectFeatures.unindentcat(align);
194-
ParserRuleContext earliestLeftAncestor = earliestAncestorStartingWithToken(parent, curToken);
195-
if ( earliestLeftAncestor==null ) {
196-
earliestLeftAncestor = parent;
197-
}
198-
ParserRuleContext ancestor = CollectFeatures.getAncestor(earliestLeftAncestor, deltaFromAncestor);
199-
Token start = ancestor.getStart();
200-
int indentCol = start.getCharPositionInLine() + INDENT_LEVEL;
201-
charPosInLine = indentCol;
202-
output.append(Tool.spaces(indentCol));
203-
}
162+
if ( align==CAT_INDENT ) {
163+
if ( firstTokenOnPrevLine!=null ) { // if not on first line, we can indent indent
164+
int indentedCol = firstTokenOnPrevLine.getCharPositionInLine()+INDENT_LEVEL;
165+
charPosInLine = indentedCol;
166+
output.append(Tool.spaces(indentedCol));
167+
}
168+
}
169+
else if ( (align&0xFF)==CAT_ALIGN_WITH_ANCESTOR_CHILD ) {
170+
int[] deltaChild = CollectFeatures.unaligncat(align);
171+
int deltaFromAncestor = deltaChild[0];
172+
int childIndex = deltaChild[1];
173+
ParserRuleContext earliestLeftAncestor = earliestAncestorStartingWithToken(parent, curToken);
174+
if ( earliestLeftAncestor==null ) {
175+
earliestLeftAncestor = parent;
176+
}
177+
ParserRuleContext ancestor = CollectFeatures.getAncestor(earliestLeftAncestor, deltaFromAncestor);
178+
ParseTree child = ancestor.getChild(childIndex);
179+
Token start = null;
180+
if ( child instanceof ParserRuleContext ) {
181+
start = ((ParserRuleContext) child).getStart();
182+
}
183+
else if ( child instanceof TerminalNode ){
184+
start = ((TerminalNode)child).getSymbol();
185+
}
186+
else {
187+
// uh oh.
188+
System.err.println("Whoops. Tried access invalid child");
189+
}
190+
if ( start!=null ) {
191+
int indentCol = start.getCharPositionInLine();
192+
charPosInLine = indentCol;
193+
output.append(Tool.spaces(indentCol));
194+
}
195+
}
196+
else if ( (align&0xFF)==CAT_INDENT_FROM_ANCESTOR_FIRST_TOKEN ) {
197+
int deltaFromAncestor = CollectFeatures.unindentcat(align);
198+
ParserRuleContext earliestLeftAncestor = earliestAncestorStartingWithToken(parent, curToken);
199+
if ( earliestLeftAncestor==null ) {
200+
earliestLeftAncestor = parent;
201+
}
202+
ParserRuleContext ancestor = CollectFeatures.getAncestor(earliestLeftAncestor, deltaFromAncestor);
203+
Token start = ancestor.getStart();
204+
int indentCol = start.getCharPositionInLine() + INDENT_LEVEL;
205+
charPosInLine = indentCol;
206+
output.append(Tool.spaces(indentCol));
204207
}
205208
}
206209
else {

java/src/org/antlr/codebuff/Tool.java

Lines changed: 23 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -34,19 +34,32 @@ public static void main(String[] args)
3434
throws Exception
3535
{
3636
if ( args.length<2 ) {
37-
System.err.println("ExtractFeatures root-dir-of-samples test-file");
37+
System.err.println("ExtractFeatures [-java|-antlr] root-dir-of-samples test-file");
3838
}
3939
int tabSize = 4; // TODO: MAKE AN ARGUMENT
40-
String corpusDir = args[0];
41-
String testFilename = args[1];
42-
Corpus corpus = train(corpusDir, JavaLexer.class, JavaParser.class, tabSize);
43-
InputDocument testDoc = load(testFilename, JavaLexer.class, tabSize);
44-
Pair<String,List<TokenPositionAnalysis>> results = format(corpus, testDoc, tabSize);
45-
String output = results.a;
46-
List<TokenPositionAnalysis> analysisPerToken = results.b;
40+
String language = args[0];
41+
String corpusDir = args[1];
42+
String testFilename = args[2];
43+
String output;
44+
if ( language.equals("-java") ) {
45+
Corpus corpus = train(corpusDir, JavaLexer.class, JavaParser.class, tabSize);
46+
InputDocument testDoc = load(testFilename, JavaLexer.class, tabSize);
47+
Pair<String,List<TokenPositionAnalysis>> results = format(corpus, testDoc, tabSize);
48+
output = results.a;
49+
List<TokenPositionAnalysis> analysisPerToken = results.b;
50+
GUIController controller = new GUIController(analysisPerToken, testDoc, output, JavaLexer.class);
51+
controller.show();
52+
}
53+
else {
54+
Corpus corpus = train(corpusDir, ANTLRv4Lexer.class, ANTLRv4Parser.class, tabSize);
55+
InputDocument testDoc = load(testFilename, ANTLRv4Lexer.class, tabSize);
56+
Pair<String,List<TokenPositionAnalysis>> results = format(corpus, testDoc, tabSize);
57+
output = results.a;
58+
List<TokenPositionAnalysis> analysisPerToken = results.b;
59+
GUIController controller = new GUIController(analysisPerToken, testDoc, output, JavaLexer.class);
60+
controller.show();
61+
}
4762
System.out.println(output);
48-
GUIController controller = new GUIController(analysisPerToken, testDoc, output, JavaLexer.class);
49-
controller.show();
5063
}
5164

5265
/** Given a corpus, format the document by tokenizing and using the

0 commit comments

Comments
 (0)