Skip to content

Commit e1a5793

Browse files
committed
Merge pull request #18 from antlr/make-align-like-indent
rm start of list feature, find alignment with rule delta, child index…
2 parents e7b7dd2 + e50d721 commit e1a5793

11 files changed

Lines changed: 210 additions & 346 deletions

java/codebuff.ipr

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -37,6 +37,24 @@
3737
<component name="EntryPointsManager">
3838
<entry_points version="2.0" />
3939
</component>
40+
<component name="InspectionProjectProfileManager">
41+
<profile version="1.0">
42+
<option name="myName" value="Project Default" />
43+
<option name="myLocal" value="true" />
44+
<inspection_tool class="FieldCanBeLocal" enabled="false" level="WARNING" enabled_by_default="false" />
45+
<inspection_tool class="FinalPrivateMethod" enabled="false" level="WARNING" enabled_by_default="false" />
46+
<inspection_tool class="FinalStaticMethod" enabled="false" level="WARNING" enabled_by_default="false" />
47+
<inspection_tool class="LoggerInitializedWithForeignClass" enabled="false" level="WARNING" enabled_by_default="false">
48+
<option name="loggerClassName" value="org.apache.log4j.Logger,org.slf4j.LoggerFactory,org.apache.commons.logging.LogFactory,java.util.logging.Logger" />
49+
<option name="loggerFactoryMethodName" value="getLogger,getLogger,getLog,getLogger" />
50+
</inspection_tool>
51+
<inspection_tool class="ParameterCanBeLocal" enabled="false" level="WARNING" enabled_by_default="false" />
52+
<inspection_tool class="TypeParameterHidesVisibleType" enabled="false" level="WARNING" enabled_by_default="false" />
53+
</profile>
54+
<option name="PROJECT_PROFILE" value="Project Default" />
55+
<option name="USE_PROJECT_PROFILE" value="true" />
56+
<version value="1.0" />
57+
</component>
4058
<component name="MavenProjectsManager">
4159
<option name="originalFiles">
4260
<list>

java/src/org/antlr/codebuff/CollectFeatures.java

Lines changed: 126 additions & 201 deletions
Large diffs are not rendered by default.

java/src/org/antlr/codebuff/Corpus.java

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -86,7 +86,9 @@ public void buildTokenContextIndex() {
8686
for (int i=0; i<X.size(); i++) {
8787
int curTokenRuleIndex = X.get(i)[CollectFeatures.INDEX_RULE];
8888
int prevTokenRuleIndex = X.get(i)[CollectFeatures.INDEX_PREV_RULE];
89-
Pair<Integer, Integer> key = new Pair<>(prevTokenRuleIndex, curTokenRuleIndex);
89+
int pr = CollectFeatures.unrulealt(prevTokenRuleIndex)[0];
90+
int cr = CollectFeatures.unrulealt(curTokenRuleIndex)[0];
91+
Pair<Integer, Integer> key = new Pair<>(pr, cr);
9092
List<Integer> vectorIndexes = curAndPrevTokenRuleIndexToVectorsMap.get(key);
9193
if ( vectorIndexes==null ) {
9294
vectorIndexes = new ArrayList<>();

java/src/org/antlr/codebuff/Formatter.java

Lines changed: 36 additions & 47 deletions
Original file line number | Diff line number | Diff line change
@@ -6,16 +6,14 @@
66
import org.antlr.v4.runtime.Token;
77
import org.antlr.v4.runtime.WritableToken;
88
import org.antlr.v4.runtime.misc.Interval;
9+
import org.antlr.v4.runtime.tree.ParseTree;
910
import org.antlr.v4.runtime.tree.TerminalNode;
1011

1112
import java.util.List;
1213
import java.util.Map;
1314
import java.util.Vector;
1415

15-
import static org.antlr.codebuff.CollectFeatures.CAT_ALIGN_WITH_ANCESTORS_PARENT_FIRST_TOKEN;
16-
import static org.antlr.codebuff.CollectFeatures.CAT_ALIGN_WITH_ANCESTOR_FIRST_TOKEN;
17-
import static org.antlr.codebuff.CollectFeatures.CAT_ALIGN_WITH_LIST_FIRST_ELEMENT;
18-
import static org.antlr.codebuff.CollectFeatures.CAT_ALIGN_WITH_PAIR;
16+
import static org.antlr.codebuff.CollectFeatures.CAT_ALIGN_WITH_ANCESTOR_CHILD;
1917
import static org.antlr.codebuff.CollectFeatures.CAT_INDENT;
2018
import static org.antlr.codebuff.CollectFeatures.CAT_INDENT_FROM_ANCESTOR_FIRST_TOKEN;
2119
import static org.antlr.codebuff.CollectFeatures.CAT_NO_ALIGNMENT;
@@ -25,15 +23,11 @@
2523
import static org.antlr.codebuff.CollectFeatures.INDEX_FIRST_ON_LINE;
2624
import static org.antlr.codebuff.CollectFeatures.INDEX_PREV_END_COLUMN;
2725
import static org.antlr.codebuff.CollectFeatures.MAX_CONTEXT_DIFF_THRESHOLD;
28-
import static org.antlr.codebuff.CollectFeatures.earliestAncestorEndingWithToken;
2926
import static org.antlr.codebuff.CollectFeatures.earliestAncestorStartingWithToken;
30-
import static org.antlr.codebuff.CollectFeatures.getListSiblings;
31-
import static org.antlr.codebuff.CollectFeatures.getMatchingLeftSymbol;
3227
import static org.antlr.codebuff.CollectFeatures.getNodeFeatures;
3328
import static org.antlr.codebuff.CollectFeatures.getRealTokens;
3429
import static org.antlr.codebuff.CollectFeatures.getTokensOnPreviousLine;
3530
import static org.antlr.codebuff.CollectFeatures.indexTree;
36-
import static org.antlr.codebuff.CollectFeatures.isAlignedWithFirstSiblingOfList;
3731

3832
public class Formatter {
3933
public static final int INDENT_LEVEL = 4;
@@ -75,6 +69,7 @@ public Formatter(Corpus corpus, InputDocument doc, int tabSize) {
7569
wsClassifier = new CodekNNClassifier(corpus, FEATURES_INJECT_WS);
7670
alignClassifier = new CodekNNClassifier(corpus, FEATURES_ALIGN);
7771
// k = (int)Math.sqrt(corpus.X.size());
72+
// k = 7;
7873
k = 11;
7974
this.tabSize = tabSize;
8075
}
@@ -154,7 +149,6 @@ public void processToken(int indexIntoRealTokens, int tokenIndexInStream) {
154149

155150
TerminalNode node = tokenToNodeMap.get(curToken);
156151
ParserRuleContext parent = (ParserRuleContext)node.getParent();
157-
ParserRuleContext earliestRightAncestor = earliestAncestorEndingWithToken(parent, curToken);
158152

159153
switch ( align ) {
160154
case CAT_INDENT :
@@ -164,47 +158,43 @@ public void processToken(int indexIntoRealTokens, int tokenIndexInStream) {
164158
output.append(Tool.spaces(indentedCol));
165159
}
166160
break;
167-
case CAT_ALIGN_WITH_ANCESTOR_FIRST_TOKEN :
168-
if ( earliestRightAncestor!=null ) {
169-
Token earliestRightAncestorStart = earliestRightAncestor.getStart();
170-
int linedUpCol = earliestRightAncestorStart.getCharPositionInLine();
171-
charPosInLine = linedUpCol;
172-
output.append(Tool.spaces(linedUpCol));
173-
}
174-
break;
175-
case CAT_ALIGN_WITH_ANCESTORS_PARENT_FIRST_TOKEN :
176-
if ( earliestRightAncestor!=null ) {
177-
ParserRuleContext earliestAncestorParent = earliestRightAncestor.getParent();
178-
if ( earliestAncestorParent!=null ) {
179-
Token earliestAncestorParentStart = earliestAncestorParent.getStart();
180-
int linedUpCol = earliestAncestorParentStart.getCharPositionInLine();
181-
charPosInLine = linedUpCol;
182-
output.append(Tool.spaces(linedUpCol));
183-
}
184-
}
185-
break;
186-
case CAT_ALIGN_WITH_LIST_FIRST_ELEMENT :
187-
List<ParserRuleContext> listSiblings = getListSiblings(tokenToNodeMap, curToken);
188-
if ( listSiblings!=null ) {
189-
ParserRuleContext firstSibling = listSiblings.get(0);
190-
int linedUpCol = firstSibling.getStart().getCharPositionInLine();
191-
charPosInLine = linedUpCol;
192-
output.append(Tool.spaces(linedUpCol));
193-
}
194-
break;
195-
case CAT_ALIGN_WITH_PAIR :
196-
TerminalNode matchingLeftSymbol = getMatchingLeftSymbol(doc, node);
197-
int linedUpCol = matchingLeftSymbol.getSymbol().getCharPositionInLine();
198-
charPosInLine = linedUpCol;
199-
output.append(Tool.spaces(linedUpCol));
200-
break;
201161
case CAT_NO_ALIGNMENT :
202162
break;
203163

204164
default :
205-
if ( align>=CAT_INDENT_FROM_ANCESTOR_FIRST_TOKEN ) {
206-
int deltaFromAncestor = align - CAT_INDENT_FROM_ANCESTOR_FIRST_TOKEN;
165+
if ( (align&0xFF)==CAT_ALIGN_WITH_ANCESTOR_CHILD ) {
166+
int[] deltaChild = CollectFeatures.unaligncat(align);
167+
int deltaFromAncestor = deltaChild[0];
168+
int childIndex = deltaChild[1];
207169
ParserRuleContext earliestLeftAncestor = earliestAncestorStartingWithToken(parent, curToken);
170+
if ( earliestLeftAncestor==null ) {
171+
earliestLeftAncestor = parent;
172+
}
173+
ParserRuleContext ancestor = CollectFeatures.getAncestor(earliestLeftAncestor, deltaFromAncestor);
174+
ParseTree child = ancestor.getChild(childIndex);
175+
Token start = null;
176+
if ( child instanceof ParserRuleContext ) {
177+
start = ((ParserRuleContext) child).getStart();
178+
}
179+
else if ( child instanceof TerminalNode ){
180+
start = ((TerminalNode)child).getSymbol();
181+
}
182+
else {
183+
// uh oh.
184+
System.err.println("Whoops. Tried access invalid child");
185+
}
186+
if ( start!=null ) {
187+
int indentCol = start.getCharPositionInLine();
188+
charPosInLine = indentCol;
189+
output.append(Tool.spaces(indentCol));
190+
}
191+
}
192+
else if ( (align&0xFF)==CAT_INDENT_FROM_ANCESTOR_FIRST_TOKEN ) {
193+
int deltaFromAncestor = CollectFeatures.unindentcat(align);
194+
ParserRuleContext earliestLeftAncestor = earliestAncestorStartingWithToken(parent, curToken);
195+
if ( earliestLeftAncestor==null ) {
196+
earliestLeftAncestor = parent;
197+
}
208198
ParserRuleContext ancestor = CollectFeatures.getAncestor(earliestLeftAncestor, deltaFromAncestor);
209199
Token start = ancestor.getStart();
210200
int indentCol = start.getCharPositionInLine() + INDENT_LEVEL;
@@ -251,13 +241,12 @@ public TokenPositionAnalysis getTokenAnalysis(int[] features, int indexIntoRealT
251241
boolean prevIsWS = prevToken.getType()==JavaLexer.WS;
252242
int actualNL = Tool.count(prevToken.getText(), '\n');
253243
int actualWS = Tool.count(prevToken.getText(), ' ');
254-
boolean actualAlign = isAlignedWithFirstSiblingOfList(tokenToNodeMap, tokens, curToken);
255244
String newlinePredictionString = String.format("### line %d: predicted %d \\n actual %s",
256245
originalCurToken.getLine(), injectNewline, prevIsWS ? actualNL : "none");
257246
String alignPredictionString = String.format("### line %d: predicted %s actual %s",
258247
originalCurToken.getLine(),
259248
alignWithPrevious==1?"align":"unaligned",
260-
actualAlign?"align":"unaligned");
249+
"?");
261250
String wsPredictionString = String.format("### line %d: predicted %d ' ' actual %s",
262251
originalCurToken.getLine(), ws, prevIsWS ? actualWS : "none");
263252
if ( failsafeTriggered ) {

java/src/org/antlr/codebuff/InputDocument.java

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -41,7 +41,10 @@ public String getLine(int line) {
4141
if ( lines==null ) {
4242
lines = Arrays.asList(content.split("\n"));
4343
}
44-
return lines.get(line-1);
44+
if ( line>0 ) {
45+
return lines.get(line-1);
46+
}
47+
return null;
4548
}
4649

4750
public double getIncorrectWSRate() {

java/src/org/antlr/codebuff/Neighbor.java

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,12 @@ public String toString(FeatureMetaData[] FEATURES, List<Integer> Y) {
2121
String lineText = doc.getLine(line);
2222
int col = X[CollectFeatures.INDEX_INFO_CHARPOS];
2323
// insert a dot right before char position
24-
lineText = lineText.substring(0,col) + '\u00B7' + lineText.substring(col,lineText.length());
25-
return String.format("%s (cat=%d,d=%1.3f): %s", features, Y.get(corpusVectorIndex), distance, lineText);
24+
if ( lineText!=null ) {
25+
lineText = lineText.substring(0, col)+'\u00B7'+lineText.substring(col, lineText.length());
26+
}
27+
int cat = Y.get(corpusVectorIndex);
28+
int[] elements = CollectFeatures.unaligncat(cat);
29+
String display = String.format("%d|%d|%d", cat&0xFF, elements[0], elements[1]);
30+
return String.format("%s (cat=%s,d=%1.3f): %s", features, display, distance, lineText);
2631
}
2732
}

java/src/org/antlr/codebuff/TokenContext.java

Lines changed: 0 additions & 34 deletions
This file was deleted.

java/src/org/antlr/codebuff/TokenPositionAnalysis.java

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -7,9 +7,6 @@ public class TokenPositionAnalysis {
77
public String ws = "n/a";
88
public String align = "n/a";
99

10-
public TokenPositionAnalysis() {
11-
}
12-
1310
public TokenPositionAnalysis(String newline, String align, String ws) {
1411
this.align = align;
1512
this.newline = newline;

java/src/org/antlr/codebuff/Tool.java

Lines changed: 7 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -14,10 +14,7 @@
1414
import org.antlr.v4.runtime.misc.Pair;
1515
import org.antlr.v4.runtime.tree.ParseTreeWalker;
1616

17-
import java.io.BufferedWriter;
1817
import java.io.File;
19-
import java.io.FileWriter;
20-
import java.io.IOException;
2118
import java.lang.reflect.Constructor;
2219
import java.lang.reflect.Method;
2320
import java.nio.file.FileSystems;
@@ -119,21 +116,6 @@ public static Corpus train(String rootDir,
119116
return corpus;
120117
}
121118

122-
public void saveCSV(List<InputDocument> documents, String dir) throws IOException {
123-
FileWriter fw = new FileWriter(dir+"/style.csv");
124-
BufferedWriter bw = new BufferedWriter(fw);
125-
// bw.write(Utils.join(CollectFeatures.FEATURE_NAMES, ", "));
126-
bw.write("\n");
127-
for (InputDocument doc : documents) {
128-
for (int[] record : doc.featureVectors) {
129-
String r = join(record, ", ");
130-
bw.write(r);
131-
bw.write('\n');
132-
}
133-
}
134-
bw.close();
135-
}
136-
137119
public static Corpus processSampleDocs(List<InputDocument> docs,
138120
Class<? extends Lexer> lexerClass,
139121
Class<? extends Parser> parserClass,
@@ -164,9 +146,7 @@ public static Corpus processSampleDocs(List<InputDocument> docs,
164146
}
165147

166148
/** Parse document, save feature vectors to the doc but return it also */
167-
public static void process(InputDocument doc, int tabSize, Map<String, List<Pair<Integer, Integer>>> ruleToPairsBag)
168-
throws Exception
169-
{
149+
public static void process(InputDocument doc, int tabSize, Map<String, List<Pair<Integer, Integer>>> ruleToPairsBag) {
170150
CollectFeatures collector = new CollectFeatures(doc, tabSize, ruleToPairsBag);
171151
collector.computeFeatureVectors();
172152

@@ -234,7 +214,7 @@ public static List<InputDocument> load(List<String> fileNames,
234214
int tabSize)
235215
throws Exception
236216
{
237-
List<InputDocument> input = new ArrayList<InputDocument>(fileNames.size());
217+
List<InputDocument> input = new ArrayList<>(fileNames.size());
238218
int i = 0;
239219
for (String f : fileNames) {
240220
InputDocument doc = load(f, lexerClass, tabSize);
@@ -281,17 +261,17 @@ public static InputDocument load(String fileName,
281261
}
282262

283263
public static List<String> getFilenames(File f, String inputFilePattern) throws Exception {
284-
List<String> files = new ArrayList<String>();
264+
List<String> files = new ArrayList<>();
285265
getFilenames_(f, inputFilePattern, files);
286266
return files;
287267
}
288268

289-
public static void getFilenames_(File f, String inputFilePattern, List<String> files) throws Exception {
269+
public static void getFilenames_(File f, String inputFilePattern, List<String> files) {
290270
// If this is a directory, walk each file/dir in that directory
291271
if (f.isDirectory()) {
292272
String flist[] = f.list();
293-
for (int i=0; i < flist.length; i++) {
294-
getFilenames_(new File(f, flist[i]), inputFilePattern, files);
273+
for (String aFlist : flist) {
274+
getFilenames_(new File(f, aFlist), inputFilePattern, files);
295275
}
296276
}
297277

@@ -338,7 +318,6 @@ public static List<CommonToken> copy(CommonTokenStream tokens) {
338318
List<CommonToken> copy = new ArrayList<>();
339319
tokens.fill();
340320
for (Token t : tokens.getTokens()) {
341-
CommonToken ct = (CommonToken)t;
342321
copy.add(new CommonToken(t));
343322
}
344323
return copy;
@@ -364,6 +343,7 @@ public static double weightedL0_Distance(FeatureMetaData[] featureTypes, int[] A
364343
for (int i=0; i<A.length; i++) {
365344
if ( featureTypes[i].type==FeatureType.TOKEN ||
366345
featureTypes[i].type==FeatureType.RULE ||
346+
featureTypes[i].type==FeatureType.INT ||
367347
featureTypes[i].type==FeatureType.BOOL
368348
)
369349
{

java/src/org/antlr/codebuff/gui/GUIController.java

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,6 @@
1616
import java.awt.*;
1717
import java.util.List;
1818

19-
import static javax.swing.JFrame.EXIT_ON_CLOSE;
2019
import static javax.swing.text.DefaultHighlighter.DefaultHighlightPainter;
2120

2221
public class GUIController {
@@ -71,7 +70,7 @@ public void show() throws Exception {
7170
scope.injectWSConsole.setFont(docFont);
7271

7372
JFrame frame = new JFrame("CodeBuff Scope");
74-
frame.setDefaultCloseOperation(EXIT_ON_CLOSE);
73+
frame.setDefaultCloseOperation(WindowConstants.EXIT_ON_CLOSE);
7574
frame.getContentPane().add(scope.$$$getRootComponent$$$(), BorderLayout.CENTER);
7675

7776
scope.getFormattedTextPane().addCaretListener(new HighlightTokenListener());

0 commit comments

Comments
 (0)