Skip to content

Commit e50d721

Browse files
committed
Remove the start-of-list feature; find alignment using the rule delta and child index rather than by checking "is element of list", etc.; simplify the code. The corpus cache should be keyed only on ruleIndex, not ruleIndex:altNum, because the latter is too restrictive.
1 parent e7b7dd2 commit e50d721

11 files changed

Lines changed: 210 additions & 346 deletions

java/codebuff.ipr

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,24 @@
3737
<component name="EntryPointsManager">
3838
<entry_points version="2.0" />
3939
</component>
40+
<component name="InspectionProjectProfileManager">
41+
<profile version="1.0">
42+
<option name="myName" value="Project Default" />
43+
<option name="myLocal" value="true" />
44+
<inspection_tool class="FieldCanBeLocal" enabled="false" level="WARNING" enabled_by_default="false" />
45+
<inspection_tool class="FinalPrivateMethod" enabled="false" level="WARNING" enabled_by_default="false" />
46+
<inspection_tool class="FinalStaticMethod" enabled="false" level="WARNING" enabled_by_default="false" />
47+
<inspection_tool class="LoggerInitializedWithForeignClass" enabled="false" level="WARNING" enabled_by_default="false">
48+
<option name="loggerClassName" value="org.apache.log4j.Logger,org.slf4j.LoggerFactory,org.apache.commons.logging.LogFactory,java.util.logging.Logger" />
49+
<option name="loggerFactoryMethodName" value="getLogger,getLogger,getLog,getLogger" />
50+
</inspection_tool>
51+
<inspection_tool class="ParameterCanBeLocal" enabled="false" level="WARNING" enabled_by_default="false" />
52+
<inspection_tool class="TypeParameterHidesVisibleType" enabled="false" level="WARNING" enabled_by_default="false" />
53+
</profile>
54+
<option name="PROJECT_PROFILE" value="Project Default" />
55+
<option name="USE_PROJECT_PROFILE" value="true" />
56+
<version value="1.0" />
57+
</component>
4058
<component name="MavenProjectsManager">
4159
<option name="originalFiles">
4260
<list>

java/src/org/antlr/codebuff/CollectFeatures.java

Lines changed: 126 additions & 201 deletions
Large diffs are not rendered by default.

java/src/org/antlr/codebuff/Corpus.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,9 @@ public void buildTokenContextIndex() {
8686
for (int i=0; i<X.size(); i++) {
8787
int curTokenRuleIndex = X.get(i)[CollectFeatures.INDEX_RULE];
8888
int prevTokenRuleIndex = X.get(i)[CollectFeatures.INDEX_PREV_RULE];
89-
Pair<Integer, Integer> key = new Pair<>(prevTokenRuleIndex, curTokenRuleIndex);
89+
int pr = CollectFeatures.unrulealt(prevTokenRuleIndex)[0];
90+
int cr = CollectFeatures.unrulealt(curTokenRuleIndex)[0];
91+
Pair<Integer, Integer> key = new Pair<>(pr, cr);
9092
List<Integer> vectorIndexes = curAndPrevTokenRuleIndexToVectorsMap.get(key);
9193
if ( vectorIndexes==null ) {
9294
vectorIndexes = new ArrayList<>();

java/src/org/antlr/codebuff/Formatter.java

Lines changed: 36 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -6,16 +6,14 @@
66
import org.antlr.v4.runtime.Token;
77
import org.antlr.v4.runtime.WritableToken;
88
import org.antlr.v4.runtime.misc.Interval;
9+
import org.antlr.v4.runtime.tree.ParseTree;
910
import org.antlr.v4.runtime.tree.TerminalNode;
1011

1112
import java.util.List;
1213
import java.util.Map;
1314
import java.util.Vector;
1415

15-
import static org.antlr.codebuff.CollectFeatures.CAT_ALIGN_WITH_ANCESTORS_PARENT_FIRST_TOKEN;
16-
import static org.antlr.codebuff.CollectFeatures.CAT_ALIGN_WITH_ANCESTOR_FIRST_TOKEN;
17-
import static org.antlr.codebuff.CollectFeatures.CAT_ALIGN_WITH_LIST_FIRST_ELEMENT;
18-
import static org.antlr.codebuff.CollectFeatures.CAT_ALIGN_WITH_PAIR;
16+
import static org.antlr.codebuff.CollectFeatures.CAT_ALIGN_WITH_ANCESTOR_CHILD;
1917
import static org.antlr.codebuff.CollectFeatures.CAT_INDENT;
2018
import static org.antlr.codebuff.CollectFeatures.CAT_INDENT_FROM_ANCESTOR_FIRST_TOKEN;
2119
import static org.antlr.codebuff.CollectFeatures.CAT_NO_ALIGNMENT;
@@ -25,15 +23,11 @@
2523
import static org.antlr.codebuff.CollectFeatures.INDEX_FIRST_ON_LINE;
2624
import static org.antlr.codebuff.CollectFeatures.INDEX_PREV_END_COLUMN;
2725
import static org.antlr.codebuff.CollectFeatures.MAX_CONTEXT_DIFF_THRESHOLD;
28-
import static org.antlr.codebuff.CollectFeatures.earliestAncestorEndingWithToken;
2926
import static org.antlr.codebuff.CollectFeatures.earliestAncestorStartingWithToken;
30-
import static org.antlr.codebuff.CollectFeatures.getListSiblings;
31-
import static org.antlr.codebuff.CollectFeatures.getMatchingLeftSymbol;
3227
import static org.antlr.codebuff.CollectFeatures.getNodeFeatures;
3328
import static org.antlr.codebuff.CollectFeatures.getRealTokens;
3429
import static org.antlr.codebuff.CollectFeatures.getTokensOnPreviousLine;
3530
import static org.antlr.codebuff.CollectFeatures.indexTree;
36-
import static org.antlr.codebuff.CollectFeatures.isAlignedWithFirstSiblingOfList;
3731

3832
public class Formatter {
3933
public static final int INDENT_LEVEL = 4;
@@ -75,6 +69,7 @@ public Formatter(Corpus corpus, InputDocument doc, int tabSize) {
7569
wsClassifier = new CodekNNClassifier(corpus, FEATURES_INJECT_WS);
7670
alignClassifier = new CodekNNClassifier(corpus, FEATURES_ALIGN);
7771
// k = (int)Math.sqrt(corpus.X.size());
72+
// k = 7;
7873
k = 11;
7974
this.tabSize = tabSize;
8075
}
@@ -154,7 +149,6 @@ public void processToken(int indexIntoRealTokens, int tokenIndexInStream) {
154149

155150
TerminalNode node = tokenToNodeMap.get(curToken);
156151
ParserRuleContext parent = (ParserRuleContext)node.getParent();
157-
ParserRuleContext earliestRightAncestor = earliestAncestorEndingWithToken(parent, curToken);
158152

159153
switch ( align ) {
160154
case CAT_INDENT :
@@ -164,47 +158,43 @@ public void processToken(int indexIntoRealTokens, int tokenIndexInStream) {
164158
output.append(Tool.spaces(indentedCol));
165159
}
166160
break;
167-
case CAT_ALIGN_WITH_ANCESTOR_FIRST_TOKEN :
168-
if ( earliestRightAncestor!=null ) {
169-
Token earliestRightAncestorStart = earliestRightAncestor.getStart();
170-
int linedUpCol = earliestRightAncestorStart.getCharPositionInLine();
171-
charPosInLine = linedUpCol;
172-
output.append(Tool.spaces(linedUpCol));
173-
}
174-
break;
175-
case CAT_ALIGN_WITH_ANCESTORS_PARENT_FIRST_TOKEN :
176-
if ( earliestRightAncestor!=null ) {
177-
ParserRuleContext earliestAncestorParent = earliestRightAncestor.getParent();
178-
if ( earliestAncestorParent!=null ) {
179-
Token earliestAncestorParentStart = earliestAncestorParent.getStart();
180-
int linedUpCol = earliestAncestorParentStart.getCharPositionInLine();
181-
charPosInLine = linedUpCol;
182-
output.append(Tool.spaces(linedUpCol));
183-
}
184-
}
185-
break;
186-
case CAT_ALIGN_WITH_LIST_FIRST_ELEMENT :
187-
List<ParserRuleContext> listSiblings = getListSiblings(tokenToNodeMap, curToken);
188-
if ( listSiblings!=null ) {
189-
ParserRuleContext firstSibling = listSiblings.get(0);
190-
int linedUpCol = firstSibling.getStart().getCharPositionInLine();
191-
charPosInLine = linedUpCol;
192-
output.append(Tool.spaces(linedUpCol));
193-
}
194-
break;
195-
case CAT_ALIGN_WITH_PAIR :
196-
TerminalNode matchingLeftSymbol = getMatchingLeftSymbol(doc, node);
197-
int linedUpCol = matchingLeftSymbol.getSymbol().getCharPositionInLine();
198-
charPosInLine = linedUpCol;
199-
output.append(Tool.spaces(linedUpCol));
200-
break;
201161
case CAT_NO_ALIGNMENT :
202162
break;
203163

204164
default :
205-
if ( align>=CAT_INDENT_FROM_ANCESTOR_FIRST_TOKEN ) {
206-
int deltaFromAncestor = align - CAT_INDENT_FROM_ANCESTOR_FIRST_TOKEN;
165+
if ( (align&0xFF)==CAT_ALIGN_WITH_ANCESTOR_CHILD ) {
166+
int[] deltaChild = CollectFeatures.unaligncat(align);
167+
int deltaFromAncestor = deltaChild[0];
168+
int childIndex = deltaChild[1];
207169
ParserRuleContext earliestLeftAncestor = earliestAncestorStartingWithToken(parent, curToken);
170+
if ( earliestLeftAncestor==null ) {
171+
earliestLeftAncestor = parent;
172+
}
173+
ParserRuleContext ancestor = CollectFeatures.getAncestor(earliestLeftAncestor, deltaFromAncestor);
174+
ParseTree child = ancestor.getChild(childIndex);
175+
Token start = null;
176+
if ( child instanceof ParserRuleContext ) {
177+
start = ((ParserRuleContext) child).getStart();
178+
}
179+
else if ( child instanceof TerminalNode ){
180+
start = ((TerminalNode)child).getSymbol();
181+
}
182+
else {
183+
// uh oh.
184+
System.err.println("Whoops. Tried access invalid child");
185+
}
186+
if ( start!=null ) {
187+
int indentCol = start.getCharPositionInLine();
188+
charPosInLine = indentCol;
189+
output.append(Tool.spaces(indentCol));
190+
}
191+
}
192+
else if ( (align&0xFF)==CAT_INDENT_FROM_ANCESTOR_FIRST_TOKEN ) {
193+
int deltaFromAncestor = CollectFeatures.unindentcat(align);
194+
ParserRuleContext earliestLeftAncestor = earliestAncestorStartingWithToken(parent, curToken);
195+
if ( earliestLeftAncestor==null ) {
196+
earliestLeftAncestor = parent;
197+
}
208198
ParserRuleContext ancestor = CollectFeatures.getAncestor(earliestLeftAncestor, deltaFromAncestor);
209199
Token start = ancestor.getStart();
210200
int indentCol = start.getCharPositionInLine() + INDENT_LEVEL;
@@ -251,13 +241,12 @@ public TokenPositionAnalysis getTokenAnalysis(int[] features, int indexIntoRealT
251241
boolean prevIsWS = prevToken.getType()==JavaLexer.WS;
252242
int actualNL = Tool.count(prevToken.getText(), '\n');
253243
int actualWS = Tool.count(prevToken.getText(), ' ');
254-
boolean actualAlign = isAlignedWithFirstSiblingOfList(tokenToNodeMap, tokens, curToken);
255244
String newlinePredictionString = String.format("### line %d: predicted %d \\n actual %s",
256245
originalCurToken.getLine(), injectNewline, prevIsWS ? actualNL : "none");
257246
String alignPredictionString = String.format("### line %d: predicted %s actual %s",
258247
originalCurToken.getLine(),
259248
alignWithPrevious==1?"align":"unaligned",
260-
actualAlign?"align":"unaligned");
249+
"?");
261250
String wsPredictionString = String.format("### line %d: predicted %d ' ' actual %s",
262251
originalCurToken.getLine(), ws, prevIsWS ? actualWS : "none");
263252
if ( failsafeTriggered ) {

java/src/org/antlr/codebuff/InputDocument.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,10 @@ public String getLine(int line) {
4141
if ( lines==null ) {
4242
lines = Arrays.asList(content.split("\n"));
4343
}
44-
return lines.get(line-1);
44+
if ( line>0 ) {
45+
return lines.get(line-1);
46+
}
47+
return null;
4548
}
4649

4750
public double getIncorrectWSRate() {

java/src/org/antlr/codebuff/Neighbor.java

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,12 @@ public String toString(FeatureMetaData[] FEATURES, List<Integer> Y) {
2121
String lineText = doc.getLine(line);
2222
int col = X[CollectFeatures.INDEX_INFO_CHARPOS];
2323
// insert a dot right before char position
24-
lineText = lineText.substring(0,col) + '\u00B7' + lineText.substring(col,lineText.length());
25-
return String.format("%s (cat=%d,d=%1.3f): %s", features, Y.get(corpusVectorIndex), distance, lineText);
24+
if ( lineText!=null ) {
25+
lineText = lineText.substring(0, col)+'\u00B7'+lineText.substring(col, lineText.length());
26+
}
27+
int cat = Y.get(corpusVectorIndex);
28+
int[] elements = CollectFeatures.unaligncat(cat);
29+
String display = String.format("%d|%d|%d", cat&0xFF, elements[0], elements[1]);
30+
return String.format("%s (cat=%s,d=%1.3f): %s", features, display, distance, lineText);
2631
}
2732
}

java/src/org/antlr/codebuff/TokenContext.java

Lines changed: 0 additions & 34 deletions
This file was deleted.

java/src/org/antlr/codebuff/TokenPositionAnalysis.java

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,6 @@ public class TokenPositionAnalysis {
77
public String ws = "n/a";
88
public String align = "n/a";
99

10-
public TokenPositionAnalysis() {
11-
}
12-
1310
public TokenPositionAnalysis(String newline, String align, String ws) {
1411
this.align = align;
1512
this.newline = newline;

java/src/org/antlr/codebuff/Tool.java

Lines changed: 7 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,7 @@
1414
import org.antlr.v4.runtime.misc.Pair;
1515
import org.antlr.v4.runtime.tree.ParseTreeWalker;
1616

17-
import java.io.BufferedWriter;
1817
import java.io.File;
19-
import java.io.FileWriter;
20-
import java.io.IOException;
2118
import java.lang.reflect.Constructor;
2219
import java.lang.reflect.Method;
2320
import java.nio.file.FileSystems;
@@ -119,21 +116,6 @@ public static Corpus train(String rootDir,
119116
return corpus;
120117
}
121118

122-
public void saveCSV(List<InputDocument> documents, String dir) throws IOException {
123-
FileWriter fw = new FileWriter(dir+"/style.csv");
124-
BufferedWriter bw = new BufferedWriter(fw);
125-
// bw.write(Utils.join(CollectFeatures.FEATURE_NAMES, ", "));
126-
bw.write("\n");
127-
for (InputDocument doc : documents) {
128-
for (int[] record : doc.featureVectors) {
129-
String r = join(record, ", ");
130-
bw.write(r);
131-
bw.write('\n');
132-
}
133-
}
134-
bw.close();
135-
}
136-
137119
public static Corpus processSampleDocs(List<InputDocument> docs,
138120
Class<? extends Lexer> lexerClass,
139121
Class<? extends Parser> parserClass,
@@ -164,9 +146,7 @@ public static Corpus processSampleDocs(List<InputDocument> docs,
164146
}
165147

166148
/** Parse document, save feature vectors to the doc but return it also */
167-
public static void process(InputDocument doc, int tabSize, Map<String, List<Pair<Integer, Integer>>> ruleToPairsBag)
168-
throws Exception
169-
{
149+
public static void process(InputDocument doc, int tabSize, Map<String, List<Pair<Integer, Integer>>> ruleToPairsBag) {
170150
CollectFeatures collector = new CollectFeatures(doc, tabSize, ruleToPairsBag);
171151
collector.computeFeatureVectors();
172152

@@ -234,7 +214,7 @@ public static List<InputDocument> load(List<String> fileNames,
234214
int tabSize)
235215
throws Exception
236216
{
237-
List<InputDocument> input = new ArrayList<InputDocument>(fileNames.size());
217+
List<InputDocument> input = new ArrayList<>(fileNames.size());
238218
int i = 0;
239219
for (String f : fileNames) {
240220
InputDocument doc = load(f, lexerClass, tabSize);
@@ -281,17 +261,17 @@ public static InputDocument load(String fileName,
281261
}
282262

283263
public static List<String> getFilenames(File f, String inputFilePattern) throws Exception {
284-
List<String> files = new ArrayList<String>();
264+
List<String> files = new ArrayList<>();
285265
getFilenames_(f, inputFilePattern, files);
286266
return files;
287267
}
288268

289-
public static void getFilenames_(File f, String inputFilePattern, List<String> files) throws Exception {
269+
public static void getFilenames_(File f, String inputFilePattern, List<String> files) {
290270
// If this is a directory, walk each file/dir in that directory
291271
if (f.isDirectory()) {
292272
String flist[] = f.list();
293-
for (int i=0; i < flist.length; i++) {
294-
getFilenames_(new File(f, flist[i]), inputFilePattern, files);
273+
for (String aFlist : flist) {
274+
getFilenames_(new File(f, aFlist), inputFilePattern, files);
295275
}
296276
}
297277

@@ -338,7 +318,6 @@ public static List<CommonToken> copy(CommonTokenStream tokens) {
338318
List<CommonToken> copy = new ArrayList<>();
339319
tokens.fill();
340320
for (Token t : tokens.getTokens()) {
341-
CommonToken ct = (CommonToken)t;
342321
copy.add(new CommonToken(t));
343322
}
344323
return copy;
@@ -364,6 +343,7 @@ public static double weightedL0_Distance(FeatureMetaData[] featureTypes, int[] A
364343
for (int i=0; i<A.length; i++) {
365344
if ( featureTypes[i].type==FeatureType.TOKEN ||
366345
featureTypes[i].type==FeatureType.RULE ||
346+
featureTypes[i].type==FeatureType.INT ||
367347
featureTypes[i].type==FeatureType.BOOL
368348
)
369349
{

java/src/org/antlr/codebuff/gui/GUIController.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
import java.awt.*;
1717
import java.util.List;
1818

19-
import static javax.swing.JFrame.EXIT_ON_CLOSE;
2019
import static javax.swing.text.DefaultHighlighter.DefaultHighlightPainter;
2120

2221
public class GUIController {
@@ -71,7 +70,7 @@ public void show() throws Exception {
7170
scope.injectWSConsole.setFont(docFont);
7271

7372
JFrame frame = new JFrame("CodeBuff Scope");
74-
frame.setDefaultCloseOperation(EXIT_ON_CLOSE);
73+
frame.setDefaultCloseOperation(WindowConstants.EXIT_ON_CLOSE);
7574
frame.getContentPane().add(scope.$$$getRootComponent$$$(), BorderLayout.CENTER);
7675

7776
scope.getFormattedTextPane().addCaretListener(new HighlightTokenListener());

0 commit comments

Comments
 (0)