Skip to content

Commit d215cad

Browse files
committed
rename a var. update comment for Dbg, simplify the code.
1 parent 66fb43f commit d215cad

File tree

2 files changed

+15
-53
lines changed

2 files changed

+15
-53
lines changed

src/org/antlr/codebuff/Dbg.java

Lines changed: 9 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -23,13 +23,13 @@
2323
* Testing:
2424
*
2525
* Dbg -antlr grammars/org/antlr/codebuff/tsql.g4
26-
* Dbg -leave-one-out -antlr corpus/antlr4/training corpus/antlr4/training/MASM.g4
27-
* Dbg -leave-one-out -quorum corpus/quorum/training corpus/quorum/training/Containers/List.quorum
26+
* Dbg -antlr corpus/antlr4/training/MASM.g4
27+
* Dbg -quorum corpus/quorum/training/Containers/List.quorum
2828
* Dbg -sqlite corpus/sqlclean/training/dmart_bits.sql
29-
* Dbg -leave-one-out -tsql corpus/sqlclean/training corpus/sqlclean/training/dmart_bits_PSQLRPT24.sql
29+
* Dbg -tsql corpus/sqlclean/training/dmart_bits_PSQLRPT24.sql
3030
* Dbg -java corpus/java/training/stringtemplate4 src/org/antlr/codebuff/Tool.java
31-
* Dbg -leave-one-out -java corpus/java/training/stringtemplate4 corpus/java/training/stringtemplate4/org/stringtemplate/v4/StringRenderer.java
32-
* Dbg -leave-one-out -java_guava corpus/java/training/guava corpus/java/training/guava/base/Absent.java
31+
* Dbg -java_st corpus/java/training/stringtemplate4/org/stringtemplate/v4/StringRenderer.java
32+
* Dbg -java_guava corpus/java/training/guava/base/Absent.java
3333
* Dbg -java corpus/java/training/antlr4-tool corpus/java/training/stringtemplate4/org/stringtemplate/v4/AutoIndentWriter.java
3434
*/
3535
public class Dbg {
@@ -39,19 +39,14 @@ public static void main(String[] args)
3939
throws Exception
4040
{
4141
if ( args.length<2 ) {
42-
System.err.println("Dbg [-leave-one-out] [-java|-java8|-antlr|-sqlite|-tsql] root-dir-of-samples test-file");
42+
System.err.println("Dbg [-leave-one-out] [-java|-java8|-antlr|-sqlite|-tsql] test-file");
4343
}
4444

4545
int arg = 0;
46-
boolean leaveOneOut = false;
46+
boolean leaveOneOut = true;
4747
boolean collectAnalysis = true;
48-
if ( args[arg].equals("-leave-one-out") ) {
49-
leaveOneOut = true;
50-
arg++;
51-
}
5248
String language = args[arg++];
5349
language = language.substring(1);
54-
String corpusDir = args[arg++];
5550
String testFilename = args[arg];
5651
String output = "???";
5752
InputDocument testDoc = null;
@@ -66,9 +61,9 @@ public static void main(String[] args)
6661
break;
6762
}
6863
}
69-
if ( lang!=null && leaveOneOut ) {
64+
if ( lang!=null ) {
7065
start = System.nanoTime();
71-
LeaveOneOutValidator validator = new LeaveOneOutValidator(corpusDir, lang);
66+
LeaveOneOutValidator validator = new LeaveOneOutValidator(lang.corpusDir, lang);
7267
Triple<Formatter,Float,Float> val = validator.validateOneDocument(testFilename, null, collectAnalysis);
7368
testDoc = Tool.parse(testFilename, lang);
7469
stop = System.nanoTime();
@@ -96,41 +91,6 @@ public static void main(String[] args)
9691
ClassificationAnalysis analysis = new ClassificationAnalysis(testDoc, analysisPerToken);
9792
System.out.println(analysis);
9893
}
99-
else if ( lang!=null ) {
100-
Corpus corpus = new Corpus(corpusDir, lang);
101-
corpus.train();
102-
testDoc = Tool.parse(testFilename, lang);
103-
start = System.nanoTime();
104-
Formatter formatter = new Formatter(corpus,lang.indentSize);
105-
output = formatter.format(testDoc, collectAnalysis);
106-
stop = System.nanoTime();
107-
analysisPerToken = formatter.getAnalysisPerToken();
108-
109-
ClassificationAnalysis analysis = new ClassificationAnalysis(testDoc, analysisPerToken);
110-
System.out.println(analysis);
111-
112-
CommonTokenStream original_tokens = Tool.tokenize(testDoc.content, corpus.language.lexerClass);
113-
List<Token> wsTokens = filter(original_tokens.getTokens(),
114-
t -> t.getText().matches("\\s+"));
115-
String originalWS = tokenText(wsTokens);
116-
// Utils.writeFile("/tmp/spaces1", originalWS);
117-
// Utils.writeFile("/tmp/input", testDoc.content);
118-
// Utils.writeFile("/tmp/output", output);
119-
120-
CommonTokenStream formatted_tokens = Tool.tokenize(output, corpus.language.lexerClass);
121-
wsTokens = filter(formatted_tokens.getTokens(),
122-
t -> t.getText().matches("\\s+"));
123-
String formattedWS = tokenText(wsTokens);
124-
// Utils.writeFile("/tmp/spaces2", formattedWS);
125-
126-
System.out.println("len orig, formatted="+testDoc.content.length()+", "+output.length());
127-
System.out.println("ws len orig, formatted="+originalWS.length()+", "+formattedWS.length());
128-
129-
float editDistance = normalizedLevenshteinDistance(originalWS, formattedWS);
130-
System.out.println("Levenshtein distance of ws: "+editDistance);
131-
editDistance = normalizedLevenshteinDistance(testDoc.content, output);
132-
System.out.println("Levenshtein distance: "+editDistance);
133-
}
13494

13595
if ( lang!=null ) {
13696
controller = new GUIController(analysisPerToken, testDoc, output, lang.lexerClass);

src/org/antlr/codebuff/Trainer.java

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -52,10 +52,10 @@ public class Trainer {
5252
/** When computing child indexes, we use this value for any child list
5353
* element other than the first one. If a parent has just one X child,
5454
* we use the actual child index. If parent has two or more X children,
55-
* and we are not the first X, use CHILD_INDEX_LIST_ELEMENT. If first
55+
* and we are not the first X, use CHILD_INDEX_REPEATED_ELEMENT. If first
5656
* of two or more X children, use actual child index.
5757
*/
58-
public static final int CHILD_INDEX_LIST_ELEMENT = 1_111_111_111;
58+
public static final int CHILD_INDEX_REPEATED_ELEMENT = 1_111_111_111;
5959

6060
public static final int LIST_PREFIX = 0;
6161
public static final int LIST_FIRST_ELEMENT = 1;
@@ -352,7 +352,9 @@ public static int getAlignmentCategory(InputDocument doc, TerminalNode node, int
352352
if ( alignInfo.a < indentInfo.a ) {
353353
return aligncat(alignInfo.a, alignInfo.b);
354354
}
355+
// Choose indentation over alignment if both at same ancestor level
355356
return indentcat(indentInfo.a, indentInfo.b);
357+
// return aligncat(alignInfo.a, alignInfo.b); // Do not prefer alignment over indentation here; manual review of the output showed it produces incorrect indentation
356358
}
357359

358360
// otherwise just return the align or indent we computed
@@ -968,15 +970,15 @@ public static int getChildIndexOrListMembership(ParseTree t) {
968970
List<ParserRuleContext> siblings =
969971
((ParserRuleContext)parent).getRuleContexts(((ParserRuleContext)t).getClass());
970972
if ( siblings.size()>1 && siblings.indexOf(t)>0 ) {
971-
return CHILD_INDEX_LIST_ELEMENT;
973+
return CHILD_INDEX_REPEATED_ELEMENT;
972974
}
973975
}
974976
// check to see if we are 2nd or beyond repeated token
975977
if ( t instanceof TerminalNode ) {
976978
List<TerminalNode> repeatedTokens =
977979
((ParserRuleContext) parent).getTokens(((TerminalNode) t).getSymbol().getType());
978980
if ( repeatedTokens.size()>1 && repeatedTokens.indexOf(t)>0 ) {
979-
return CHILD_INDEX_LIST_ELEMENT;
981+
return CHILD_INDEX_REPEATED_ELEMENT;
980982
}
981983
}
982984

0 commit comments

Comments
 (0)