2323 * Testing:
2424 *
2525 * Dbg -antlr corpus/antlr4/training grammars/org/antlr/codebuff/tsql.g4
26- * Dbg -leave-one-out - antlr corpus/antlr4/training corpus/antlr4/training/MASM.g4
27- * Dbg -leave-one-out - quorum corpus/quorum/training corpus/quorum/training/Containers/List.quorum
26+ * Dbg -antlr corpus/antlr4/training/MASM.g4
27+ * Dbg -quorum corpus/quorum/training/Containers/List.quorum
2828 * Dbg -sqlite corpus/sqlclean/training/dmart_bits.sql
29- * Dbg -leave-one-out - tsql corpus/sqlclean/training corpus/sqlclean/training/dmart_bits_PSQLRPT24.sql
29+ * Dbg -tsql corpus/sqlclean/training/dmart_bits_PSQLRPT24.sql
3030 * Dbg -java corpus/java/training/stringtemplate4 src/org/antlr/codebuff/Tool.java
31- * Dbg -leave-one-out -java corpus/java/training/stringtemplate4 corpus/java/training/stringtemplate4/org/stringtemplate/v4/StringRenderer.java
32- * Dbg -leave-one-out - java_guava corpus/java/training/guava corpus/java/training/guava/base/Absent.java
31+ * Dbg -java_st corpus/java/training/stringtemplate4/org/stringtemplate/v4/StringRenderer.java
32+ * Dbg -java_guava corpus/java/training/guava/base/Absent.java
3333 * Dbg -java corpus/java/training/antlr4-tool corpus/java/training/stringtemplate4/org/stringtemplate/v4/AutoIndentWriter.java
3434 */
3535public class Dbg {
@@ -39,19 +39,14 @@ public static void main(String[] args)
3939 throws Exception
4040 {
4141 if ( args .length <2 ) {
42- System .err .println ("Dbg [-leave-one-out] [-java|-java8|-antlr|-sqlite|-tsql] root-dir-of-samples test-file" );
42+ System .err .println ("Dbg [-leave-one-out] [-java|-java8|-antlr|-sqlite|-tsql] test-file" );
4343 }
4444
4545 int arg = 0 ;
46- boolean leaveOneOut = false ;
46+ boolean leaveOneOut = true ;
4747 boolean collectAnalysis = true ;
48- if ( args [arg ].equals ("-leave-one-out" ) ) {
49- leaveOneOut = true ;
50- arg ++;
51- }
5248 String language = args [arg ++];
5349 language = language .substring (1 );
54- String corpusDir = args [arg ++];
5550 String testFilename = args [arg ];
5651 String output = "???" ;
5752 InputDocument testDoc = null ;
@@ -66,9 +61,9 @@ public static void main(String[] args)
6661 break ;
6762 }
6863 }
69- if ( lang !=null && leaveOneOut ) {
64+ if ( lang !=null ) {
7065 start = System .nanoTime ();
71- LeaveOneOutValidator validator = new LeaveOneOutValidator (corpusDir , lang );
66+ LeaveOneOutValidator validator = new LeaveOneOutValidator (lang . corpusDir , lang );
7267 Triple <Formatter ,Float ,Float > val = validator .validateOneDocument (testFilename , null , collectAnalysis );
7368 testDoc = Tool .parse (testFilename , lang );
7469 stop = System .nanoTime ();
@@ -96,41 +91,6 @@ public static void main(String[] args)
9691 ClassificationAnalysis analysis = new ClassificationAnalysis (testDoc , analysisPerToken );
9792 System .out .println (analysis );
9893 }
99- else if ( lang !=null ) {
100- Corpus corpus = new Corpus (corpusDir , lang );
101- corpus .train ();
102- testDoc = Tool .parse (testFilename , lang );
103- start = System .nanoTime ();
104- Formatter formatter = new Formatter (corpus ,lang .indentSize );
105- output = formatter .format (testDoc , collectAnalysis );
106- stop = System .nanoTime ();
107- analysisPerToken = formatter .getAnalysisPerToken ();
108-
109- ClassificationAnalysis analysis = new ClassificationAnalysis (testDoc , analysisPerToken );
110- System .out .println (analysis );
111-
112- CommonTokenStream original_tokens = Tool .tokenize (testDoc .content , corpus .language .lexerClass );
113- List <Token > wsTokens = filter (original_tokens .getTokens (),
114- t -> t .getText ().matches ("\\ s+" ));
115- String originalWS = tokenText (wsTokens );
116- // Utils.writeFile("/tmp/spaces1", originalWS);
117- // Utils.writeFile("/tmp/input", testDoc.content);
118- // Utils.writeFile("/tmp/output", output);
119-
120- CommonTokenStream formatted_tokens = Tool .tokenize (output , corpus .language .lexerClass );
121- wsTokens = filter (formatted_tokens .getTokens (),
122- t -> t .getText ().matches ("\\ s+" ));
123- String formattedWS = tokenText (wsTokens );
124- // Utils.writeFile("/tmp/spaces2", formattedWS);
125-
126- System .out .println ("len orig, formatted=" +testDoc .content .length ()+", " +output .length ());
127- System .out .println ("ws len orig, formatted=" +originalWS .length ()+", " +formattedWS .length ());
128-
129- float editDistance = normalizedLevenshteinDistance (originalWS , formattedWS );
130- System .out .println ("Levenshtein distance of ws: " +editDistance );
131- editDistance = normalizedLevenshteinDistance (testDoc .content , output );
132- System .out .println ("Levenshtein distance: " +editDistance );
133- }
13494
13595 if ( lang !=null ) {
13696 controller = new GUIController (analysisPerToken , testDoc , output , lang .lexerClass );
0 commit comments