@@ -42,7 +42,7 @@ public static void main(String[] args)
4242 String testFilename = args [2 ];
4343 String output ;
4444 if ( language .equals ("-java" ) ) {
45- Corpus corpus = train (corpusDir , JavaLexer .class , JavaParser .class , tabSize );
45+ Corpus corpus = train (corpusDir , ".* \\ .java" , JavaLexer .class , JavaParser .class , "compilationUnit" , tabSize );
4646 InputDocument testDoc = load (testFilename , JavaLexer .class , tabSize );
4747 Pair <String ,List <TokenPositionAnalysis >> results = format (corpus , testDoc , tabSize );
4848 output = results .a ;
@@ -51,7 +51,7 @@ public static void main(String[] args)
5151 controller .show ();
5252 }
5353 else {
54- Corpus corpus = train (corpusDir , ANTLRv4Lexer .class , ANTLRv4Parser .class , tabSize );
54+ Corpus corpus = train (corpusDir , ".* \\ .g4" , ANTLRv4Lexer .class , ANTLRv4Parser .class , "grammarSpec" , tabSize );
5555 InputDocument testDoc = load (testFilename , ANTLRv4Lexer .class , tabSize );
5656 Pair <String ,List <TokenPositionAnalysis >> results = format (corpus , testDoc , tabSize );
5757 output = results .a ;
@@ -89,18 +89,20 @@ public static Pair<String,List<TokenPositionAnalysis>> format(Corpus corpus, Inp
8989 }
9090
9191 public static Corpus train (String rootDir ,
92+ String fileRegex ,
9293 Class <? extends Lexer > lexerClass ,
9394 Class <? extends Parser > parserClass ,
95+ String startRuleName ,
9496 int tabSize )
9597 throws Exception
9698 {
97- List <String > allFiles = getFilenames (new File (rootDir ), ".* \\ .java" );
99+ List <String > allFiles = getFilenames (new File (rootDir ), fileRegex );
98100 List <InputDocument > documents = load (allFiles , lexerClass , tabSize );
99101
100102 // Parse all documents into parse trees before training begins
101103 for (InputDocument doc : documents ) {
102104 if ( showFileNames ) System .out .println (doc );
103- parse (doc , lexerClass , parserClass , "compilationUnit" ); // TODO: make ruleName generic
105+ parse (doc , lexerClass , parserClass , startRuleName );
104106 }
105107
106108 // Walk all documents to compute matching token dependencies (we need this for feature computation)
0 commit comments