44import org .antlr .v4 .runtime .CommonTokenStream ;
55import org .antlr .v4 .runtime .ParserRuleContext ;
66import org .antlr .v4 .runtime .Token ;
7+ import org .antlr .v4 .runtime .WritableToken ;
78import org .antlr .v4 .runtime .misc .Interval ;
89import org .antlr .v4 .runtime .tree .TerminalNode ;
910
1011import java .util .List ;
1112import java .util .Map ;
1213import java .util .Vector ;
1314
15+ import static org .antlr .codebuff .CollectFeatures .CAT_ALIGN_WITH_ANCESTORS_PARENT_FIRST_TOKEN ;
16+ import static org .antlr .codebuff .CollectFeatures .CAT_ALIGN_WITH_ANCESTOR_FIRST_TOKEN ;
17+ import static org .antlr .codebuff .CollectFeatures .CAT_ALIGN_WITH_LIST_FIRST_ELEMENT ;
18+ import static org .antlr .codebuff .CollectFeatures .CAT_ALIGN_WITH_PAIR ;
19+ import static org .antlr .codebuff .CollectFeatures .CAT_INDENT ;
20+ import static org .antlr .codebuff .CollectFeatures .CAT_NO_ALIGNMENT ;
21+ import static org .antlr .codebuff .CollectFeatures .FEATURES_ALIGN ;
22+ import static org .antlr .codebuff .CollectFeatures .FEATURES_INJECT_NL ;
23+ import static org .antlr .codebuff .CollectFeatures .FEATURES_INJECT_WS ;
24+ import static org .antlr .codebuff .CollectFeatures .INDEX_FIRST_ON_LINE ;
25+ import static org .antlr .codebuff .CollectFeatures .INDEX_PREV_END_COLUMN ;
26+ import static org .antlr .codebuff .CollectFeatures .MAX_CONTEXT_DIFF_THRESHOLD ;
27+ import static org .antlr .codebuff .CollectFeatures .earliestAncestorEndingWithToken ;
28+ import static org .antlr .codebuff .CollectFeatures .getListSiblings ;
29+ import static org .antlr .codebuff .CollectFeatures .getMatchingLeftSymbol ;
30+ import static org .antlr .codebuff .CollectFeatures .getNodeFeatures ;
31+ import static org .antlr .codebuff .CollectFeatures .getRealTokens ;
32+ import static org .antlr .codebuff .CollectFeatures .getTokensOnPreviousLine ;
33+ import static org .antlr .codebuff .CollectFeatures .indexTree ;
34+ import static org .antlr .codebuff .CollectFeatures .isAlignedWithFirstSiblingOfList ;
35+
1436public class Formatter {
1537 protected final Corpus corpus ;
1638 protected StringBuilder output = new StringBuilder ();
@@ -26,13 +48,11 @@ public class Formatter {
2648
2749 protected CodekNNClassifier newlineClassifier ;
2850 protected CodekNNClassifier wsClassifier ;
29- protected CodekNNClassifier indentClassifier ;
3051 protected CodekNNClassifier alignClassifier ;
3152 protected int k ;
3253
3354 protected int line = 1 ;
3455 protected int charPosInLine = 0 ;
35- protected int currentIndent = 0 ;
3656
3757 protected int tabSize ;
3858
@@ -47,11 +67,11 @@ public Formatter(Corpus corpus, InputDocument doc, int tabSize) {
4767 this .tokens = doc .tokens ;
4868 this .originalTokens = Tool .copy (tokens );
4969 Tool .wipeLineAndPositionInfo (tokens );
50- newlineClassifier = new CodekNNClassifier (corpus , CollectFeatures . FEATURES_INJECT_NL );
51- wsClassifier = new CodekNNClassifier (corpus , CollectFeatures . FEATURES_INJECT_WS );
52- indentClassifier = new CodekNNClassifier (corpus , CollectFeatures . FEATURES_INDENT );
53- alignClassifier = new CodekNNClassifier ( corpus , CollectFeatures . FEATURES_ALIGN );
54- k = ( int ) Math . sqrt ( corpus . X . size ()) ;
70+ newlineClassifier = new CodekNNClassifier (corpus , FEATURES_INJECT_NL );
71+ wsClassifier = new CodekNNClassifier (corpus , FEATURES_INJECT_WS );
72+ alignClassifier = new CodekNNClassifier (corpus , FEATURES_ALIGN );
73+ // k = (int)Math.sqrt( corpus.X.size() );
74+ k = 11 ;
5575 this .tabSize = tabSize ;
5676 }
5777
@@ -66,15 +86,23 @@ public List<TokenPositionAnalysis> getAnalysisPerToken() {
6686
6787 public String format () {
6888 if ( tokenToNodeMap == null ) {
69- tokenToNodeMap = CollectFeatures . indexTree (root );
89+ tokenToNodeMap = indexTree (root );
7090 }
7191
7292 tokens .seek (0 );
73- Token secondToken = tokens .LT (2 );
93+ WritableToken firstToken = (WritableToken )tokens .LT (1 );
94+ WritableToken secondToken = (WritableToken )tokens .LT (2 );
95+ // all tokens are wiped of line/col info so set them for first 2
96+ firstToken .setLine (1 );
97+ firstToken .setCharPositionInLine (0 );
98+ secondToken .setLine (1 );
99+ secondToken .setCharPositionInLine (firstToken .getText ().length ());
100+
74101 String prefix = tokens .getText (Interval .of (0 , secondToken .getTokenIndex ()));
75102 output .append (prefix );
76103
77- realTokens = CollectFeatures .getRealTokens (tokens );
104+
105+ realTokens = getRealTokens (tokens );
78106 for (int i = 2 ; i <realTokens .size (); i ++) { // can't process first 2 tokens
79107 int tokenIndexInStream = realTokens .get (i ).getTokenIndex ();
80108 processToken (i , tokenIndexInStream );
@@ -86,18 +114,23 @@ public void processToken(int indexIntoRealTokens, int tokenIndexInStream) {
86114 CommonToken curToken = (CommonToken )tokens .get (tokenIndexInStream );
87115 String tokText = curToken .getText ();
88116
89- int [] features = CollectFeatures . getNodeFeatures (tokenToNodeMap , doc , tokenIndexInStream , line , tabSize );
117+ int [] features = getNodeFeatures (tokenToNodeMap , doc , tokenIndexInStream , line , tabSize );
90118 // must set "prev end column" value as token stream doesn't have it;
91119 // we're tracking it as we emit tokens
92- features [CollectFeatures . INDEX_PREV_END_COLUMN ] = charPosInLine ;
120+ features [INDEX_PREV_END_COLUMN ] = charPosInLine ;
93121
94- int injectNewline = newlineClassifier .classify (k , features , corpus .injectNewlines , CollectFeatures .MAX_CONTEXT_DIFF_THRESHOLD );
95- int alignWithPrevious = alignClassifier .classify (k , features , corpus .alignWithPrevious , CollectFeatures .MAX_CONTEXT_DIFF_THRESHOLD );
96- int indent = indentClassifier .classify (k , features , corpus .indent , CollectFeatures .MAX_CONTEXT_DIFF_THRESHOLD );
97- int ws = wsClassifier .classify (k , features , corpus .injectWS , CollectFeatures .MAX_CONTEXT_DIFF_THRESHOLD );
122+ int injectNewline = newlineClassifier .classify (k , features , corpus .injectNewlines , MAX_CONTEXT_DIFF_THRESHOLD );
123+
124+ // getNodeFeatures() also doesn't know what line curToken is on. If a \n is predicted, we need to find exemplars that start a line
125+ features [INDEX_FIRST_ON_LINE ] = injectNewline ; // use \n prediction to match exemplars for alignment
126+
127+ int align = alignClassifier .classify (k , features , corpus .align , MAX_CONTEXT_DIFF_THRESHOLD );
128+ int indent = 0 ;
129+ //indentClassifier.classify(k, features, corpus.indent, CollectFeatures.MAX_CONTEXT_DIFF_THRESHOLD);
130+ int ws = wsClassifier .classify (k , features , corpus .injectWS , MAX_CONTEXT_DIFF_THRESHOLD );
98131
99132 TokenPositionAnalysis tokenPositionAnalysis =
100- getTokenAnalysis (features , indexIntoRealTokens , tokenIndexInStream , injectNewline , alignWithPrevious , indent , ws );
133+ getTokenAnalysis (features , indexIntoRealTokens , tokenIndexInStream , injectNewline , align , indent , ws );
101134 analysis .setSize (tokenIndexInStream +1 );
102135 analysis .set (tokenIndexInStream , tokenPositionAnalysis );
103136
@@ -108,36 +141,106 @@ public void processToken(int indexIntoRealTokens, int tokenIndexInStream) {
108141 if ( injectNewline >0 ) {
109142 output .append (Tool .newlines (injectNewline ));
110143 line ++;
111- TerminalNode node = tokenToNodeMap .get (tokens .get (tokenIndexInStream ));
112- ParserRuleContext parent = (ParserRuleContext )node .getParent ();
113- int myIndex = 0 ;
114- ParserRuleContext earliestAncestor = CollectFeatures .earliestAncestorStartingAtToken (parent , curToken );
115- if ( earliestAncestor !=null ) {
116- ParserRuleContext commonAncestor = earliestAncestor .getParent ();
117- List <ParserRuleContext > siblings = commonAncestor .getRuleContexts (earliestAncestor .getClass ());
118- myIndex = siblings .indexOf (earliestAncestor );
119- }
120- if ( myIndex >0 && alignWithPrevious >0 ) { // align with first sibling's start token
121- ParserRuleContext commonAncestor = earliestAncestor .getParent ();
122- List <ParserRuleContext > siblings = commonAncestor .getRuleContexts (earliestAncestor .getClass ());
123- ParserRuleContext firstSibling = siblings .get (0 );
124- Token firstSiblingStartToken = firstSibling .getStart ();
125- // align but don't update currentIndent
126- charPosInLine = firstSiblingStartToken .getCharPositionInLine ();
127- output .append (Tool .spaces (charPosInLine ));
144+ charPosInLine = 0 ;
145+
146+ List <Token > tokensOnPreviousLine = getTokensOnPreviousLine (tokens , tokenIndexInStream , line );
147+ Token firstTokenOnPrevLine = null ;
148+ if ( tokensOnPreviousLine .size ()>0 ) {
149+ firstTokenOnPrevLine = tokensOnPreviousLine .get (0 );
128150 }
129- else {
130- currentIndent += indent ;
131- if ( currentIndent <0 ) currentIndent = 0 ; // don't allow bad indents to accumulate
132- charPosInLine = currentIndent ;
133- output .append (Tool .spaces (currentIndent ));
151+
152+ TerminalNode node = tokenToNodeMap .get (curToken );
153+ ParserRuleContext parent = (ParserRuleContext )node .getParent ();
154+ ParserRuleContext earliestRightAncestor = earliestAncestorEndingWithToken (parent , curToken );
155+
156+ switch ( align ) {
157+ case CAT_INDENT :
158+ if ( firstTokenOnPrevLine !=null ) { // if not on first line, we can indent
159+ int indentedCol = firstTokenOnPrevLine .getCharPositionInLine () + 4 ;
160+ charPosInLine = indentedCol ;
161+ output .append (Tool .spaces (indentedCol ));
162+ }
163+ break ;
164+ case CAT_ALIGN_WITH_ANCESTOR_FIRST_TOKEN :
165+ if ( earliestRightAncestor !=null ) {
166+ Token earliestRightAncestorStart = earliestRightAncestor .getStart ();
167+ int linedUpCol = earliestRightAncestorStart .getCharPositionInLine ();
168+ charPosInLine = linedUpCol ;
169+ output .append (Tool .spaces (linedUpCol ));
170+ }
171+ break ;
172+ case CAT_ALIGN_WITH_ANCESTORS_PARENT_FIRST_TOKEN :
173+ if ( earliestRightAncestor !=null ) {
174+ ParserRuleContext earliestAncestorParent = earliestRightAncestor .getParent ();
175+ if ( earliestAncestorParent !=null ) {
176+ Token earliestAncestorParentStart = earliestAncestorParent .getStart ();
177+ int linedUpCol = earliestAncestorParentStart .getCharPositionInLine ();
178+ charPosInLine = linedUpCol ;
179+ output .append (Tool .spaces (linedUpCol ));
180+ }
181+ }
182+ break ;
183+ case CAT_ALIGN_WITH_LIST_FIRST_ELEMENT :
184+ List <ParserRuleContext > listSiblings = getListSiblings (tokenToNodeMap , curToken );
185+ if ( listSiblings !=null ) {
186+ ParserRuleContext firstSibling = listSiblings .get (0 );
187+ int linedUpCol = firstSibling .getStart ().getCharPositionInLine ();
188+ charPosInLine = linedUpCol ;
189+ output .append (Tool .spaces (linedUpCol ));
190+ }
191+ break ;
192+ case CAT_ALIGN_WITH_PAIR :
193+ TerminalNode matchingLeftSymbol = getMatchingLeftSymbol (doc , node );
194+ int linedUpCol = matchingLeftSymbol .getSymbol ().getCharPositionInLine ();
195+ charPosInLine = linedUpCol ;
196+ output .append (Tool .spaces (linedUpCol ));
197+ break ;
198+ case CAT_NO_ALIGNMENT :
199+ break ;
134200 }
201+ // if ( currentIndent<0 ) currentIndent = 0; // don't allow bad indents to accumulate
202+ // charPosInLine = currentIndent;
203+ // output.append(Tool.spaces(currentIndent));
135204 }
136205 else {
137206 // inject whitespace instead of \n?
138207 output .append (Tool .spaces (ws ));
139208 charPosInLine += ws ;
140209 }
210+
211+ // if ( injectNewline>0 ) {
212+ // output.append(Tool.newlines(injectNewline));
213+ // line++;
214+ // TerminalNode node = tokenToNodeMap.get(tokens.get(tokenIndexInStream));
215+ // ParserRuleContext parent = (ParserRuleContext)node.getParent();
216+ // int myIndex = 0;
217+ // ParserRuleContext earliestAncestor = CollectFeatures.earliestAncestorStartingWithToken(parent, curToken);
218+ // if ( earliestAncestor!=null ) {
219+ // ParserRuleContext commonAncestor = earliestAncestor.getParent();
220+ // List<ParserRuleContext> siblings = commonAncestor.getRuleContexts(earliestAncestor.getClass());
221+ // myIndex = siblings.indexOf(earliestAncestor);
222+ // }
223+ // if ( false ) { //if ( myIndex>0 && align>0 ) { // align with first sibling's start token
224+ // ParserRuleContext commonAncestor = earliestAncestor.getParent();
225+ // List<ParserRuleContext> siblings = commonAncestor.getRuleContexts(earliestAncestor.getClass());
226+ // ParserRuleContext firstSibling = siblings.get(0);
227+ // Token firstSiblingStartToken = firstSibling.getStart();
228+ // // align but don't update currentIndent
229+ // charPosInLine = firstSiblingStartToken.getCharPositionInLine();
230+ // output.append(Tool.spaces(charPosInLine));
231+ // }
232+ // else {
233+ // currentIndent += indent;
234+ // if ( currentIndent<0 ) currentIndent = 0; // don't allow bad indents to accumulate
235+ // charPosInLine = currentIndent;
236+ // output.append(Tool.spaces(currentIndent));
237+ // }
238+ // }
239+ // else {
240+ // // inject whitespace instead of \n?
241+ // output.append(Tool.spaces(ws));
242+ // charPosInLine += ws;
243+ // }
141244 // update Token object with position information now that we are about
142245 // to emit it.
143246 curToken .setLine (line );
@@ -171,8 +274,8 @@ public TokenPositionAnalysis getTokenAnalysis(int[] features, int indexIntoRealT
171274 boolean prevIsWS = prevToken .getType ()==JavaLexer .WS ;
172275 int actualNL = Tool .count (prevToken .getText (), '\n' );
173276 int actualWS = Tool .count (prevToken .getText (), ' ' );
174- int actualIndent = originalCurToken .getCharPositionInLine ()-currentIndent ;
175- boolean actualAlign = CollectFeatures . isAlignedWithFirstSibling (tokenToNodeMap , tokens , curToken );
277+ int actualIndent = originalCurToken .getCharPositionInLine ()-0 ; // currentIndent;
278+ boolean actualAlign = isAlignedWithFirstSiblingOfList (tokenToNodeMap , tokens , curToken );
176279 String newlinePredictionString = String .format ("### line %d: predicted %d \\ n actual %s" ,
177280 originalCurToken .getLine (), injectNewline , prevIsWS ? actualNL : "none" );
178281 String alignPredictionString = String .format ("### line %d: predicted %s actual %s" ,
@@ -189,18 +292,15 @@ public TokenPositionAnalysis getTokenAnalysis(int[] features, int indexIntoRealT
189292
190293
191294 String newlineAnalysis = newlinePredictionString +"\n " +
192- newlineClassifier .getPredictionAnalysis (k , features , corpus .injectNewlines ,
193- CollectFeatures . MAX_CONTEXT_DIFF_THRESHOLD );
295+ newlineClassifier .getPredictionAnalysis (doc , k , features , corpus .injectNewlines ,
296+ MAX_CONTEXT_DIFF_THRESHOLD );
194297 String alignAnalysis =alignPredictionString +"\n " +
195- alignClassifier .getPredictionAnalysis (k , features , corpus .alignWithPrevious ,
196- CollectFeatures .MAX_CONTEXT_DIFF_THRESHOLD );
197- String indentAnalysis =indentPredictionString +"\n " +
198- indentClassifier .getPredictionAnalysis (k , features , corpus .indent ,
199- CollectFeatures .MAX_CONTEXT_DIFF_THRESHOLD );
298+ alignClassifier .getPredictionAnalysis (doc , k , features , corpus .align ,
299+ MAX_CONTEXT_DIFF_THRESHOLD );
200300 String wsAnalysis =wsPredictionString +"\n " +
201- wsClassifier .getPredictionAnalysis (k , features , corpus .injectWS ,
202- CollectFeatures . MAX_CONTEXT_DIFF_THRESHOLD );
203- return new TokenPositionAnalysis (newlineAnalysis , alignAnalysis , indentAnalysis , wsAnalysis );
301+ wsClassifier .getPredictionAnalysis (doc , k , features , corpus .injectWS ,
302+ MAX_CONTEXT_DIFF_THRESHOLD );
303+ return new TokenPositionAnalysis (newlineAnalysis , alignAnalysis , wsAnalysis );
204304 }
205305
206306 /** Do not join two words like "finaldouble" or numbers like "3double",
0 commit comments