@@ -32,6 +32,11 @@ public class CollectFeatures {
3232 public static final int PAIR_ON_SAME_LINE = 0 ;
3333 public static final int PAIR_ON_DIFF_LINE = 1 ;
3434
35+ // Categories for newline, whitespace: CAT_INJECT_NL | (n<<8) or CAT_INJECT_WS | (n<<8), where n is the newline count or space count (see nlcat/wscat)
36+ public static final int CAT_NO_WS = 0 ;
37+ public static final int CAT_INJECT_NL = 100 ;
38+ public static final int CAT_INJECT_WS = 200 ;
39+
3540 // Categories for alignment/indentation
3641 public static final int CAT_NO_ALIGNMENT = 0 ;
3742
@@ -89,7 +94,7 @@ public class CollectFeatures {
8994
9095 public static final int NUM_FEATURES = 23 ;
9196
92- public static FeatureMetaData [] FEATURES_INJECT_NL = {
97+ public static FeatureMetaData [] FEATURES_INJECT_WS = { // inject ws or nl
9398 new FeatureMetaData (FeatureType .TOKEN , new String [] {"" , "LT(-2)" }, 1 ),
9499 new FeatureMetaData (FeatureType .TOKEN , new String [] {"" , "LT(-1)" }, 2 ),
95100 new FeatureMetaData (FeatureType .RULE , new String [] {"LT(-1)" , "rule" }, 2 ),
@@ -141,32 +146,6 @@ public class CollectFeatures {
141146 new FeatureMetaData (FeatureType .INFO_CHARPOS , new String [] {"char" , "pos" }, 0 )
142147 };
143148
144- public static FeatureMetaData [] FEATURES_INJECT_WS = {
145- new FeatureMetaData (FeatureType .TOKEN , new String [] {"" , "LT(-2)" }, 1 ),
146- new FeatureMetaData (FeatureType .TOKEN , new String [] {"" , "LT(-1)" }, 2 ),
147- new FeatureMetaData (FeatureType .RULE , new String [] {"LT(-1)" , "rule" }, 2 ),
148- FeatureMetaData .UNUSED ,
149- new FeatureMetaData (FeatureType .RULE , new String [] {"LT(-1)" , "right ancestor" }, 3 ),
150- new FeatureMetaData (FeatureType .TOKEN , new String [] {"" , "LT(1)" }, 3 ),
151- FeatureMetaData .UNUSED ,
152- new FeatureMetaData (FeatureType .BOOL , new String []{"Strt" , "line" }, 3 ),
153- new FeatureMetaData (FeatureType .RULE , new String [] {"LT(1)" , "rule" }, 2 ),
154- new FeatureMetaData (FeatureType .RULE , new String [] {"LT(1)" , "right ancestor" }, 3 ),
155- new FeatureMetaData (FeatureType .RULE , new String [] {"LT(1)" , "left ancestor" }, 3 ),
156- new FeatureMetaData (FeatureType .RULE , new String [] {"ancestor's" , "parent^5" }, 1 ),
157- new FeatureMetaData (FeatureType .RULE , new String [] {"ancestor's" , "parent^4" }, 1 ),
158- new FeatureMetaData (FeatureType .RULE , new String [] {"ancestor's" , "parent^3" }, 1 ),
159- new FeatureMetaData (FeatureType .INT , new String [] {"ancestor's" , "parent^3 wid" }, 1 ),
160- new FeatureMetaData (FeatureType .RULE , new String [] {"ancestor's" , "parent^2" }, 1 ),
161- new FeatureMetaData (FeatureType .INT , new String [] {"ancestor's" , "parent^2 wid" }, 1 ),
162- new FeatureMetaData (FeatureType .RULE , new String [] {"ancestor's" , "parent" }, 1 ),
163- new FeatureMetaData (FeatureType .INT , new String [] {"ancestor's" , "parent wid" }, 1 ),
164- new FeatureMetaData (FeatureType .TOKEN , new String [] {"" , "LT(2)" }, 1 ),
165- new FeatureMetaData (FeatureType .INFO_FILE , new String [] {"" , "file" }, 0 ),
166- new FeatureMetaData (FeatureType .INFO_LINE , new String [] {"" , "line" }, 0 ),
167- new FeatureMetaData (FeatureType .INFO_CHARPOS , new String [] {"char" , "pos" }, 0 )
168- };
169-
170149 public static FeatureMetaData [] FEATURES_ALL = {
171150 new FeatureMetaData (FeatureType .TOKEN , new String [] {"" , "LT(-2)" }, 1 ),
172151 new FeatureMetaData (FeatureType .TOKEN , new String [] {"" , "LT(-1)" }, 2 ),
@@ -201,9 +180,7 @@ public class CollectFeatures {
201180 protected ParserRuleContext root ;
202181 protected CommonTokenStream tokens ; // track stream so we can examine previous tokens
203182 protected List <int []> features = new ArrayList <>();
204- protected List <Integer > injectNewlines = new ArrayList <>();
205- protected List <Integer > injectWS = new ArrayList <>();
206- protected List <Integer > indent = new ArrayList <>();
183+ protected List <Integer > injectWhitespace = new ArrayList <>();
207184 protected List <Integer > align = new ArrayList <>();
208185
209186 protected int currentIndent = 0 ;
@@ -246,7 +223,20 @@ public void computeFeatureVectorForToken(int i) {
246223
247224 int precedingNL = getPrecedingNL (tokens , i ); // how many lines to inject
248225
249- this .injectNewlines .add (precedingNL );
226+ int ws = 0 ;
227+ if ( precedingNL ==0 ) {
228+ ws = curToken .getCharPositionInLine () -
229+ (prevToken .getCharPositionInLine ()+prevToken .getText ().length ());
230+ }
231+
232+ int injectNL_WS = CAT_NO_WS ;
233+ if ( precedingNL >0 ) {
234+ injectNL_WS = nlcat (precedingNL );
235+ }
236+ else if ( ws >0 ) {
237+ injectNL_WS = wscat (ws );
238+ }
239+ this .injectWhitespace .add (injectNL_WS );
250240
251241 int columnDelta = 0 ;
252242 if ( precedingNL >0 ) { // && aligned!=1 ) {
@@ -259,14 +249,6 @@ public void computeFeatureVectorForToken(int i) {
259249 aligned = getAlignmentCategory (node , curToken , columnDelta );
260250 }
261251
262- int ws = 0 ;
263- if ( precedingNL ==0 ) {
264- ws = curToken .getCharPositionInLine () -
265- (prevToken .getCharPositionInLine ()+prevToken .getText ().length ());
266- }
267-
268- this .injectWS .add (ws ); // likely negative if precedingNL
269-
270252 this .align .add (aligned );
271253
272254 this .features .add (features );
@@ -631,12 +613,8 @@ public List<int[]> getFeatures() {
631613 return features ;
632614 }
633615
634- public List <Integer > getInjectNewlines () {
635- return injectNewlines ;
636- }
637-
638- public List <Integer > getInjectWS () {
639- return injectWS ;
616+ public List <Integer > getInjectWhitespace () {
617+ return injectWhitespace ;
640618 }
641619
642620 public List <Integer > getAlign () {
@@ -843,4 +821,20 @@ public static int[] unaligncat(int v) {
843821 int child = (v >>16 )&0xFFFF ;
844822 return new int [] { deltaFromLeftAncestor , child };
845823 }
824+
825+ public static int wscat (int n ) {
826+ return CAT_INJECT_WS | (n <<8 );
827+ }
828+
829+ public static int nlcat (int n ) {
830+ return CAT_INJECT_NL | (n <<8 );
831+ }
832+
833+ public static int unwscat (int v ) {
834+ return v >> 8 & 0xFFFF ;
835+ }
836+
837+ public static int unnlcat (int v ) {
838+ return v >> 8 & 0xFFFF ;
839+ }
846840}
0 commit comments