Skip to content

Commit 47efe5c

Browse files
committed
improving graphs
1 parent 91ea96e commit 47efe5c

6 files changed

Lines changed: 38 additions & 39 deletions

File tree

809 Bytes
Binary file not shown.
-1.8 KB
Binary file not shown.

python/src/leave_one_out.py

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,8 @@
55
import matplotlib.pyplot as plt
66
import pylab
77

8+
sqlite_noisy_err = [0.13043478, 0.05154639, 0.15294118, 0.19277108, 0.21489972, 0.22946176, 0.15879828, 0.3082353, 0.19762845, 0.18139535, 0.2877698, 0.37349397, 0.17948718, 0.22535211, 0.14634146, 0.08958485, 0.2345679, 0.1450285, 0.16030534, 0.13525836, 0.0790378, 0.34254143, 0.2638037, 0.17747441, 0.12962963, 0.12195122, 0.15044248, 0.18137255, 0.19936709, 0.16006884, 0.20842105, 0.2451613, 0.16241299, 0.171875, 0.24350336, 0.19052088]
9+
tsql_noisy_err = [0.13333334, 0.20731707, 0.05102041, 0.19318181, 0.2746479, 0.24431819, 0.18859649, 0.2, 0.22891566, 0.24884793, 0.23404256, 0.16788322, 0.21917808, 0.39112905, 0.20987654, 0.16666667, 0.12280702, 0.12691131, 0.35359117, 0.19109747, 0.26993865, 0.07020548, 0.083333336, 0.25, 0.112068966, 0.11873351, 0.1407767, 0.13291536, 0.20754717, 0.082474224, 0.18769231, 0.24183007, 0.11627907, 0.1375, 0.18386492, 0.21345165]
810
java_st_err = [0.027777778, 0.05496625, 0.08366534, 0.028478438, 0.183, 0.0623608, 0.11627907, 0.025316456, 0.05620438, 0.013661202, 0.22580644, 0.053435113, 0.0583691, 0.15625, 0.018867925, 0.015873017, 0.026402641, 0.10743801, 0.023002421, 0.035958905, 0.06918239, 0.028037382, 0.11784512, 0.072289154, 0.046296295, 0.021052632, 0.14925373, 0.01843318, 0.14925373, 0.21081081, 0.07917059, 0.036, 0.009708738, 0.07159353, 0.09047619, 0.12662722, 0.0375, 0.053097345, 0.033088237, 0.05376344, 0.05352798, 0.009433962, 0.0, 0.053030305, 0.0, 0.054545455, 0.018348623, 0.05263158, 0.07090464, 0.056986302, 0.024449877, 0.024922118, 0.054247696, 0.04506699, 0.059734512, 0.12195122, 0.0415625, 0.047557004, 0.061060857]
911
java8_st_err = [0.0, 0.184, 0.03168156, 0.07968128, 0.0545657, 0.11627907, 0.051923078, 0.060761347, 0.025316456, 0.013661202, 0.060944207, 0.18478261, 0.06870229, 0.21164021, 0.018867925, 0.015873017, 0.17094018, 0.026402641, 0.036020584, 0.062893085, 0.024213076, 0.10213243, 0.028037382, 0.060240965, 0.046296295, 0.036842104, 0.19402985, 0.01843318, 0.19402985, 0.036, 0.19459459, 0.009708738, 0.07728558, 0.11438679, 0.059768066, 0.050808314, 0.034375, 0.08243548, 0.029411765, 0.05109489, 0.05376344, 0.0, 0.016949153, 0.054545455, 0.0, 0.043560605, 0.018348623, 0.07017544, 0.047131147, 0.07334963, 0.04506699, 0.02200489, 0.046728972, 0.08943089, 0.04605263, 0.0570958, 0.042852674, 0.044960905, 0.044123713]
1012
java_guava_err = [0.009090909, 0.0, 0.057142857, 0.01891253, 0.027108435, 0.055555556, 0.027272727, 0.042682927, 0.021084337, 0.014760148, 0.029596413, 0.021934759, 0.04347826, 0.02173913, 0.0, 0.026415095, 0.02173913, 0.045498546, 0.02173913, 0.011904762, 0.01971831, 0.008370535, 0.032857142, 0.028827038, 0.016604178, 0.030075189, 0.030571992, 0.0, 0.018518519, 0.034146342, 0.026216686, 0.012658228, 0.012048192, 0.027314112, 0.01986755, 0.018856065, 0.014102564, 0.027123848, 0.011956521, 0.0, 0.008358663, 0.022227112, 0.027888447, 0.032, 0.008130081, 0.041841004, 0.02718676, 0.00990099, 0.003322259, 0.012096774, 0.014102564, 0.034313727, 0.040697675, 0.016216217, 0.0091743115, 0.010256411, 0.10204082, 0.015503876, 0.026785715, 0.059541985, 0.0, 0.005952381, 0.020833334, 0.028409092, 0.039408866, 0.02173913, 0.015384615, 0.006779661, 0.014577259, 0.07768469, 0.029891305, 0.013937282, 0.01671123, 0.0065312046, 0.016649324, 0.0056369784, 0.0020325202, 0.01875, 0.03163017, 0.015384615, 0.015873017, 0.012411779, 0.025773196, 0.007304602, 0.016806724, 0.0192, 0.030120483, 0.013565891, 0.0068897638, 0.06122449, 0.011661808, 0.01396648, 0.01724138, 0.018957347, 0.0076045627, 0.012941176, 0.0114613185, 0.01146789, 0.014925373, 0.03125, 0.027171277, 0.010365854, 0.030674847, 0.020007696, 0.027237354, 0.052863438, 0.030776093, 0.022109918, 0.008720931, 0.023076924, 0.010309278, 0.0070339977, 0.023088023, 0.029411765, 0.03846154, 0.011292347, 0.01208981, 0.008213553, 0.020224718, 0.022801302, 0.012924667, 0.0091743115, 0.008032128, 0.0121555915, 0.055555556, 0.013020833, 0.048387095, 0.021276595, 0.0055762082, 0.008583691, 0.013977128, 0.07317073, 0.0038834952, 0.015492254, 0.0, 0.021276595, 0.0, 0.013333334, 0.016620498, 0.0044444446, 0.014563107, 0.0123152705, 0.019292604, 0.0028089888, 0.025551684, 0.017449664, 0.036793694, 0.039473683, 0.012738854, 0.02173913, 0.012552301, 0.022530328, 0.009334889, 0.00896861, 0.015873017, 0.0014245014, 0.02668938, 0.02, 0.0091047045, 
0.024193548, 0.009057971, 0.008403362, 0.0141187925, 0.016611295, 0.013268999, 0.021903323, 0.02153846, 0.022857143, 0.018549748, 0.01724138, 0.014771049, 0.035799522, 0.019434629, 0.014925373, 0.0121630505, 0.018009478, 0.012345679, 0.020652622, 0.01636444, 0.018989379, 0.018196415, 0.01746507, 0.015415549, 0.033950616, 0.025889968, 0.031948883, 0.01591512, 0.028753994, 0.022144811, 0.018708508, 0.0, 0.02009329, 0.025510008, 0.018932875, 0.008219178, 0.008287293, 0.011363637, 0.016627869, 0.0123854345, 0.013295832, 0.011764706, 0.01183432, 0.013644453, 0.011353899, 0.021132713, 0.009639564, 0.020057306, 0.0118733505, 0.026113672, 0.026595745, 0.01898734, 0.013888889, 0.011940299, 0.007832898, 0.0078125, 0.016073687, 0.007858546, 0.0, 0.010752688, 0.021857923, 0.024656067, 0.00877193, 0.012912482, 0.024829932, 0.019278096, 0.022304833, 0.028831564, 0.017301038, 0.024096385, 0.024523161, 0.019933555, 0.02096436, 0.014506769, 0.017832648, 0.011281225, 0.01811806, 0.024007387, 0.009940358, 0.00862069, 0.008064516, 0.024597919, 0.010256411, 0.00973236, 0.023809524, 0.028735632, 0.017857144, 0.039473683, 0.011952192, 0.036697246, 0.049953748, 0.008130081, 0.07017544, 0.01827957, 0.015789473, 0.019398643, 0.02413273, 0.016746411, 0.0116959065, 0.0091759, 0.009006391, 0.008064516, 0.020966802, 0.02739726, 0.010601719, 0.009015122, 0.025974026, 0.013314313, 0.007743363, 0.014030612, 0.036697246, 0.009708738, 0.028571429, 0.0047393367, 0.020836597, 0.032663316, 0.026845638, 0.027726432, 0.01622718, 0.030927835, 0.01438849, 0.029816514, 0.0, 0.027777778, 0.011627907, 0.015384615, 0.038416762, 0.029940119, 0.03587444, 0.011627907, 0.025032938, 0.0, 0.040229887, 0.045380875, 0.07692308, 0.036956523, 0.03277635, 0.023465704, 0.015503876, 0.020100502, 0.004878049, 0.007317073, 0.0151351355, 0.020833334, 0.023169601, 0.03108108, 0.019125683, 0.011052551, 0.033557046, 0.1, 0.035714287, 0.17948718, 0.027431421, 0.024066092, 0.0, 0.023131672, 0.01, 0.009615385, 0.011904762, 
0.01908397, 0.033557046, 0.05952381, 0.049382716, 0.02112676, 0.0, 0.017216643, 0.020618556, 0.011019284, 0.005524862, 0.0042432817, 0.0046296297, 0.004733728, 0.014247552, 0.026463512, 0.013192612, 0.037037037, 0.100890204, 0.008498584, 0.0085796, 0.029940119, 0.033472802, 0.027582478, 0.0065789474, 0.028481012, 0.025531914, 0.03133903, 0.00862069, 0.019902699, 0.013906447, 0.020427112, 0.0, 0.014364641, 0.0060975607, 0.026515152, 0.06944445, 0.0, 0.022403259, 0.06477733, 0.06278027, 0.010526316, 0.020833334, 0.014173228, 0.016, 0.010302198, 0.010384216, 0.012376934, 0.013335613, 0.012738854, 0.012351327, 0.034802783, 0.016438356, 0.028037382, 0.018382354, 0.03125, 0.00896861, 0.022988506, 0.030303031, 0.024725275, 0.020066889, 0.021518987, 0.01425332, 0.0118811885, 0.014925373, 0.0, 0.016216217, 0.022796353, 0.03409091, 0.028633406, 0.02283342, 0.03685092, 0.015065913, 0.025477707, 0.0, 0.028304921, 0.01726264, 0.024930747, 0.015609756, 0.024049217, 0.023870418, 0.02432886, 0.037527595, 0.10616438, 0.043164168, 0.0, 0.02568982, 0.034401875, 0.07865169, 0.013209393, 0.009484292, 0.023342175, 0.03508772, 0.008350731, 0.009410802, 0.008287293, 0.0, 0.00983965, 0.025423728, 0.017405063, 0.0128410915, 0.01101393, 0.0074247755, 0.030955585, 0.014054054, 0.03163017, 0.01859628, 0.031390134, 0.016466118, 0.014221861, 0.0131752305, 0.019969279, 0.0, 0.008324662, 0.022146508, 0.022102747, 0.013114754, 0.039215688, 0.02183406, 0.018181818, 0.021063905, 0.030470913, 0.021847345, 0.021341464, 0.03802589, 0.028241334, 0.01754386, 0.028455285, 0.035714287, 0.031860225, 0.022238975, 0.036741216, 0.03649635, 0.02870067, 0.014084507, 0.028571429, 0.030063292, 0.03277175, 0.024504084, 0.01058201, 0.028313253, 0.019572955, 0.013605442, 0.0272, 0.03539823, 0.016806724, 0.030303031, 0.0030395137, 0.03311258, 0.025270758, 0.013490725, 0.014492754, 0.03237519, 0.019512195, 0.024691358, 0.016853932, 0.028065894, 0.027777778, 0.021186441, 0.027083334, 0.012345679, 0.036764707, 0.02016129, 
0.014492754, 0.017021276, 0.026595745, 0.030508475, 0.0, 0.048780486, 0.037037037, 0.022522522, 0.031578947, 0.034596376, 0.020533253, 0.010666667, 0.029689608, 0.010204081, 0.026219714, 0.021472393, 0.03076923, 0.011437909, 0.047003526, 0.01640135, 0.025452489, 0.03469388, 0.029612755, 0.022813689, 0.0, 0.033613447, 0.014336918, 0.020346647, 0.016949153, 0.0, 0.015015015]
@@ -14,8 +16,9 @@
1416
tsql_err = [0.02173913, 0.105882354, 0.067226894, 0.14893617, 0.100271, 0.12903225, 0.14194915, 0.10358566, 0.09090909, 0.08431373, 0.09885932, 0.040268455, 0.071428575, 0.11764706, 0.071428575, 0.11, 0.08486239, 0.09867452, 0.061904762, 0.086021505, 0.106687896, 0.09396434, 0.052173913, 0.119873814, 0.06779661, 0.007575758, 0.107860014, 0.047826085, 0.13372093, 0.11731844, 0.06622516, 0.07373272, 0.0822335, 0.0862069, 0.07537012, 0.110918544]
1517
quorum_err = [0.04347826, 0.05882353, 0.08695652, 0.09677419, 0.01980198, 0.0625, 0.0, 0.0625, 0.029535865, 0.0625, 0.0, 0.0625, 0.0392562, 0.023255814, 0.0, 0.0625, 0.039215688, 0.11594203, 0.07692308, 0.0625, 0.020408163, 0.026666667, 0.03420523, 0.060606062, 0.028571429, 0.011627907, 0.02173913, 0.048611112, 0.14285715, 0.04597701, 0.078947365, 0.04597701, 0.044692736, 0.048611112, 0.01010101, 0.06666667, 0.06716418, 0.046153847, 0.046153847, 0.07317073, 0.054263566, 0.09166667, 0.00990099, 0.06699752, 0.044444446, 0.044612218, 0.044444446, 0.035714287, 0.035897437, 0.0974359, 0.035714287, 0.059679765, 0.120508984, 0.035714287, 0.03373494, 0.05263158, 0.027842227, 0.086666666, 0.027027028, 0.041526373, 0.032258064, 0.06954437, 0.032258064, 0.0, 0.0, 0.0040816325, 0.037456445, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.02982555, 0.032258064, 0.0, 0.023076924, 0.093023255, 0.02690583, 0.09803922, 0.041800644, 0.05, 0.030150754, 0.04901961, 0.011111111, 0.02027027, 0.009933775, 0.024390243, 0.0, 0.08130081, 0.03529412, 0.1, 0.05357143, 0.01923077, 0.05901639, 0.072972976, 0.037037037, 0.043624163, 0.043149944, 0.009708738, 0.020942409, 0.0546875, 0.022727273, 0.04404145, 0.05574913, 0.10724638, 0.042043086]
1618

17-
language_data = [antlr_err, java_st_err, java8_st_err, java_guava_err, java8_guava_err, sqlite_err, tsql_err]
18-
labels = ["antlr\nn=12", "java_st\nn=59", "java8_st\nn=59", "java_guava\nn=511", "java8_guava\nn=511", "sqlite\nn=36", "tsql\nn=36"]
19+
20+
language_data = [antlr_err, java_st_err, java8_st_err, java_guava_err, java8_guava_err, sqlite_err, tsql_err, sqlite_noisy_err, tsql_noisy_err]
21+
labels = ["antlr\nn=12", "java_st\nn=59", "java8_st\nn=59", "java_guava\nn=511", "java8_guava\nn=511", "sqlite\nn=36", "tsql\nn=36", "sqlite_noisy\nn=36", "tsql_noisy\nn=36"]
1922
fig = plt.figure()
2023
ax = plt.subplot(111)
2124
ax.boxplot(language_data,
@@ -25,7 +28,7 @@
2528
showfliers=False)
2629
ax.set_xticklabels(labels, rotation=60, fontsize=12)
2730
plt.xticks(range(1,len(labels)+1), labels, rotation=60)
28-
pylab.ylim([0,.22])
31+
pylab.ylim([0,.28])
2932
ax.yaxis.grid(True, linestyle='-', which='major', color='lightgrey', alpha=0.5)
3033
ax.set_xlabel("Grammar and corpus size", fontsize=14)
3134
ax.set_ylabel("Misclassification Error Rate", fontsize=14)

python/src/subset_validator.py

Lines changed: 15 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -1,29 +1,28 @@
11
#
22
# AUTO-GENERATED FILE. DO NOT EDIT
3-
# CodeBuff 1.4.15 'Wed May 18 13:10:39 PDT 2016'
3+
# CodeBuff 1.4.19 'Fri Jun 17 15:30:29 PDT 2016'
44
#
5+
import numpy as np
56
import matplotlib.pyplot as plt
67

78
fig = plt.figure()
89
ax = plt.subplot(111)
9-
N = 20
10+
N = 3
1011
sizes = range(1,N+1)
11-
java = [0.16304348,0.10237849,0.08571429,0.0798573,0.079562046,0.074458055,0.06195547,0.06196841,0.0625,0.04628502,0.06451613,0.055555556,0.06676238,0.057877813,0.05345912,0.058419243,0.05352798,0.05352798,0.0522541,0.07192575]
12-
ax.plot(sizes, java, label="java", marker='o')
13-
sqlite = [0.2282507,0.18888889,0.1773779,0.15086207,0.14102565,0.14479166,0.15476191,0.13793103,0.1223176,0.14035088,0.13157895,0.1218638,0.1375,0.13559322,0.109970674,0.12195122,0.110687025,0.13598673,0.11510792,0.1097561]
14-
ax.plot(sizes, sqlite, label="sqlite", marker='o')
15-
java8 = [0.15189873,0.12333629,0.094339624,0.076271184,0.074283235,0.07526882,0.073765434,0.06377325,0.07122233,0.054545455,0.059602648,0.055120576,0.0745614,0.05109489,0.04610656,0.05580357,0.05982906,0.050847456,0.046341464,0.046296295]
16-
ax.plot(sizes, java8, label="java8", marker='o')
17-
antlr = [0.2754491,0.2040521,0.2124031,0.17985612,0.1779661,0.16949153,0.18539326,0.16157518,0.16949153,0.16949153,0.16949153,0.15631849,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0]
18-
ax.plot(sizes, antlr, label="antlr", marker='o')
19-
tsql = [0.29104477,0.17131475,0.1609538,0.14893617,0.12794118,0.1299435,0.11306901,0.12872154,0.12873563,0.12241888,0.11083744,0.11026616,0.101369865,0.11002445,0.1080402,0.11764706,0.10382514,0.113300495,0.09202454,0.10176991]
20-
ax.plot(sizes, tsql, label="tsql", marker='o')
21-
java_guava = [0.093333334,0.07038835,0.052540164,0.053140096,0.042644944,0.03448276,0.03635262,0.03033839,0.034767237,0.03448276,0.032258064,0.028892456,0.029804727,0.02919708,0.028265107,0.027777778,0.024937656,0.027322404,0.029931974,0.023880597]
22-
ax.plot(sizes, java_guava, label="java_guava", marker='o')
12+
sqlite = [0.39566395,0.19639066,0.1870229]
13+
ax.plot(range(1,len(sqlite)+1), sqlite, label="sqlite", marker='o')
14+
antlr = [0.23529412,0.11906425,0.22540188]
15+
ax.plot(range(1,len(antlr)+1), antlr, label="antlr", marker='o')
16+
java_st = [0.1372315,0.07272727,0.06632213]
17+
ax.plot(range(1,len(java_st)+1), java_st, label="java_st", marker='o')
18+
java8_st = [0.2593828,0.06481481,0.0754717]
19+
ax.plot(range(1,len(java8_st)+1), java8_st, label="java8_st", marker='o')
20+
tsql = [0.23404256,0.24701196,0.13541667]
21+
ax.plot(range(1,len(tsql)+1), tsql, label="tsql", marker='o')
2322

2423
ax.yaxis.grid(True, linestyle='-', which='major', color='lightgrey', alpha=0.5)
25-
ax.set_xlabel("Number n of training files in sample subset corpus")
26-
ax.set_ylabel("Median Error rate for 50 trials")
24+
ax.set_xlabel("Number n of training files in sample subset corpus", fontsize=14)
25+
ax.set_ylabel("Median Error rate for 5 trials", fontsize=14)
2726
ax.set_title("Effect of Corpus size on Median Leave-one-out Validation Error Rate")
2827
plt.legend()
2928
plt.tight_layout()

src/org/antlr/codebuff/validation/LeaveOneOutValidator.java

Lines changed: 12 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -24,13 +24,8 @@
2424
import java.util.concurrent.Executors;
2525
import java.util.concurrent.TimeUnit;
2626

27-
import static org.antlr.codebuff.Tool.ANTLR4_DESCR;
28-
import static org.antlr.codebuff.Tool.JAVA8_DESCR;
29-
import static org.antlr.codebuff.Tool.JAVA8_GUAVA_DESCR;
30-
import static org.antlr.codebuff.Tool.JAVA_DESCR;
31-
import static org.antlr.codebuff.Tool.JAVA_GUAVA_DESCR;
32-
import static org.antlr.codebuff.Tool.SQLITE_CLEAN_DESCR;
33-
import static org.antlr.codebuff.Tool.TSQL_CLEAN_DESCR;
27+
import static org.antlr.codebuff.Tool.SQLITE_NOISY_DESCR;
28+
import static org.antlr.codebuff.Tool.TSQL_NOISY_DESCR;
3429
import static org.antlr.codebuff.Tool.getFilenames;
3530
import static org.antlr.codebuff.Tool.load;
3631
import static org.antlr.codebuff.Tool.normalizedLevenshteinDistance;
@@ -262,7 +257,7 @@ public static String testAllLanguages(LangDescriptor[] languages, String[] corpu
262257
" showfliers=False)\n"+
263258
"ax.set_xticklabels(labels, rotation=60, fontsize=12)\n"+
264259
"plt.xticks(range(1,len(labels)+1), labels, rotation=60)\n" +
265-
"pylab.ylim([0,.22])\n"+
260+
"pylab.ylim([0,.28])\n"+
266261
"ax.yaxis.grid(True, linestyle='-', which='major', color='lightgrey', alpha=0.5)\n" +
267262
"ax.set_xlabel(\"Grammar and corpus size\", fontsize=14)\n"+
268263
"ax.set_ylabel(\"Misclassification Error Rate\", fontsize=14)\n" +
@@ -275,13 +270,15 @@ public static String testAllLanguages(LangDescriptor[] languages, String[] corpu
275270

276271
public static void main(String[] args) throws Exception {
277272
LangDescriptor[] languages = new LangDescriptor[] {
278-
JAVA_DESCR,
279-
JAVA8_DESCR,
280-
JAVA_GUAVA_DESCR,
281-
JAVA8_GUAVA_DESCR,
282-
ANTLR4_DESCR,
283-
SQLITE_CLEAN_DESCR,
284-
TSQL_CLEAN_DESCR,
273+
// JAVA_DESCR,
274+
// JAVA8_DESCR,
275+
// JAVA_GUAVA_DESCR,
276+
// JAVA8_GUAVA_DESCR,
277+
// ANTLR4_DESCR,
278+
// SQLITE_CLEAN_DESCR,
279+
// TSQL_CLEAN_DESCR,
280+
SQLITE_NOISY_DESCR,
281+
TSQL_NOISY_DESCR,
285282
// QUORUM_DESCR,
286283
};
287284
List<String> corpusDirs = map(languages, l -> l.corpusDir);

src/org/antlr/codebuff/validation/SubsetValidator.java

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -58,9 +58,9 @@ public SubsetValidator(String rootDir, LangDescriptor language) throws Exception
5858
public static void main(String[] args) throws Exception {
5959
LangDescriptor[] languages = new LangDescriptor[] {
6060
// QUORUM_DESCR,
61+
ANTLR4_DESCR,
6162
JAVA_DESCR,
6263
JAVA8_DESCR,
63-
ANTLR4_DESCR,
6464
// SQLITE_NOISY_DESCR,
6565
SQLITE_CLEAN_DESCR,
6666
// TSQL_NOISY_DESCR,
@@ -87,11 +87,11 @@ public static void main(String[] args) throws Exception {
8787
"sizes = range(1,N+1)\n" +
8888
"<results:{r |\n" +
8989
"<r> = [<rest(results.(r)); separator={,}>]\n"+
90-
"ax.plot(sizes, <r>, label=\"<r>\", marker='o')\n" +
90+
"ax.plot(range(1,len(<r>)+1), <r>, label=\"<r>\", marker='o')\n" +
9191
"}>\n" +
9292
"ax.yaxis.grid(True, linestyle='-', which='major', color='lightgrey', alpha=0.5)\n" +
93-
"ax.set_xlabel(\"Number n of training files in sample subset corpus\")\n"+
94-
"ax.set_ylabel(\"Median Error rate for <trials> trials\")\n" +
93+
"ax.set_xlabel(\"Number n of training files in sample subset corpus\", fontsize=14)\n"+
94+
"ax.set_ylabel(\"Median Error rate for <trials> trials\", fontsize=14)\n" +
9595
"ax.set_title(\"Effect of Corpus size on Median Leave-one-out Validation Error Rate\")\n"+
9696
"plt.legend()\n" +
9797
"plt.tight_layout()\n" +
@@ -113,7 +113,7 @@ public static void main(String[] args) throws Exception {
113113
public static float[] getMedianErrorRates(LangDescriptor language, int maxNumFiles, int trials) throws Exception {
114114
SubsetValidator validator = new SubsetValidator(language.corpusDir, language);
115115
List<InputDocument> documents = load(validator.allFiles, language);
116-
float[] medians = new float[maxNumFiles+1];
116+
float[] medians = new float[Math.min(documents.size(),maxNumFiles)+1];
117117

118118
int ncpu = Runtime.getRuntime().availableProcessors();
119119
if ( FORCE_SINGLE_THREADED ) {

0 commit comments

Comments
 (0)