1010
1111
def loadDataSet(fileName):
    """Read a tab-separated sample file into feature rows and labels.

    Every non-blank line must hold at least three numeric fields. The first
    two fields become one feature row and the third becomes that row's label.

    Args:
        fileName: Path of the text file to read.

    Returns:
        A tuple ``(dataMat, labelMat)``: ``dataMat`` is a list of
        ``[float, float]`` rows and ``labelMat`` is the list of float labels,
        both in file order.

    Raises:
        IOError: If the file cannot be opened (``OSError`` on Python 3).
        IndexError: If a non-blank line has fewer than three fields.
        ValueError: If a used field cannot be converted to ``float``.
    """
    dataMat = []
    labelMat = []
    # The context manager closes the handle even when parsing raises.
    with open(fileName) as fr:
        for line in fr:
            stripped = line.strip()
            if not stripped:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            # float() ignores surrounding whitespace, so splitting on the
            # bare tab accepts both "\t" and "\t " separated files.
            lineArr = stripped.split('\t')
            dataMat.append([float(lineArr[0]), float(lineArr[1])])
            labelMat.append(float(lineArr[2]))
    return dataMat, labelMat
2122
2223
2324def seqPegasos (dataSet , labels , lam , T ):
24- m ,n = shape (dataSet ); w = zeros (n )
25+ m , n = shape (dataSet )
26+ w = zeros (n )
2527 for t in range (1 , T + 1 ):
2628 i = random .randint (m )
2729 eta = 1.0 / (lam * t )
28- p = predict (w , dataSet [i ,:])
30+ p = predict (w , dataSet [i , :])
2931 if labels [i ]* p < 1 :
30- w = (1.0 - 1 / t )* w + eta * labels [i ]* dataSet [i ,:]
32+ w = (1.0 - 1 / t )* w + eta * labels [i ]* dataSet [i , :]
3133 else :
3234 w = (1.0 - 1 / t )* w
3335 print w
@@ -39,46 +41,51 @@ def predict(w, x):
3941
4042
def batchPegasos(dataSet, labels, lam, T, k):
    """Compute a weight vector with mini-batch Pegasos-style updates.

    For each of ``T`` rounds the sample indices are shuffled and the first
    ``k`` of them form the batch. Every batch sample whose margin
    ``labels[i] * predict(w, sample)`` is below 1 contributes
    ``labels[i] * sample`` to an accumulated update, which is then applied
    to the weights once per round.

    Args:
        dataSet: Samples, one per row. Rows are read as ``dataSet[i, :]`` and
            must expose ``.A`` (as a numpy matrix row does).
        labels: Per-row labels, indexable by row number.
        lam: Regularisation constant; the step size in round ``t`` is
            ``1.0 / (lam * t)``.
        T: Number of rounds to run.
        k: Number of shuffled samples used per round. Must not exceed the
            number of rows, otherwise indexing the shuffled list raises
            ``IndexError``.

    Returns:
        The final weights. When ``T < 1`` this is the initial ``zeros(n)``
        array; otherwise it is the result of combining that array with the
        ``mat`` accumulator (a 1 x n matrix under numpy's rules).
    """
    m, n = shape(dataSet)
    w = zeros(n)
    # A real list is required: the shuffle is in place, which a Python 3
    # range object does not support.
    dataIndex = list(range(m))
    for t in range(1, T + 1):
        wDelta = mat(zeros(n))  # reset the accumulated update for this round
        eta = 1.0 / (lam * t)
        random.shuffle(dataIndex)
        for j in range(k):  # walk the first k shuffled samples (the batch)
            i = dataIndex[j]
            p = predict(w, dataSet[i, :])  # mapper code
            if labels[i] * p < 1:  # mapper code: margin below 1
                wDelta += labels[i] * dataSet[i, :].A  # accumulate the change
        # Apply the change once per round. 1.0 / t keeps the division in
        # floating point: under Python 2, `1 / t` floor-divides to 0 for
        # every t > 1, which left the weights unshrunk.
        w = (1.0 - 1.0 / t) * w + (eta / k) * wDelta
    return w
5558
5659
57-
# Load the sample points and labels, then train with the batch variant.
datArr, labelList = loadDataSet('input/15.BigData_MapReduce/testSet.txt')
datMat = mat(datArr)
finalWs = batchPegasos(datMat, labelList, 2, 50, 100)
# Parenthesised single-argument form prints identically on Python 2 and 3.
print(finalWs)

import matplotlib
import matplotlib.pyplot as plt
fig = plt.figure()
ax = fig.add_subplot(111)

# Split the points into two groups for plotting: label 1.0 versus the rest.
x1 = []
y1 = []
xm1 = []
ym1 = []
for i, label in enumerate(labelList):
    if label == 1.0:
        x1.append(datMat[i, 0])
        y1.append(datMat[i, 1])
    else:
        xm1.append(datMat[i, 0])
        ym1.append(datMat[i, 1])
ax.scatter(x1, y1, marker='s', s=90)
ax.scatter(xm1, ym1, marker='o', s=50, c='red')

x = arange(-6.0, 8.0, 0.1)
# Line where finalWs[0, 0] * x + finalWs[0, 1] * y == 0 (no bias term).
y = (-finalWs[0, 0] * x - 0) / finalWs[0, 1]
# Hard-coded comparison line shown in the legend as '2 Iterations'.
# NOTE(review): these constants are not computed in this script; presumably
# they were copied from an earlier 2-iteration run -- confirm.
y2 = (0.498442 * x) / 0.092387
ax.plot(x, y)
ax.plot(x, y2, 'g-.')
ax.axis([-6, 8, -4, 5])
ax.legend(('50 Iterations', '2 Iterations'))
plt.show()
0 commit comments