22# coding:utf8
33'''
44Created on 2017-04-07
5+ Update on 2017-06-20
56MapReduce version of Pegasos SVM
67Using mrjob to automate job flow
7- @author: Peter/ApacheCN-xy
8+ @author: Peter/ApacheCN-xy/片刻
9+ 《机器学习实战》更新地址:https://github.com/apachecn/MachineLearning
810'''
911from mrjob .job import MRJob
1012
@@ -17,14 +19,14 @@ class MRsvm(MRJob):
1719
def __init__(self, *args, **kwargs):
    """Set up the job: load the pickled data set and reset the solver state.

    All positional and keyword arguments are forwarded unchanged to the
    parent class constructor.
    """
    super(MRsvm, self).__init__(*args, **kwargs)
    # Load the data set once per job instance. The file is opened in a
    # ``with`` block so the handle is closed as soon as unpickling finishes
    # instead of being left for the garbage collector.
    # NOTE(review): the path is hard-coded and absolute, and it contains a
    # space after "MachineLearning/" -- confirm that is intended.
    # NOTE(review): pickle must only be used on trusted files; unpickling
    # can execute arbitrary code.
    with open('/opt/git/MachineLearning/ input/15.BigData_MapReduce/svmDat27') as data_file:
        self.data = pickle.load(data_file)
    self.w = 0  # 0 is the "not initialised yet" marker tested in map_fin
    self.eta = 0.69  # presumably the step-size constant -- TODO confirm
    self.dataList = []  # sample indices collected by the mapper
    self.k = self.options.batchsize  # value of the batchsize option
    self.numMappers = 1
    self.t = 1  # iteration number
27-
29+
2830 def configure_options (self ):
2931 super (MRsvm , self ).configure_options ()
3032 self .add_passthrough_option (
@@ -42,20 +44,20 @@ def map(self, mapperId, inVals): # 需要 2 个参数
4244 self .w = inVals [1 ]
4345 elif inVals [0 ] == 'x' :
4446 self .dataList .append (inVals [1 ]) # 累积数据点计算
45- elif inVals [0 ] == 't' :
47+ elif inVals [0 ] == 't' : # 迭代次数
4648 self .t = inVals [1 ]
4749 else :
48- self .eta = inVals # 这用于 debug, eta未在map中使用
50+ self .eta = inVals # 这用于 debug, eta未在map中使用
4951
def map_fin(self):
    """Emit the selected sample indices, then the current weights and iteration.

    The last column of ``self.data`` is read as the labels and the remaining
    columns as the feature rows. For every index collected in
    ``self.dataList`` the product ``w * x^T`` is computed; when
    ``label * p < 1.0`` the index is emitted as a ``'u'`` record. After the
    loop the weight vector (``'w'``) and the iteration number (``'t'``) are
    emitted as well.

    Yields:
        ``(1, [tag, value])`` pairs, where ``tag`` is ``'u'``, ``'w'`` or
        ``'t'``. Every pair uses the same key ``1``.
    """
    labels = self.data[:, -1]
    X = self.data[:, :-1]  # split the data into features X and labels
    if self.w == 0:
        # First iteration: no weights have been received yet, so initialise w.
        self.w = [0.001] * shape(X)[1]
    for index in self.dataList:
        p = mat(self.w) * X[index, :].T  # p = w * X[index].T
        if labels[index] * p < 1.0:
            # Every record carries the same key ...
            yield (1, ['u', index])
    # ... so that they all arrive at the same reducer.
    yield (1, ['w', self.w])
    yield (1, ['t', self.t])
6163
@@ -66,7 +68,7 @@ def reduce(self, _, packedVals):
6668 elif valArr [0 ] == 'w' :
6769 self .w = valArr [1 ]
6870 elif valArr [0 ] == 't' :
69- self .t = valArr [1 ]
71+ self .t = valArr [1 ]
7072
7173 labels = self .data [:, - 1 ]
7274 X = self .data [:, 0 :- 1 ]
0 commit comments