
Commit ffc1f42

KNN explained and implemented!
1 parent af54712 commit ffc1f42

4 files changed

Lines changed: 105 additions & 0 deletions


Machine Learning/KNN/Knn.py

Lines changed: 42 additions & 0 deletions
@@ -0,0 +1,42 @@
import numpy as np
from collections import Counter


# Euclidean distance between two feature vectors
def distform(a, b):
    return np.sqrt(np.sum((a - b) ** 2))


class KNN:
    def __init__(self, k=4):
        # store the k value
        self.k = k

    # follow the scikit-learn convention: the fit method stores the training samples
    def fit(self, X, y):
        # storing the values
        self.X_train = X
        self.y_train = y

    # predict labels for new samples
    def predict(self, testSamps):
        # delegate each sample to a helper method and collect the results in a list
        predictedLabs = [self.__predict(x) for x in testSamps]
        # convert the list into a numpy array
        return np.array(predictedLabs)

    # helper method which classifies one sample: compute the distances,
    # find the nearest neighbors and their labels, then do a majority vote
    # and choose the most common class label
    def __predict(self, newsamp):
        # 1. compute the distance to every training sample
        dist = [distform(newsamp, x_train) for x_train in self.X_train]

        # 2. get the indices of the k nearest neighbors and their labels
        k_indices = np.argsort(dist)[:self.k]
        k_nearest_labels = [self.y_train[elem] for elem in k_indices]

        # 3. perform a majority vote
        most_comm = Counter(k_nearest_labels).most_common(1)
        return most_comm[0][0]
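
The distance helper is the piece most worth verifying, since the original had the square applied outside the sum. A minimal sketch, assuming Knn.py is importable from the working directory; the vectors p and q are made-up illustrative values:

import numpy as np
from Knn import distform

# two made-up 4-dimensional feature vectors (hypothetical values,
# chosen to match the iris feature count)
p = np.array([5.1, 3.5, 1.4, 0.2])
q = np.array([6.2, 2.9, 4.3, 1.3])

# the hand-rolled helper should agree with NumPy's built-in Euclidean norm
print(distform(p, q))         # sqrt of the sum of squared differences
print(np.linalg.norm(p - q))  # built-in equivalent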

Machine Learning/KNN/Knntest.py

Lines changed: 51 additions & 0 deletions
@@ -0,0 +1,51 @@
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from Knn import KNN

colormp = ListedColormap(["#FF0000", "#00FF00", "#0000FF"])


iris = datasets.load_iris()
X, y = iris.data, iris.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1234)

'''
print(X_train.shape)

# result is (120, 4), where 120 is the number of samples and 4 is the number of features per sample

print(X_train[0])

print(y_train.shape)
print(y_train)

plt.figure()

plt.scatter(X[:, 0], X[:, 1], c=y, cmap=colormp, edgecolor="k", s=20)
plt.show()

alpha = [1, 1, 1, 2, 2, 3, 4, 5, 6]

from collections import Counter

# print only the single most common item
most_common = Counter(alpha).most_common(1)
print(most_common)
'''

myclassifier = KNN(k=3)
myclassifier.fit(X_train, y_train)

# predicting my test samples
predict = myclassifier.predict(X_test)

# computing the accuracy of my model, i.e. the fraction of my predictions that are correctly classified
how_accurate = np.sum(predict == y_test) / len(y_test)

print(how_accurate)
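
As a sanity check, one could compare against scikit-learn's built-in KNeighborsClassifier on the same split. The sketch below reuses X_train, X_test, y_train, and y_test from Knntest.py; it is an assumed extension, not something the commit includes:

from sklearn.neighbors import KNeighborsClassifier

sk_classifier = KNeighborsClassifier(n_neighbors=3)  # same k as above
sk_classifier.fit(X_train, y_train)
sk_predict = sk_classifier.predict(X_test)

sk_accuracy = np.sum(sk_predict == y_test) / len(y_test)
print(sk_accuracy)  # should match how_accurate up to tie-breaking differences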

Machine Learning/KNN/README.md

Lines changed: 12 additions & 0 deletions
@@ -0,0 +1,12 @@
### K Nearest Neighbor

1. I have two classes, and my feature vectors are 2D
2. $x_1$ is the 1st axis and $x_2$ is the 2nd axis
3. I have a set of training samples
4. For each new sample I want to classify, I calculate the distance between that sample and each of the training samples
5. I take a look at the nearest neighbors
6. I give the new sample the label of the most common class among those neighbors
7. For example, with 2 green neighbors and 1 red neighbor, the label will be the green class
8. To calculate the distance between the samples I use the Euclidean distance, aka the distance formula

### More general case formula:

$$d(p, q) = \sqrt{\sum_{i=1}^{n} (q_i - p_i)^2}$$

where $n$ is the number of dimensions.
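
As a concrete worked example (the numbers are arbitrary): for $p = (1, 2)$ and $q = (4, 6)$ with $n = 2$,

$$d(p, q) = \sqrt{(4 - 1)^2 + (6 - 2)^2} = \sqrt{9 + 16} = \sqrt{25} = 5$$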
Binary file (1.64 KB) not shown.
