# K-nearest neighbor (knn) algorithm

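Imagine you wake up and don't know where you are. Using a computer, you locate your five "nearest" neighbors: four of them are on Mars and one is on the Moon. Because most of your nearest neighbors are on Mars, you conclude that you are most likely on Mars too. This majority vote among the nearest neighbors is the idea behind the k-nearest neighbor algorithm.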

# Case 1

``````python
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
import pandas as pd

def knncls(data=None):
    """Predict movie types with k-nearest neighbors and print the results.

    The table is split into a random 75% / 25% train/test partition, a
    ``KNeighborsClassifier`` with default settings is fitted on the training
    part, and the test predictions and the test accuracy are printed.

    :param data: optional DataFrame with a ``movie_name`` column, a ``type``
        target column and numeric feature columns. When omitted, the
        seven-row sample table shown in this article is used, so the script
        runs on its own instead of failing on an undefined ``data`` name.
    :return: None
    """
    if data is None:
        # Function-scope import keeps the snippet self-contained.
        import io

        sample_csv = (
            "movie_name,fight,kiss,type\n"
            "California Man,3,104,1\n"
            "He's not Really into dues,2,100,1\n"
            "Beautiful Woman,1,81,1\n"
            "Robo Slayer 3000,99,5,2\n"
            "Amped II,98,2,2\n"
            "unname,18,90,1\n"
            "vampire,90,15,2\n"
        )
        data = pd.read_csv(io.StringIO(sample_csv))

    # Features: every column except the label and the free-text movie name.
    x = data.drop(["type", "movie_name"], axis=1)
    # Target: the movie type.
    y = data["type"]

    # Hold out a random 25% of the rows for evaluation.
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25)

    # NOTE: the classifier's default n_neighbors is 5; with the seven-row
    # sample the training split has only five rows, so each prediction is a
    # vote over the whole training set. Use more data for a meaningful model.
    knn = KNeighborsClassifier()
    knn.fit(x_train, y_train)

    y_predict = knn.predict(x_test)
    print(x_test, "The forecast result of is:", y_predict)

    print("The prediction accuracy is:", knn.score(x_test, y_test))

if __name__ == '__main__':
    # Run the demo only when executed as a script, not when imported.
    knncls()
``````
Accuracy
``````csv
movie_name,fight,kiss,type
California Man,3,104,1
He's not Really into dues,2,100,1
Beautiful Woman,1,81,1
Robo Slayer 3000,99,5,2
Amped II,98,2,2
unname,18,90,1
vampire,90,15,2
``````

# Case 2: Facebook check-in location data
``````python
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd

def knncls():
    """Predict Facebook check-in places with k-nearest neighbors.

    Reads the first 100000 rows of ``./data/fb/train.csv``, keeps a small
    x/y window, derives calendar features from the ``time`` column, drops
    places with few check-ins, standardizes the features, fits a
    ``KNeighborsClassifier`` with default settings and prints its accuracy
    on a random 25% hold-out split.

    :return: None
    """
    # Read only the first 100000 rows.
    train_data = pd.read_csv("./data/fb/train.csv", nrows=100000)

    # Feature engineering
    # 1. Narrow the range of x and y. The explicit copy makes the result an
    #    independent frame, so the column assignments below do not raise
    #    SettingWithCopyWarning.
    train_data = train_data.query("x>1.0 & x<1.5 & y>1.0 & y<2.5").copy()

    # 2. Parse the timestamp (interpreted as seconds) into datetimes.
    time_value = pd.to_datetime(train_data["time"], unit="s")

    # 3. Derive calendar features. The day of month is stored under "day";
    #    it was previously mislabelled "year".
    train_data["weekday"] = time_value.dt.weekday
    train_data["day"] = time_value.dt.day
    train_data["hour"] = time_value.dt.hour
    train_data["minute"] = time_value.dt.minute

    # 4. Drop the raw timestamp now that its parts are separate features.
    train_data = train_data.drop(["time"], axis=1)

    # 5. Keep only the places that have more than 3 check-ins.
    checkins_per_place = train_data.groupby("place_id")["row_id"].count()
    popular_places = checkins_per_place[checkins_per_place > 3].index
    train_data = train_data[train_data["place_id"].isin(popular_places)]

    # Features are everything except the target and the row identifier.
    x = train_data.drop(["place_id", "row_id"], axis=1)
    y = train_data["place_id"]

    # Hold out a random 25% of the rows for evaluation.
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25)

    # Standardize: fit the scaler on the training split only and reuse its
    # statistics for the test split.
    std = StandardScaler()
    x_train = std.fit_transform(x_train)
    x_test = std.transform(x_test)

    # Fit the k-nearest-neighbors estimator.
    knn = KNeighborsClassifier()
    knn.fit(x_train, y_train)

    # Predict once and reuse the predictions for the accuracy, instead of
    # discarding them and letting knn.score() predict a second time.
    y_predict = knn.predict(x_test)
    accuracy = float((y_predict == y_test.to_numpy()).mean())

    print("The accuracy is:", accuracy)

if __name__ == '__main__':
    # Run the demo only when executed as a script, not when imported.
    knncls()
``````

Posted by gary00ie on Thu, 30 Apr 2020 06:14:46 -0700