K-nearest neighbor (knn) algorithm

The idea behind k-nearest neighbors: determine which category a sample belongs to from the categories of its nearest "neighbors".

Suppose you wake up and don't know where you are. Using a computer, you locate your five "nearest" neighbors: four of them are on Mars and one is on the Moon. Since most of your nearest neighbors are on Mars, you conclude that you are most likely on Mars too.

Case 1

from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
import pandas as pd

def knncls():
    """Classify movies with k-nearest neighbours and print the results.

    Reads ``./data/movies.csv``, uses every column except ``type`` and
    ``movie_name`` as features and the ``type`` column as the target,
    holds out 25% of the rows as a test set, fits a
    ``KNeighborsClassifier`` on the rest, then prints the test-set
    predictions followed by the accuracy on the test set.

    Returns:
        None. All results are written to standard output.
    """
    # Forecast movie classification
    data = pd.read_csv("./data/movies.csv")

    # Extract feature values and the target value
    x = data.drop(["type", "movie_name"], axis=1)
    y = data["type"]

    # Split the data set: 75% for training, 25% for testing
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25)

    # Forecast by knn. The estimator's default k is 5; cap it at the number
    # of training rows so that a very small data set does not make the
    # neighbour query fail with "n_neighbors > n_samples".
    knn = KNeighborsClassifier(n_neighbors=min(5, len(x_train)))

    knn.fit(x_train, y_train)

    y_predict = knn.predict(x_test)
    print(x_test, "The forecast result of is:", y_predict)

    print("The prediction accuracy is:", knn.score(x_test, y_test))

if __name__ == '__main__':
    # Run the movie-classification example when executed as a script.
    knncls()
Sample rows of ./data/movies.csv (movie name, two numeric features, type label):
California Man,3,104,1
He's not Really into dues,2,100,1
Beautiful Woman,1,81,1
Kevin Longblade,101,10,2
Robo Slayer 3000,99,5,2
Amped II,98,2,2

Case 2: Facebook check-in location

Predicting the place where a Facebook user checks in
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd

def knncls():
    """Predict Facebook check-in places with k-nearest neighbours.

    Reads the first 100000 rows of ``./data/fb/train.csv``, keeps only the
    rows inside a small x/y window, derives weekday/day/hour/minute
    features from the ``time`` column, discards places with 3 or fewer
    check-ins, standardizes the features, fits a ``KNeighborsClassifier``
    on 75% of the rows and prints its accuracy on the remaining 25%.

    Returns:
        None. The accuracy is written to standard output.
    """
    # Using pandas to read the first 100000 rows
    train_data = pd.read_csv("./data/fb/train.csv", nrows=100000)

    # Feature engineering
    # 1. Reduce the range of x, y. Take an explicit copy so the columns
    #    added below are written to an independent frame instead of to a
    #    possible view of the original (avoids SettingWithCopyWarning).
    train_data = train_data.query("x>1.0 & x<1.5 & y>1.0 & y<2.5").copy()

    # 2. Parse the timestamp (interpreted as seconds)
    time_value = pd.to_datetime(train_data["time"], unit="s")

    # 3. Add time-derived features. The day-of-month column is named "day";
    #    it was previously stored under the misleading name "year".
    train_data["weekday"] = time_value.dt.weekday
    train_data["day"] = time_value.dt.day
    train_data["hour"] = time_value.dt.hour
    train_data["minute"] = time_value.dt.minute

    # 4. Delete the raw timestamp feature
    train_data = train_data.drop(["time"], axis=1)

    # 5. Keep only the places with more than 3 check-ins
    place_count = train_data.groupby("place_id").count()
    place_count_r = place_count[place_count.row_id > 3].reset_index()
    train_data = train_data[train_data["place_id"].isin(place_count_r["place_id"])]

    # Extract feature values and target values
    x = train_data.drop(["place_id", "row_id"], axis=1)
    y = train_data["place_id"]

    # Split the data set: 75% for training, 25% for testing
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25)

    # Standardize: fit the scaler on the training set only, then apply the
    # same transformation to the test set
    std = StandardScaler()
    x_train = std.fit_transform(x_train)
    x_test = std.transform(x_test)

    # Instantiate and fit the knn estimator
    knn = KNeighborsClassifier()
    knn.fit(x_train, y_train)

    # Printing accuracy (score() predicts on x_test internally)
    print("The accuracy is:", knn.score(x_test, y_test))

    return None

if __name__ == '__main__':
    # Run the Facebook check-in example when executed as a script.
    knncls()

Posted by gary00ie on Thu, 30 Apr 2020 06:14:46 -0700