sklearn linear regression

Keywords: PHP less

#

import numpy as np
import pandas as pd
from pandas import Series,DataFrame
import matplotlib.pyplot as plt
from pylab import mpl

# Global matplotlib text settings, applied in one call.
mpl.rcParams.update({
    # Default sans-serif font family used for all figure text.
    'font.sans-serif': ['FangSong'],
    # Avoid the minus sign '-' being rendered as a square in saved figures.
    'axes.unicode_minus': False,
})

# Import the data for the coastal cities
# Collect the per-city DataFrames and strip the useless 'Unnamed: 0' column
# from each of them in place.
city_list = [
    ferrara, torino, mantova, milano, ravenna,
    asti, bologna, piacenza, cesena, faenza,
]
for city in city_list:
    city.drop(columns='Unnamed: 0', inplace=True)

# Show how the highest temperature relates to the distance from the sea,
# using one point per city.
city_max_temp = [city['temp'].max() for city in city_list]  # highest temperature per city
city_dist = [city['dist'].max() for city in city_list]      # distance value per city

# x: independent variable (distance), y: dependent variable (max temperature)
plt.scatter(city_dist, city_max_temp)
plt.title('Relationship between the maximum temperature range')
plt.xlabel('distance')
plt.ylabel('Maximum temperature')
# Observation: the cities close to the sea fall roughly along one straight line, and the cities far from the sea fall roughly along another.

# Take 100 km and 50 km as the dividing points and split the data into two groups: cities close to the sea (distance less than 100 km) and cities far from the sea (distance more than 50 km).
# Convert the per-city lists to NumPy arrays so they can be filtered with a
# boolean mask.
city_dist = np.array(city_dist)
city_max_temp = np.array(city_max_temp)

# Boolean mask: True for every city less than 100 km from the sea.
condition = city_dist < 100
# Indexing with the mask keeps only the entries where it is True.
near_city_dist = city_dist[condition]
near_city_temp = city_max_temp[condition]

# Plot the near-sea cities: x is the independent variable (distance),
# y is the dependent variable (maximum temperature).
plt.scatter(near_city_dist, near_city_temp)
# Axis labels and title are separate statements; placed after a '#' on the
# scatter line they would be part of the comment and never run.
plt.xlabel('Offshore distance')
plt.ylabel('Offshore maximum temperature')
plt.title('The relationship between the maximum temperature range of coastal cities')

# sklearn

Machine learning:
Algorithm model -- a special object that already integrates (encapsulates) an algorithm or an equation; the equation has not been solved yet.
Sample data -- the data that helps the model object find the solution of that equation. It is made up of:
Feature data -- the independent variables
Target data -- the dependent variable
Model classification:
Supervised learning -- the sample data must contain both feature data and target data
- e.g. the linear regression algorithm model
Unsupervised learning -- the sample data contains only feature data
Semi-supervised learning -- training in the early stage and prediction in the later stage (rarely used)
What an algorithm model is used for:
- Predicting unknown values
- Classification
# Import sklearn and create the model object of the linear regression
# algorithm (supervised learning). The model must exist and be fitted before
# any predict/score call, so this comes first.
from sklearn.linear_model import LinearRegression
linner = LinearRegression()  # instantiation

# Feature data: X must be 2-D, so reshape the 1-D distances into one column.
feature = near_city_dist.reshape(-1, 1)
# Target data
target = near_city_temp

# Solve (train the model): the sample data (features, target) is brought
# into the model object.
linner.fit(feature, target)

# Compare the true targets with the model's predictions on the same features.
print('True value', target)
print('predicted value', linner.predict(feature))

# Call the solved equation on new X values (distances 81 and 90).
y = linner.predict([[81], [90]])

# score algorithm
# Calculate the model score on the training data (for LinearRegression this
# is the coefficient of determination R^2).
linner.score(feature, target)

# Draw the fitted straight line (as a dense run of predicted points) on top
# of the near-sea city scatter.
plt.scatter(near_city_dist, near_city_temp)
plt.title('The relationship between the maximum temperature and distance in coastal cities')
plt.xlabel('Distance between coastal cities')
plt.ylabel('Maximum temperature in coastal cities')

# 100 evenly spaced distances from 0 to 80, as a single-column 2-D array
# for predict().
x = np.linspace(0, 80, num=100)
y = linner.predict(x[:, np.newaxis])
plt.scatter(x, y)

Posted by rowantrimmer on Thu, 31 Oct 2019 16:14:23 -0700