#
import numpy as np
import pandas as pd
from pandas import Series, DataFrame
import matplotlib.pyplot as plt
from pylab import mpl

mpl.rcParams['font.sans-serif'] = ['FangSong']  # Specify the default sans-serif font
mpl.rcParams['axes.unicode_minus'] = False  # Fix the minus sign '-' being rendered as a square in saved figures

# Import data: coastal city data.
# NOTE(review): the statements that load the per-city DataFrames (ferrara,
# torino, ...) are not present in this part of the file; they must be defined
# before this point -- confirm where they are loaded.

# Remove useless columns
city_list = [
    ferrara, torino, mantova, milano, ravenna,
    asti, bologna, piacenza, cesena, faenza,
]
for city in city_list:
    city.drop('Unnamed: 0', axis=1, inplace=True)

# Show the relationship between the highest temperature and the distance from
# the sea (observe multiple cities).
city_max_temp = []  # maximum of the 'temp' column, one entry per city
city_dist = []  # maximum of the 'dist' column, one entry per city
for city in city_list:
    temp = city['temp'].max()
    dist = city['dist'].max()
    city_max_temp.append(temp)
    city_dist.append(dist)

plt.scatter(city_dist, city_max_temp)  # x: independent variable, y: dependent variable
plt.xlabel('distance')
plt.ylabel('Maximum temperature')
plt.title('Relationship between the maximum temperature range')

# Observation: the cities close to the sea lie roughly on one straight line,
# and the cities far from the sea lie roughly on another.
# - Take 100 km and 50 km as the dividing points and split the data into two
#   groups, near the sea and far from the sea
#   (near the sea: less than 100; far from the sea: more than 50).

# Convert the lists to NumPy arrays so they can be indexed with a boolean mask.
city_dist = np.array(city_dist)
city_max_temp = np.array(city_max_temp)

# Conditional selection
condition = city_dist < 100  # boolean mask: True where the distance is below 100
near_city_dist = city_dist[condition]  # keep only the values where the mask is True
near_city_temp = city_max_temp[condition]  # same mask applied to the temperatures

# Drawing below
# Scatter plot of maximum temperature against distance for the near-sea cities.
# Each call sits on its own line: when they share a line with the inline
# comment after plt.scatter(...), the label and title calls become part of
# that comment and are never executed.
plt.scatter(near_city_dist, near_city_temp)  # x: independent variable, y: dependent variable
plt.xlabel('Offshore distance')
plt.ylabel('Offshore maximum temperature')
plt.title('The relationship between the maximum temperature range of coastal cities')
# sklearn
Machine learning: an algorithm model is a special object that internally encapsulates an algorithm or an equation (an equation that has not yet been solved). Sample data is the data of the sample objects; it helps the equation find its solution.
Feature data - the independent variables
Target data - the dependent variable
Model classification
Supervised learning: the sample data must contain both feature data and target data
- e.g. the linear regression algorithm model
Unsupervised learning: the sample data contains only feature data
Semi-supervised learning: prediction in the early and late stages of training (rarely used)
What an algorithm model is used for:
Predicting unknown values
Classification
# Score algorithm

# Feature data: the near-sea distances reshaped into a single column, (n, 1)
feature = near_city_dist.reshape(-1, 1)
# Target data: the near-sea maximum temperatures
target = near_city_temp

# NOTE(review): `linner` is not defined in this part of the file; presumably it
# is a regression model created and fitted elsewhere -- confirm before running.
print('True value', target)
print('predicted value', linner.predict(feature))