Article directory
Program description
A grey relational model is applied to the quality-index data of red wine. The data are first standardized; then the matrix of grey relational coefficients and the average (comprehensive) relational degree of each index are calculated.
Program input: index matrix with the first column as the parent sequence
Program output: correlation matrix
Grey relational analysis (GRA) measures how strongly factors are related by comparing how similar or dissimilar their development trends are; the resulting measure is called the "grey relational degree".
Program / dataset Download
Click to enter the download address
Screenshot of dataset
Figure 1. Quality index of red wine
Core code analysis
Module/BuildModel.py (the model interface; it can also be run directly as a script)
Builds the grey relational model. The caller chooses whether the input data should be standardized; the results are stored in the object's `result` attribute. The code below can be run directly.
# -*- coding: utf-8 -*-
import os

import numpy as np
import pandas as pd

try:
    from sklearn.preprocessing import StandardScaler
except ImportError:
    # scikit-learn is only needed when standard=True; standarOpt raises a
    # clear error if standardization is requested without it.
    StandardScaler = None


class GraModel():
    '''Grey relational analysis (GRA) model.

    After construction the outcome is available in ``self.result``, a dict
    with two entries, each of the form ``{'value': ..., 'desc': ...}``:

    cors:     matrix of relational coefficients, same shape as the input
              (column 0 is the parent sequence compared with itself, so it
              is all ones).
    meanCors: column-wise mean of ``cors``, i.e. the average relational
              degree of every column with respect to the parent sequence.
    '''

    def __init__(self, inputData, p=0.5, standard=True):
        '''
        Store the parameters, optionally standardize the data and build
        the model.

        inputData: Input matrix (rows are samples, columns are
                   attributes); the first column is the parent sequence.
        p:         Resolution coefficient, range 0~1, generally 0.5. The
                   smaller p is, the larger the differences between the
                   relational coefficients and the stronger the
                   discrimination ability.
        standard:  Whether the data must be standardized first.
        '''
        self.inputData = np.array(inputData)
        self.p = p
        self.standard = standard
        # Standardization
        self.standarOpt()
        # Modeling
        self.buildModel()

    def standarOpt(self):
        '''Standardize the input data column-wise when ``standard`` is set.

        The fitted scaler is kept in ``self.scaler`` and ``self.inputData``
        is replaced by the transformed data. Does nothing when
        ``self.standard`` is false.
        '''
        if not self.standard:
            return None
        if StandardScaler is None:
            raise ImportError(
                'scikit-learn is required when standard=True')
        self.scaler = StandardScaler().fit(self.inputData)
        self.inputData = self.scaler.transform(self.inputData)

    def buildModel(self):
        '''Compute the relational coefficients and store them in ``self.result``.'''
        # Work on a float array so integer input is not truncated.
        data = np.asarray(self.inputData, dtype=float)

        # Absolute difference between every column and the parent column.
        # Subtraction allocates a new array, so neither the parent column
        # nor self.inputData is overwritten while the differences are
        # being computed (writing the differences back into views of the
        # same array would zero the parent column after the first step).
        parent = data[:, [0]]
        diffs = np.abs(data - parent)

        # Two-level (global) minimum and maximum difference
        minMin = diffs.min()
        maxMax = diffs.max()

        # Relational coefficient matrix. The denominator is zero only where
        # a value coincides with the parent sequence and p*maxMax is zero;
        # such points are a perfect match and get a coefficient of 1.
        denominator = diffs + self.p * maxMax
        cors = np.divide(
            minMin + self.p * maxMax,
            denominator,
            out=np.ones_like(denominator),
            where=denominator != 0,
        )

        # Average relational degree of every column
        meanCors = cors.mean(axis=0)
        self.result = {
            'cors': {
                'value': cors,
                'desc': 'Correlation coefficient matrix',
            },
            'meanCors': {
                'value': meanCors,
                'desc': 'Average comprehensive correlation coefficient',
            },
        }


if __name__ == "__main__":
    # Path directories
    curDir = os.path.dirname(os.path.abspath(__file__))  # current directory
    baseDir = os.path.dirname(curDir)  # root directory
    staticDir = os.path.join(baseDir, 'Static')  # static file directory
    resultDir = os.path.join(baseDir, 'Result')  # results file directory

    # Example data: one sequence per row, transposed below so that every
    # sequence becomes a column and the first one is the parent sequence.
    data = [
        [1, 1.1, 2, 2.25, 3, 4],
        [1, 1.166, 1.834, 2, 2.314, 3],
        [1, 1.125, 1.075, 1.375, 1.625, 1.75],
        [1, 1, 0.7, 0.8, 0.9, 1.2],
    ]
    data = np.array(data).T

    # Modeling
    model = GraModel(data, standard=True)
    print(model.result)
Interface call and operation effect
Main.py
Call the interface of BuildModel.py, analyze the example, and get the correlation between red wine quality and red wine PH and other indicators.
# -*- coding: utf-8 -*-
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

from Module.BuildModel import GraModel

# Path directories
baseDir = os.path.dirname(os.path.abspath(__file__))  # current directory
staticDir = os.path.join(baseDir, 'Static')  # static file directory
resultDir = os.path.join(baseDir, 'Result')  # results file directory
# savefig does not create missing directories, so make sure it exists.
os.makedirs(resultDir, exist_ok=True)

# The interface requires the first column to be the parent sequence,
# i.e. the red wine quality.
columns = ['quality', 'fixed acidity', 'volatile acidity', 'citric acid',
           'residual sugar', 'chlorides', 'free sulfur dioxide',
           'total sulfur dioxide', 'density', 'pH', 'sulphates', 'alcohol']
data = pd.read_csv(os.path.join(staticDir, 'winequality-red.csv'), sep=';')
data = data[columns]

# Establish the grey relation model on standardized data
model = GraModel(data, standard=True)

# Model calculation results
result = model.result

# Average relevance of every column with respect to the quality column
meanCors = result['meanCors']['value']

# Visualization
# Font able to display Chinese labels
plt.rcParams['font.sans-serif'] = ['SimHei']
# Display the minus sign normally with that font
plt.rcParams['axes.unicode_minus'] = False

# Draw the averages as a one-row heat map
plt.figure(figsize=(8, 12))
sns.heatmap(meanCors.reshape(1, -1), square=True, annot=True, cbar=False,
            vmax=1.0, linewidths=0.1, cmap='viridis')
# Heat map cells span [i, i + 1], so labels are centred at i + 0.5.
plt.yticks([0.5], ['quality'])
plt.xticks(np.arange(len(columns)) + 0.5, columns, rotation=90)
plt.title('Index correlation matrix')
plt.savefig(
    os.path.join(resultDir,
                 'Visualization matrix of index correlation degree.png'),
    dpi=100, bbox_inches='tight')
Figure 2. Heat map of the correlation matrix