Grey relational analysis (GRA) implemented in Python -- taking the red wine quality index as an example

Keywords: Attribute

Article directory

Program description

A grey relational model is applied to the red wine quality index data. First the data are standardized; then the correlation coefficient matrix and the average comprehensive correlation degree are calculated.
Program input: index matrix with the first column as the parent sequence
Program output: correlation matrix

Grey relational analysis (GRA) is a method to measure the degree of correlation among factors according to the degree of similarity or dissimilarity of the development trend among factors, i.e. "grey relational degree".

Program / dataset Download

Click to enter the download address

Screenshot of dataset

Figure 1. Quality index of red wine

Core code analysis

Module/BuildModel.py (interface, can be run directly)

Build the grey relational model. The caller can choose whether the input data should be standardized; the results are stored in the object's `result` attribute, and the code can be run directly.

# -*- coding: utf-8 -*-
from sklearn.preprocessing import StandardScaler
import pandas as pd
import numpy as np
import os

class GraModel():
    '''Grey relational analysis (GRA) model.

    The first column of the input matrix is the parent (reference) sequence;
    every column (the parent included) is compared against it. After
    construction the outcome is available in ``self.result``:

        result['cors']['value']     -- correlation coefficient matrix,
                                       same shape as the input
        result['meanCors']['value'] -- per-column mean of that matrix
    '''
    def __init__(self,inputData,p=0.5,standard=True):
        '''
        Store the parameters, optionally standardize the data and build the model.

        inputData: 2-D input matrix (rows are samples, columns are attributes);
                   the first column is the parent sequence
        p: resolution (distinguishing) coefficient, range 0~1, generally 0.5;
           a smaller value widens the differences between the correlation
           coefficients, i.e. gives stronger discrimination
        standard: whether the data should be standardized before modeling
        '''
        self.inputData = np.array(inputData)
        self.p = p
        self.standard = standard
        #Standardization
        self.standarOpt()
        #Modeling
        self.buildModel()

    def standarOpt(self):
        '''Standardize self.inputData column-wise when self.standard is true.

        The fitted scaler is kept in self.scaler; nothing happens (and
        self.scaler is not set) when standardization is disabled.
        '''
        if not self.standard:
            return None
        self.scaler = StandardScaler().fit(self.inputData)
        self.inputData = self.scaler.transform(self.inputData)

    def buildModel(self):
        '''Compute the correlation coefficient matrix and its column means.

        The result is stored in self.result; self.inputData is left untouched.
        '''
        #The first column is the parent column. The absolute differences are
        #written to a NEW array: slicing self.inputData only yields views, so
        #updating them in place would zero the parent column after the first
        #step and compare every remaining column against zeros.
        momCol = self.inputData[:,0]
        absDiff = np.abs(self.inputData - momCol[:,np.newaxis])
        #Two-level minimum and maximum difference (the minimum is always 0
        #because the parent column is compared with itself)
        minMin = absDiff.min()
        maxMax = absDiff.max()
        #Correlation coefficient matrix
        if maxMax == 0:
            #Every column equals the parent column; the formula would be 0/0,
            #so report full correlation instead of NaN
            cors = np.ones(absDiff.shape)
        else:
            cors = (minMin + self.p*maxMax)/(absDiff + self.p*maxMax)
        #Average comprehensive correlation degree of each column
        meanCors = cors.mean(axis=0)
        self.result = {'cors':{'value':cors,'desc':'Correlation coefficient matrix'},'meanCors':{'value':meanCors,'desc':'Average comprehensive correlation coefficient'}}

if __name__ == "__main__":
    #Project directories, resolved relative to this file
    curDir = os.path.dirname(os.path.abspath(__file__))  #directory of this module
    baseDir = os.path.dirname(curDir)                    #project root
    staticDir = os.path.join(baseDir,'Static')           #static files
    resultDir = os.path.join(baseDir,'Result')           #result files

    #Demo data: one sequence per row, the first row being the parent sequence
    parentSeq = [1,1.1,2,2.25,3,4]
    sonSeqs = [
        [1,1.166,1.834,2,2.314,3],
        [1,1.125,1.075,1.375,1.625,1.75],
        [1,1,0.7,0.8,0.9,1.2],
    ]
    #The model expects one sequence per column, hence the transpose
    data = np.array([parentSeq] + sonSeqs).T

    #Build the model on standardized data and show the result
    model = GraModel(data,standard=True)
    print(model.result)

Interface call and operation effect

Main.py

Call the interface of BuildModel.py, analyze the example, and obtain the correlation between red wine quality and pH and the other indicators.

# -*- coding: utf-8 -*-
from Module.BuildModel import GraModel
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import seaborn as sns
#Path directory
baseDir = os.path.dirname(os.path.abspath(__file__))#current directory
staticDir = os.path.join(baseDir,'Static')#Static file directory
resultDir = os.path.join(baseDir,'Result')#Results file directory
#savefig does not create missing directories, so make sure the target exists
os.makedirs(resultDir,exist_ok=True)

#The first column of interface requirements is the parent sequence, i.e. red wine quality
data = pd.read_csv(os.path.join(staticDir,'winequality-red.csv'),sep=';')
columns = ['quality','fixed acidity', 'volatile acidity',
           'citric acid', 'residual sugar','chlorides', 
           'free sulfur dioxide', 'total sulfur dioxide', 
           'density','pH', 'sulphates', 'alcohol']
data = data[columns]

#Establish grey relation model and standardize data
model = GraModel(data,standard=True)
#Model calculation results
result = model.result
#Average relevance (one value per column, in the order of `columns`)
meanCors = result['meanCors']['value']

#Visualization chart
#Used to display Chinese labels normally
plt.rcParams['font.sans-serif']=['SimHei'] 
#Used to display negative sign normally
plt.rcParams['axes.unicode_minus']=False

#Visualization matrix: a single-row heat map of the average relevance
plt.figure(figsize=(8,12))
sns.heatmap(meanCors.reshape(1,-1), square=True, annot=True,  cbar=False,
            vmax=1.0,
            linewidths=0.1,cmap='viridis')
#Heat map cells span [i, i+1], so labels are centred at i+0.5 on both axes
plt.yticks([0.5],['quality'])
plt.xticks(np.arange(len(columns))+0.5,columns,rotation=90)
plt.title('Index correlation matrix')
plt.savefig(os.path.join(resultDir,'Visualization matrix of index correlation degree.png'),dpi=100,bbox_inches='tight')

Figure 2. Heat map of the correlation matrix

Published 8 original articles, won praise 4, visited 5964
Private letter follow

Posted by azwebdiva on Mon, 17 Feb 2020 20:20:46 -0800