Task03: logistic regression

Keywords: Python

Theoretical part

  • The relationship and differences between logistic regression and linear regression
  • Model building: the principle and model of logistic regression
  • Learning strategy: the loss function of logistic regression, its derivation, and its optimization (a minimal sketch follows this list)
  • Algorithmic solution: batch gradient descent
  • Regularization and model evaluation metrics
  • Advantages and disadvantages of logistic regression
  • Handling sample imbalance
  • Detailed explanation of sklearn parameters
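The model-building and learning-strategy items above boil down to three formulas. As a quick reference, here is a minimal NumPy sketch (my own illustration, not part of the original case study) of the sigmoid hypothesis, the cross-entropy loss, and its gradient, for labels in {0, 1}:

    import numpy as np

    def sigmoid(z):
        # Logistic function: squashes any real-valued score into a probability in (0, 1)
        return 1.0 / (1.0 + np.exp(-z))

    def cross_entropy_loss(w, X, y):
        # Average negative log-likelihood of labels y in {0, 1} under the logistic model
        h = sigmoid(X.dot(w))
        return -np.mean(y * np.log(h) + (1 - y) * np.log(1 - h))

    def gradient(w, X, y):
        # Gradient of the loss above; batch gradient descent (used later in LGR_GD)
        # repeatedly steps against this direction
        return X.T.dot(sigmoid(X.dot(w)) - y) / len(y)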
  1. Case study:
    import numpy as np
    import pandas as pd
    import matplotlib.pyplot as plt
    
    %matplotlib inline
    
    
df_X = pd.read_csv('./logistic_x.txt', sep=r'\s+', header=None, engine='python')  # Read the X values (features); whitespace-delimited, no header
    ys = pd.read_csv('./logistic_y.txt', sep=r'\s+', header=None, engine='python')  # Read the y values (labels)
    ys = ys.astype(int)
    df_X['label'] = ys[0].values  # Attach each y label to the corresponding row of X
    
ax = plt.axes()
    # Scatter-plot the points in two dimensions to inspect how the two classes are distributed
    df_X.query('label == 0').plot.scatter(x=0, y=1, ax=ax, color='blue')
    df_X.query('label == 1').plot.scatter(x=0, y=1, ax=ax, color='red')
    
# Extract the data for learning
    Xs = df_X[[0, 1]].values
    Xs = np.hstack([np.ones((Xs.shape[0], 1)), Xs])  # Prepend a column of ones so the intercept is absorbed into the weight vector
    ys = df_X['label'].values
    
    
from sklearn.linear_model import LogisticRegression
    
lr = LogisticRegression(fit_intercept=False)  # The intercept column was already merged into Xs above, so no separate intercept term is fitted here
lr.fit(Xs, ys)  # Fit the model
    score = lr.score(Xs, ys)  # Evaluate the fit: mean accuracy on the training data
    print("Coefficient: %s" % lr.coef_)
    print("Score: %s" % score)
    
    
    ax = plt.axes()
    
    df_X.query('label == 0').plot.scatter(x=0, y=1, ax=ax, color='blue')
    df_X.query('label == 1').plot.scatter(x=0, y=1, ax=ax, color='red')
    
    _xs = np.array([np.min(Xs[:,1]), np.max(Xs[:,1])])
    
# Draw the learned decision boundary over the scatter plot: the boundary is where
    # w0 + w1*x1 + w2*x2 = 0, i.e. x2 = -(w0 + w1*x1) / w2
    _ys = (lr.coef_[0][0] + lr.coef_[0][1] * _xs) / (- lr.coef_[0][2])
    plt.plot(_xs, _ys, lw=1)
    
    
class LGR_GD():
        def __init__(self):
            self.w = None
            self.n_iters = None

        def fit(self, X, y, alpha=0.03, loss=1e-10):  # Step size 0.03; convergence threshold 1e-10
            y = y.reshape(-1, 1)  # Reshape y into a column vector for matrix operations
            [m, d] = np.shape(X)  # Number of samples and number of features
            self.w = np.zeros((1, d))  # Initialize the parameters to 0
            tol = 1e5
            self.n_iters = 0
            # ============================= show me your code =======================
            while tol > loss:  # Iterate until the update is smaller than the threshold
                h = 1 / (1 + np.exp(-X.dot(self.w.T)))  # Sigmoid hypothesis, shape (m, 1)
                grad = X.T.dot(y - h) / m  # Batch gradient of the log-likelihood, shape (d, 1)
                self.w = self.w + alpha * grad.T  # One gradient step over the whole batch
                tol = np.sum(np.abs(alpha * grad))  # Size of this update, used as the stopping criterion
                self.n_iters += 1  # Update the iteration count
            # ============================= show me your code =======================

        def predict(self, X):
            # Predict probabilities for new inputs with the fitted parameters
            y_pred = 1 / (1 + np.exp(-X.dot(self.w.T)))
            return y_pred
    
    
    if __name__ == "__main__":
        lr_gd = LGR_GD()
        lr_gd.fit(Xs, ys)
    
        ax = plt.axes()
    
        df_X.query('label == 0').plot.scatter(x=0, y=1, ax=ax, color='blue')
        df_X.query('label == 1').plot.scatter(x=0, y=1, ax=ax, color='red')
    
        _xs = np.array([np.min(Xs[:, 1]), np.max(Xs[:, 1])])
        _ys = (lr_gd.w[0][0] + lr_gd.w[0][1] * _xs) / (- lr_gd.w[0][2])
        plt.plot(_xs, _ys, lw=1)
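
        # Sanity check (my addition, not in the original): the decision boundary depends
        # only on the direction of w, so compare normalized coefficients with the sklearn fit above
        print("sklearn (normalized): %s" % (lr.coef_ / np.linalg.norm(lr.coef_)))
        print("LGR_GD  (normalized): %s" % (lr_gd.w / np.linalg.norm(lr_gd.w)))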
    
    
class LGR_NT():
        def __init__(self):
            self.w = None
            self.n_iters = None

        def fit(self, X, y, loss=1e-10):  # Convergence threshold 1e-10
            y = y.reshape(-1, 1)  # Reshape y into a column vector for matrix operations
            [m, d] = np.shape(X)  # Number of samples and number of features
            self.w = np.zeros((1, d))  # Initialize the parameters to 0
            tol = 1e5
            n_iters = 0
            # ============================= show me your code =======================
            while tol > loss:
                h = 1 / (1 + np.exp(-X.dot(self.w.T)))  # Sigmoid hypothesis, shape (m, 1)
                grad = X.T.dot(h - y) / m  # Gradient of the loss, shape (d, 1)
                Hessian = (X.T * (h * (1 - h)).ravel()).dot(X) / m  # Hessian of the loss, shape (d, d)
                step = np.linalg.solve(Hessian, grad)  # Newton step: Hessian^{-1} times gradient
                self.w = self.w - step.T  # Newton update
                tol = np.sum(np.abs(step))  # Size of this update, used as the stopping criterion
                n_iters += 1
            # ============================= show me your code =======================
            self.n_iters = n_iters

        def predict(self, X):
            # Predict probabilities for new inputs with the fitted parameters
            y_pred = 1 / (1 + np.exp(-X.dot(self.w.T)))
            return y_pred
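
    # Note: Newton's method uses second-order curvature information (the Hessian),
    # so it typically converges in far fewer iterations than batch gradient descent,
    # at the cost of building and solving a d x d linear system at every step.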
    
    
    if __name__ == "__main__":
        lgr_nt = LGR_NT()
        lgr_nt.fit(Xs, ys)
    
    
    
    
    
    print("Gradient descent method result parameters:%s;Iteration times of gradient descent method:%s" %(lgr_gd.w,lgr_gd.n_iters))
    print("Newton method result parameters:%s;Iterations of Newton method:%s" %(lgr_nt.w,lgr_nt.n_iters))
    

     
