Statistical algorithm - numerical / linear relationship measurement

Keywords: Python

This post continues the series on statistical algorithms. Nothing special or deep this time; the material is again fairly basic:
1. Variance - sample
2. Standard deviation - sample
3. Coefficient of variation
4. Correlation coefficient


As before, the first step is to build a list. This time the code is wrapped in a function so it is easy to call later, and the functions written in the previous posts are reused here.
def create_rand_list(min_num,max_num,count_list):
  """Build a list of distinct random floats rounded to two decimals.

  Args:
    min_num: one bound of the range to draw from.
    max_num: the other bound of the range to draw from.
    count_list: how many values the returned list must contain.

  Returns:
    A list of ``count_list`` distinct floats, each rounded to two decimal
    places and drawn with ``random.uniform`` between the two bounds.

  Raises:
    ValueError: if ``count_list`` exceeds the number of distinct two-decimal
      values available between the bounds (the loop could never finish).
  """
  low, high = min(min_num, max_num), max(min_num, max_num)
  # Upper bound on how many different two-decimal values the range can yield.
  capacity = int(round((round(high, 2) - round(low, 2)) * 100)) + 1
  if count_list > capacity:
    raise ValueError(
      'cannot draw %s distinct two-decimal values between %s and %s'
      % (count_list, low, high))
  seen = set()
  case_list = []
  while len(case_list) < count_list:
    # Round before the uniqueness check so the returned values are distinct.
    rand_float = round(random.uniform(low, high), 2)
    if rand_float in seen:
      continue
    seen.add(rand_float)
    case_list.append(rand_float)
  return case_list


Here are the functions from the previous posts:
sum_fun() #sum
len_fun() #count of elements
multiply_fun() #product
sum_mean_fun() #arithmetic mean
sum_mean_rate() #rate of return computed from the arithmetic mean
median_fun() #median
modes_fun() #mode
ext_minus_fun() #range
geom_mean_fun() #geometric mean
geom_mean_rate() #geometric mean rate of return

New function code

import random

# First, a random sample list to work with (hard-coded here; the generating function was covered earlier).
rand_list = [15.79, 6.83, 12.83, 22.32, 17.92, 6.29, 10.19, 10.13, 24.23, 25.56]

# 1. Sample variance S^2: the sum of the squared differences between each element and the list mean, divided by (n - 1). The population variance divides by n instead.
def var_fun(rand_list):
  """Return the sample variance (S^2) of ``rand_list``.

  The squared deviation of every element from the list average is summed
  and divided by (element count - 1). The average, count and sum come from
  the shared helpers ``sum_mean_fun``, ``len_fun`` and ``sum_fun``.
  """
  average = sum_mean_fun(rand_list)
  count = len_fun(rand_list)
  squared_deviations = []
  for value in rand_list:
    squared_deviations.append((value - average) ** 2)
  # Dividing by count - 1 (not count) makes this the sample variance.
  return sum_fun(squared_deviations) / (count - 1)

# 2. Sample standard deviation S: simply the square root of the sample variance.
def covar_fun(rand_list):
  """Return the square root of ``var_fun(rand_list)``.

  Despite the ``covar`` in the name, the value computed is the sample
  standard deviation (S), not a covariance.
  """
  return var_fun(rand_list) ** 0.5

# 3. Coefficient of variation CV: a measure of the degree of variation, standard deviation / arithmetic mean (multiply by 100 to express it as a percentage).
# Note (Baidu Encyclopedia): in statistical analysis, if the coefficient of variation is greater than 15%, the data may be abnormal and should be discarded.
def trans_coef_fun(rand_list):
  """Return the coefficient of variation of ``rand_list`` as a plain ratio.

  The result of ``covar_fun`` (the sample standard deviation) is divided by
  the result of ``sum_mean_fun``; the value is not scaled to a percentage.
  """
  spread = covar_fun(rand_list)
  average = sum_mean_fun(rand_list)
  return spread / average

# 4. Sample correlation coefficient r: measures the linear relationship between two dimensions, -1 <= r <= 1; the closer |r| is to 1, the stronger the relationship between the dimensions.
#    Because two dimensions are involved, two lists must be passed in, and the algorithm is more involved.
'''
r = ((x1-mean(x))(y1-mean(y)) + (x2-mean(x))(y2-mean(y)) + ... + (xn-mean(x))(yn-mean(y)))
    / ( ((x1-mean(x))^2 + (x2-mean(x))^2 + ... + (xn-mean(x))^2)^0.5
        * ((y1-mean(y))^2 + (y2-mean(y))^2 + ... + (yn-mean(y))^2)^0.5 )
'''
# Sample inputs for pearson_fun: x_list reuses rand_list, y_list is a second
# hard-coded list with the same number of elements (10).
x_list = rand_list
y_list = [4.39, 13.84, 9.21, 9.91, 15.69, 14.92, 25.77, 23.99, 8.15, 25.07]
def pearson_fun(x_list,y_list):
  """Return the sample Pearson correlation coefficient r of two lists.

  r = sum((x_i - mean(x)) * (y_i - mean(y)))
      / (sum((x_i - mean(x))^2)^0.5 * sum((y_i - mean(y))^2)^0.5)

  Args:
    x_list: values of the first dimension.
    y_list: values of the second dimension; must be as long as ``x_list``.

  Returns:
    The correlation coefficient as a float.

  Raises:
    ValueError: if the two lists differ in length. A numeric return value
      here would be indistinguishable from a real correlation result.
    ZeroDivisionError: if either list has no spread (all elements equal),
      because the denominator is then zero.
  """
  len_num = len_fun(x_list)
  if len_num != len_fun(y_list):
    raise ValueError('x_list and y_list must have the same length')
  x_mean = sum_mean_fun(x_list)
  y_mean = sum_mean_fun(y_list)
  # Numerator: sum of the products of paired deviations from each mean.
  xy_multiply_num = sum_fun(
    [(x - x_mean) * (y - y_mean) for x, y in zip(x_list, y_list)])
  # Denominator parts: sum of squared deviations of each list from its own mean.
  x_covar_son_num = sum_fun([(x - x_mean) ** 2 for x in x_list])
  y_covar_son_num = sum_fun([(y - y_mean) ** 2 for y in y_list])
  xy_covar_son_multiply_num = (x_covar_son_num ** 0.5) * (y_covar_son_num ** 0.5)
  return xy_multiply_num / xy_covar_son_multiply_num

Posted by Nuv on Thu, 09 Jan 2020 08:40:44 -0800