Article directory
The use of numpy and the summary of common functions
NumPy
ndarray: understanding multidimensional arrays
import numpy as np # Generate random multidimensional data with the specified dimensions # Unlikely to be needed for mathematical modeling data = np.random.rand(2, 3) print (data) print (type(data)) # type() shows the object's type; shape gives the size of each dimension; ndim gives the number of dimensions; other attributes are used the same way
[[0.46686682 0.68844304 0.76663872] [0.70747721 0.47887587 0.25943412]] <class 'numpy.ndarray'>
ndim, shape and dtype properties
print ('Dimension number', data.ndim) print ('Dimensions: ', data.shape) print ('data type: ', data.dtype)
Dimension number 2 Dimensions:  (2, 3) data type:  float64
Create ndarray
'''1. array Establish''' # list to ndarray l = range(10) data = np.array(l) print (data) print (data.shape) print (data.ndim)
[0 1 2 3 4 5 6 7 8 9] (10,) 1
# Nested sequence to ndarray l2 = [range(10), range(10)] #So it forms an array data = np.array(l2) print (data) print (data.shape)
[[0 1 2 3 4 5 6 7 8 9] [0 1 2 3 4 5 6 7 8 9]] (2, 10)
'''2. zeros;ones;empty Establish''' # np.zeros zeros_arr = np.zeros((3, 4)) #The shape must be passed as a tuple; forgetting the inner parentheses is a common source of errors # np.ones ones_arr = np.ones((2, 3)) # np.empty [contents are uninitialized: not necessarily zero, may contain arbitrary values] empty_arr = np.empty((3, 3)) # np.empty with an explicit data type empty_int_arr = np.empty((3, 3), int) print (zeros_arr) print ('-------------') print (ones_arr) print ('-------------') print (empty_arr) print ('-------------') print (empty_int_arr)
[[0. 0. 0. 0.] [0. 0. 0. 0.] [0. 0. 0. 0.]] ------------- [[1. 1. 1.] [1. 1. 1.]] ------------- [[0.000e+000 0.000e+000 0.000e+000] [0.000e+000 0.000e+000 2.174e-321] [0.000e+000 0.000e+000 0.000e+000]] ------------- [[0 0 0] [0 0 0] [0 0 0]]
# np.arange() #np.arange creates a sequence of consecutive numbers; it is NumPy's counterpart to Python's built-in range print (np.arange(10))
[0 1 2 3 4 5 6 7 8 9]
Operating on ndarray
Vectorization
# Vector and vector operation arr = np.array([[1, 2, 3], [4, 5, 6]]) print ("Multiply between elements:") #Note: this is not matrix multiplication. The vectorized operation here is applied element by element, in the same way as a broadcast operation print (arr * arr) print ("Matrix addition:") print (arr + arr)
Multiply between elements: [[ 1 4 9] [16 25 36]] Matrix addition: [[ 2 4 6] [ 8 10 12]]
# Vector and scalar operations print (1. / arr) print (2. * arr)
[[1. 0.5 0.33333333] [0.25 0.2 0.16666667]] [[ 2. 4. 6.] [ 8. 10. 12.]]
Index and slice
# One-dimensional array arr1 = np.arange(10) print (arr1) print (arr1[2:5])
[0 1 2 3 4 5 6 7 8 9] [2 3 4]
# Multidimensional array arr2 = np.arange(12).reshape(3,4) #How to define a multidimensional array: arange(12) produces the 12 consecutive integers 0 to 11 (not random numbers), and reshape then arranges them into the given dimensions #For example, a shape of (3, 4, 5) means height 3, length 4, width 5 #The dot (.) applies a method to the preceding result print (arr2)
[[ 0 1 2 3] [ 4 5 6 7] [ 8 9 10 11]]
print (arr2[1]) print (arr2[0:2, 2:]) print (arr2[:, 1:3])
[4 5 6 7] [[2 3] [6 7]] [[ 1 2] [ 5 6] [ 9 10]]
# Conditional index # Select the entries of data_arr whose year is 2005 or later data_arr = np.random.rand(3,3) print (data_arr) year_arr = np.array([[2000, 2001, 2000], [2005, 2002, 2009], [2001, 2003, 2010]]) is_year_after_2005 = year_arr >= 2005 #: the comparison is applied element-wise and yields a boolean array of the same shape print (is_year_after_2005, is_year_after_2005.dtype) filtered_arr = data_arr[is_year_after_2005] filtered_arr = data_arr[year_arr >= 2005] print (filtered_arr) #The intermediate mask variable is optional: the two assignments to filtered_arr are equivalent #The final result is a one-dimensional array, which is very useful for data filtering
[[0.61482194 0.0249229 0.28525661] [0.05121173 0.37672803 0.86259463] [0.22648329 0.4581513 0.18620441]] [[False False False] [ True False True] [False False True]] bool [0.05121173 0.86259463 0.18620441]
# Multiple conditions &| filtered_arr = data_arr[(year_arr <= 2005) & (year_arr % 2 == 0)] print (filtered_arr)
[0.61482194 0.28525661 0.37672803]
Transpose: transpose
arr = np.random.rand(2,3) print (arr) print (arr.transpose())
[[0.01538974 0.47573964 0.90684253] [0.93683601 0.64306611 0.63846634]] [[0.01538974 0.93683601] [0.47573964 0.64306611] [0.90684253 0.63846634]]
#Transposing a higher-dimensional array (axis permutation is often needed for image data) #This is the part that is easy to get confused by arr3d = np.random.rand(2,3,4) print (arr3d) print ('----------------------') print (arr3d.transpose((1,0,2))) # transpose((1,0,2)) swaps axes 0 and 1: the shape (2, 3, 4) becomes (3, 2, 4), and the element at [i, j, k] moves to [j, i, k]
[[[0.18074837 0.64652003 0.80527972 0.67800268] [0.95766577 0.2498768 0.00304503 0.7058178 ] [0.12523549 0.18796252 0.72463798 0.15352211]] [[0.38808013 0.31075033 0.53082474 0.32254431] [0.6861262 0.02999367 0.70980993 0.09099878] [0.14987301 0.78237398 0.90159408 0.82897071]]] ---------------------- [[[0.18074837 0.64652003 0.80527972 0.67800268] [0.38808013 0.31075033 0.53082474 0.32254431]] [[0.95766577 0.2498768 0.00304503 0.7058178 ] [0.6861262 0.02999367 0.70980993 0.09099878]] [[0.12523549 0.18796252 0.72463798 0.15352211] [0.14987301 0.78237398 0.90159408 0.82897071]]]
ndarray data type conversion: astype
zeros_float_arr = np.zeros((3, 4), dtype=np.float64) print (zeros_float_arr) print (zeros_float_arr.dtype) # astype conversion data type zeros_int_arr = zeros_float_arr.astype(np.int32) print (zeros_int_arr) print (zeros_int_arr.dtype)
[[0. 0. 0. 0.] [0. 0. 0. 0.] [0. 0. 0. 0.]] float64 [[0 0 0 0] [0 0 0 0] [0 0 0 0]] int32
Reading text file
# loadtxt filename = './presidential_polls.csv' data_array = np.loadtxt(filename, # file name delimiter=',', # Specify the element separator inside dtype=str, # Specify data type usecols=(0,2,3)) # Specifies the column index number to read print (data_array, data_array.shape)
[['cycle' 'type' 'matchup'] ['2016' '"polls-plus"' '"Clinton vs. Trump vs. Johnson"'] ['2016' '"polls-plus"' '"Clinton vs. Trump vs. Johnson"'] ... ['2016' '"polls-only"' '"Clinton vs. Trump vs. Johnson"'] ['2016' '"polls-only"' '"Clinton vs. Trump vs. Johnson"'] ['2016' '"polls-only"' '"Clinton vs. Trump vs. Johnson"']] (10237, 3)
# loadtxt, explicitly specifying the data type of each column filename = './presidential_polls.csv' data_array = np.loadtxt(filename, # file name delimiter=',', # Separator skiprows=1, dtype={'names':('cycle', 'type', 'matchup'), 'formats':('i4', 'S15', 'S50')}, # data type usecols=(0,2,3)) # Specifies the column index number to read print (data_array, data_array.shape) # The result is a one-dimensional structured array; each element is a tuple-like record with one field per column
[(2016, b'"polls-plus"', b'"Clinton vs. Trump vs. Johnson"') (2016, b'"polls-plus"', b'"Clinton vs. Trump vs. Johnson"') (2016, b'"polls-plus"', b'"Clinton vs. Trump vs. Johnson"') ... (2016, b'"polls-only"', b'"Clinton vs. Trump vs. Johnson"') (2016, b'"polls-only"', b'"Clinton vs. Trump vs. Johnson"') (2016, b'"polls-only"', b'"Clinton vs. Trump vs. Johnson"')] (10236,)
Common functions of np
transpose
import numpy as np
arr = np.random.rand(2,3) print (arr) print (arr.transpose())
[[0.78485041 0.88817969 0.34809014] [0.32744286 0.97539301 0.94401872]] [[0.78485041 0.32744286] [0.88817969 0.97539301] [0.34809014 0.94401872]]
#Transposing a higher-dimensional array (axis permutation is often needed for image data) #This is the part that is easy to get confused by arr3d = np.random.rand(2,3,4) print (arr3d) print ('----------------------') print (arr3d.transpose((1,0,2))) # transpose((1,0,2)) swaps axes 0 and 1: the shape (2, 3, 4) becomes (3, 2, 4), and the element at [i, j, k] moves to [j, i, k]
[[[0.28492549 0.60197236 0.45582367 0.21992479] [0.1747163 0.69201365 0.85460359 0.65311699] [0.62189644 0.25217555 0.16347156 0.29831219]] [[0.42826733 0.81396165 0.187138 0.560564 ] [0.10162186 0.66419751 0.03261665 0.06969256] [0.55461652 0.55020586 0.50693591 0.31741807]]] ---------------------- [[[0.28492549 0.60197236 0.45582367 0.21992479] [0.42826733 0.81396165 0.187138 0.560564 ]] [[0.1747163 0.69201365 0.85460359 0.65311699] [0.10162186 0.66419751 0.03261665 0.06969256]] [[0.62189644 0.25217555 0.16347156 0.29831219] [0.55461652 0.55020586 0.50693591 0.31741807]]]
ceil and floor and rint and isnan
arr = np.random.randn(2,3) print (arr) print (np.ceil(arr)) #Round up to the nearest integer print (np.floor(arr)) #Round down to the nearest integer print (np.rint(arr)) #Round to the nearest integer print (np.isnan(arr)) #Check element-wise whether each value is NaN #Other similar functions are listed in the notes
[[ 0.262106 -1.33680008 -1.08562543] [ 0.3990978 0.1410074 0.64278274]] [[ 1. -1. -1.] [ 1. 1. 1.]] [[ 0. -2. -2.] [ 0. 0. 0.]] [[ 0. -1. -1.] [ 0. 0. 1.]] [[False False False] [False False False]]
where
arr = np.random.randn(3,4) print (arr) np.where(arr > 0, 1, -1) #np.where(condition, value where the condition holds, value where it does not)
[[ 2.04688394 0.48063737 1.20876913 -0.93412937] [-0.43427472 -1.47755481 0.36882256 -0.08943138] [-0.2847686 0.96915893 0.32641235 0.28346922]] array([[ 1, 1, 1, -1], [-1, -1, 1, -1], [-1, 1, 1, 1]])
sum
arr = np.arange(10).reshape(5,2) print (arr) print (np.sum(arr)) print (np.sum(arr, axis=0)) print (np.sum(arr, axis=1))
[[0 1] [2 3] [4 5] [6 7] [8 9]] 45 [20 25] [ 1 5 9 13 17]
all and any
import numpy as np arr = np.random.randn(2,3) print (arr) print (np.any(arr > 0)) #True if at least one element satisfies the condition print (np.all(arr > 0)) #True only if every element satisfies the condition ''' ·The purpose is to judge whether or not a group of data===It's kind of Boolean ·This can also be applied to pandas Medium DataFrame in '''
[[-1.020184 -0.48466272 -0.8496271 ] [ 0.88815825 -0.81911857 0.64570539]] True False '\n·The purpose is to judge whether or not a group of data===It's kind of Boolean\n·This can also be applied to pandas Medium DataFrame in\n'
unique
arr = np.array([[1, 2, 1], [2, 3, 4]]) print (arr) print (np.unique(arr))
[[1 2 1] [2 3 4]] [1 2 3 4]