The use and summary of numpy

Keywords: Python

Article directory

NumPy

Understanding ndarray: the multidimensional array

ndim, shape and dtype properties

Create ndarray
Operating on ndarray

Vectorization
Index and slice
Transpose: transpose
ndarray data type conversion: astype

Reading text file

Common functions of np

transpose
ceil and floor and rint and isnan
where
sum
all and any
unique

The use of numpy and the summary of common functions

NumPy

Understanding ndarray: the multidimensional array

import numpy as np

# Generate a 2x3 array of random floats (np.random.rand samples uniformly from [0, 1)).
# NOTE(review): the original note here read "Mathematical modeling should not be
# used"; its intent is unclear -- presumably a caution against using random
# sample data for real modelling work. Confirm with the author.
data = np.random.rand(2, 3)
print (data)
print (type(data))
# type() shows the object's class. The array's own attributes describe its
# layout: .shape is the size of each dimension, .ndim the number of
# dimensions and .dtype the element type.

[[0.46686682 0.68844304 0.76663872]
 [0.70747721 0.47887587 0.25943412]]
<class 'numpy.ndarray'>

ndim, shape and dtype properties

# Inspect the basic attributes of `data` (the array created in the previous snippet).
print ('Dimension number', data.ndim)  # number of dimensions (axes)
print ('Dimensions: ', data.shape)  # size along each dimension, as a tuple
print ('data type: ', data.dtype)  # type of the stored elements

Dimension number 2
Dimensions:  (2, 3)
data type:  float64

Create ndarray

'''1. array Establish'''
# Sequence to ndarray: np.array accepts any sequence (here a range object
# rather than an actual list) and copies its items into a new array.
l = range(10)
data = np.array(l)
print (data)
print (data.shape)  # (10,) -- a one-element tuple: a single dimension of length 10
print (data.ndim)

[0 1 2 3 4 5 6 7 8 9]
(10,)
1

# Nested sequence to ndarray
# Each inner sequence becomes one row, so two rows of ten items
# produce a two-dimensional array of shape (2, 10).
row = range(10)
l2 = [row, row]
data = np.array(l2)
print(data)
print(data.shape)

[[0 1 2 3 4 5 6 7 8 9]
 [0 1 2 3 4 5 6 7 8 9]]
(2, 10)

'''2. zeros;ones;empty Establish'''

# np.zeros
# The shape is passed as ONE tuple argument. Writing np.zeros(3, 4) instead of
# np.zeros((3, 4)) is a common mistake and raises an error.
zeros_arr = np.zeros((3, 4))
# np.ones
ones_arr = np.ones((2, 3))

# np.empty allocates the array without initializing it, so the contents are
# whatever happened to be in memory: often zeros, but arbitrary values can appear.
empty_arr = np.empty((3, 3))

# np.empty specifies the data type (second positional argument is dtype)
empty_int_arr = np.empty((3, 3), int)

print (zeros_arr)
print ('-------------')
print (ones_arr)
print ('-------------')
print (empty_arr)
print ('-------------')
print (empty_int_arr)

[[0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]]
-------------
[[1. 1. 1.]
 [1. 1. 1.]]
-------------
[[0.000e+000 0.000e+000 0.000e+000]
 [0.000e+000 0.000e+000 2.174e-321]
 [0.000e+000 0.000e+000 0.000e+000]]
-------------
[[0 0 0]
 [0 0 0]
 [0 0 0]]

# np.arange()
# NumPy's counterpart of Python's built-in range: it creates a run of
# consecutive numbers, but returns them as an ndarray.
print (np.arange(10))

[0 1 2 3 4 5 6 7 8 9]

Operating on ndarray

Vectorization

# Vector and vector operation
arr = np.array([[1, 2, 3],
                [4, 5, 6]])

print ("Multiply between elements:")
# Do not confuse this with a true matrix product: * multiplies the arrays
# element by element. Vectorized arithmetic follows the same rules as broadcasting.
print (arr * arr)

print ("Matrix addition:")
# + likewise adds the two arrays element by element.
print (arr + arr)

Multiply between elements:
[[ 1  4  9]
 [16 25 36]]
Matrix addition:
[[ 2  4  6]
 [ 8 10 12]]

# Vector and scalar operations
# The scalar is combined with every element of `arr` (the 2x3 integer array
# from the previous snippet); the float scalars make the results float arrays.
print (1. / arr)
print (2. * arr)

[[1.         0.5        0.33333333]
 [0.25       0.2        0.16666667]]
[[ 2.  4.  6.]
 [ 8. 10. 12.]]

Index and slice

# One-dimensional array
arr1 = np.arange(10)
print (arr1)

# Slice from index 2 up to, but not including, index 5
print (arr1[2:5])

[0 1 2 3 4 5 6 7 8 9]
[2 3 4]

# Multidimensional array
arr2 = np.arange(12).reshape(3,4)
# How the array is built: np.arange(12) produces the 12 consecutive integers
# 0..11 (they are not random numbers), and reshape(3,4) arranges them into
# 3 rows and 4 columns.
# With a three-dimensional shape such as (3, 4, 5) the result would be
# 3 blocks, each holding 4 rows and 5 columns.
# The dot chains method calls: reshape is applied to the array returned by arange.
print (arr2)

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]

# A single integer index on the 2-D array `arr2` selects a whole row (row 1)
print (arr2[1])

# Rows 0 and 1, columns 2 through the last
print (arr2[0:2, 2:])

# Every row, columns 1 and 2
print (arr2[:, 1:3])

[4 5 6 7]
[[2 3]
 [6 7]]
[[ 1  2]
 [ 5  6]
 [ 9 10]]

# Conditional index

# Find the values in data_arr whose matching year in year_arr is 2005 or later
data_arr = np.random.rand(3,3)
print (data_arr)

year_arr = np.array([[2000, 2001, 2000],
                     [2005, 2002, 2009],
                     [2001, 2003, 2010]])

# Note: the comparison is >=, so the mask includes the year 2005 itself
is_year_after_2005 = year_arr >= 2005
# The comparison is applied element by element, producing a boolean array
# with the same shape as year_arr
print (is_year_after_2005, is_year_after_2005.dtype)

# Option 1: index with the stored boolean mask
filtered_arr = data_arr[is_year_after_2005]

# Option 2: write the condition inline; this recomputes the same result
filtered_arr = data_arr[year_arr >= 2005]
print (filtered_arr)
# The intermediate mask variable is therefore optional and can be omitted.
# Boolean indexing returns a one-dimensional array of the selected elements,
# which is very useful for data filtering

[[0.61482194 0.0249229  0.28525661]
 [0.05121173 0.37672803 0.86259463]
 [0.22648329 0.4581513  0.18620441]]
[[False False False]
 [ True False  True]
 [False False  True]] bool
[0.05121173 0.86259463 0.18620441]

# Multiple conditions: combine boolean arrays with & (and) or | (or).
# Each comparison needs its own parentheses because & and | bind more tightly
# than the comparison operators.
# Uses data_arr and year_arr from the previous snippet.
filtered_arr = data_arr[(year_arr <= 2005) & (year_arr % 2 == 0)]
print (filtered_arr)

[0.61482194 0.28525661 0.37672803]

Transpose: transpose

# For a 2-D array, transpose() without arguments swaps rows and columns,
# turning the (2, 3) array into a (3, 2) array.
arr = np.random.rand(2,3)
print (arr)
print (arr.transpose())

[[0.01538974 0.47573964 0.90684253]
 [0.93683601 0.64306611 0.63846634]]
[[0.01538974 0.93683601]
 [0.47573964 0.64306611]
 [0.90684253 0.63846634]]

# Transposing a higher-dimensional array (reordering axes is often needed when working with images)
# transpose takes the new order of the axes. (1,0,2) swaps axes 0 and 1 and
# leaves axis 2 alone, so the (2,3,4) array becomes (3,2,4) and the element at
# [i, j, k] moves to [j, i, k].
arr3d = np.random.rand(2,3,4)
print (arr3d)
print ('----------------------')
print (arr3d.transpose((1,0,2))) # axes 0 and 1 swapped; each innermost row of 4 values stays intact

[[[0.18074837 0.64652003 0.80527972 0.67800268]
  [0.95766577 0.2498768  0.00304503 0.7058178 ]
  [0.12523549 0.18796252 0.72463798 0.15352211]]

 [[0.38808013 0.31075033 0.53082474 0.32254431]
  [0.6861262  0.02999367 0.70980993 0.09099878]
  [0.14987301 0.78237398 0.90159408 0.82897071]]]
----------------------
[[[0.18074837 0.64652003 0.80527972 0.67800268]
  [0.38808013 0.31075033 0.53082474 0.32254431]]

 [[0.95766577 0.2498768  0.00304503 0.7058178 ]
  [0.6861262  0.02999367 0.70980993 0.09099878]]

 [[0.12523549 0.18796252 0.72463798 0.15352211]
  [0.14987301 0.78237398 0.90159408 0.82897071]]]

ndarray data type conversion: astype

# Create an array whose element type is set explicitly through dtype
zeros_float_arr = np.zeros((3, 4), dtype=np.float64)
print (zeros_float_arr)
print (zeros_float_arr.dtype)

# astype returns a new array converted to the requested data type;
# the original float array keeps its own dtype
zeros_int_arr = zeros_float_arr.astype(np.int32)
print (zeros_int_arr)
print (zeros_int_arr.dtype)

[[0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]]
float64
[[0 0 0 0]
 [0 0 0 0]
 [0 0 0 0]]
int32

Reading text file

# loadtxt: read selected columns of a CSV file, every field as a string.
# No rows are skipped, so the header line is read as the first data row.
filename = './presidential_polls.csv'
data_array = np.loadtxt(filename,      # path of the file to read
                        delimiter=',', # character that separates the fields on each line
                        dtype=str,     # read every field as a string
                        usecols=(0,2,3)) # zero-based indices of the columns to read
print (data_array, data_array.shape)

[['cycle' 'type' 'matchup']
 ['2016' '"polls-plus"' '"Clinton vs. Trump vs. Johnson"']
 ['2016' '"polls-plus"' '"Clinton vs. Trump vs. Johnson"']
 ...
 ['2016' '"polls-only"' '"Clinton vs. Trump vs. Johnson"']
 ['2016' '"polls-only"' '"Clinton vs. Trump vs. Johnson"']
 ['2016' '"polls-only"' '"Clinton vs. Trump vs. Johnson"']] (10237, 3)

# loadtxt, explicitly specifying the type of data in each column
filename = './presidential_polls.csv'
data_array = np.loadtxt(filename,      # path of the file to read
                        delimiter=',', # character that separates the fields on each line
                        skiprows=1,    # skip the header line
                        dtype={'names':('cycle', 'type', 'matchup'),
                               'formats':('i4', 'S15', 'S50')},     # one named field per column: 4-byte integer, then byte strings of at most 15 and 50 bytes
                        usecols=(0,2,3)) # zero-based indices of the columns to read

print (data_array, data_array.shape) # The result is a one-dimensional array; each element is one record, displayed like a tuple

[(2016, b'"polls-plus"', b'"Clinton vs. Trump vs. Johnson"')
 (2016, b'"polls-plus"', b'"Clinton vs. Trump vs. Johnson"')
 (2016, b'"polls-plus"', b'"Clinton vs. Trump vs. Johnson"') ...
 (2016, b'"polls-only"', b'"Clinton vs. Trump vs. Johnson"')
 (2016, b'"polls-only"', b'"Clinton vs. Trump vs. Johnson"')
 (2016, b'"polls-only"', b'"Clinton vs. Trump vs. Johnson"')] (10236,)

Common functions of np

transpose

import numpy as np

# For a 2-D array, transpose() without arguments swaps rows and columns,
# turning the (2, 3) array into a (3, 2) array.
arr = np.random.rand(2,3)
print (arr)
print (arr.transpose())

[[0.78485041 0.88817969 0.34809014]
 [0.32744286 0.97539301 0.94401872]]
[[0.78485041 0.32744286]
 [0.88817969 0.97539301]
 [0.34809014 0.94401872]]

# Transposing a higher-dimensional array (reordering axes is often needed when working with images)
# transpose takes the new order of the axes. (1,0,2) swaps axes 0 and 1 and
# leaves axis 2 alone, so the (2,3,4) array becomes (3,2,4) and the element at
# [i, j, k] moves to [j, i, k].
arr3d = np.random.rand(2,3,4)
print (arr3d)
print ('----------------------')
print (arr3d.transpose((1,0,2))) # axes 0 and 1 swapped; each innermost row of 4 values stays intact

[[[0.28492549 0.60197236 0.45582367 0.21992479]
  [0.1747163  0.69201365 0.85460359 0.65311699]
  [0.62189644 0.25217555 0.16347156 0.29831219]]

 [[0.42826733 0.81396165 0.187138   0.560564  ]
  [0.10162186 0.66419751 0.03261665 0.06969256]
  [0.55461652 0.55020586 0.50693591 0.31741807]]]
----------------------
[[[0.28492549 0.60197236 0.45582367 0.21992479]
  [0.42826733 0.81396165 0.187138   0.560564  ]]

 [[0.1747163  0.69201365 0.85460359 0.65311699]
  [0.10162186 0.66419751 0.03261665 0.06969256]]

 [[0.62189644 0.25217555 0.16347156 0.29831219]
  [0.55461652 0.55020586 0.50693591 0.31741807]]]

ceil and floor and rint and isnan

# 2x3 sample drawn from the standard normal distribution, so it contains
# both positive and negative values
arr = np.random.randn(2,3)

print (arr)
print (np.ceil(arr))
# ceil: round each element up to the nearest integer
print (np.floor(arr))
# floor: round each element down to the nearest integer
print (np.rint(arr))
# rint: round each element to the nearest integer
print (np.isnan(arr))
# isnan: test element by element whether the value is NaN
# Other element-wise functions exist as well; see the author's notes.

[[ 0.262106   -1.33680008 -1.08562543]
 [ 0.3990978   0.1410074   0.64278274]]
[[ 1. -1. -1.]
 [ 1.  1.  1.]]
[[ 0. -2. -2.]
 [ 0.  0.  0.]]
[[ 0. -1. -1.]
 [ 0.  0.  1.]]
[[False False False]
 [False False False]]

where

# Sample a 3x4 array from the standard normal distribution.
arr = np.random.randn(3,4)
print (arr)

# np.where(condition, value_if_true, value_if_false) picks, element by element,
# 1 where the condition holds and -1 where it does not.
# The result is bound to a name and printed explicitly: a bare expression is
# only echoed by an interactive session and would be discarded in a script.
# repr() keeps the "array([...])" form that an interactive session displays.
signs = np.where(arr > 0, 1, -1)
print (repr(signs))

[[ 2.04688394  0.48063737  1.20876913 -0.93412937]
 [-0.43427472 -1.47755481  0.36882256 -0.08943138]
 [-0.2847686   0.96915893  0.32641235  0.28346922]]





array([[ 1,  1,  1, -1],
       [-1, -1,  1, -1],
       [-1,  1,  1,  1]])

sum

# Arrange the integers 0..9 into 5 rows of 2 columns, then total them three ways.
arr = np.arange(10).reshape(5, 2)
print(arr)

grand_total = np.sum(arr)            # no axis: sum over every element
column_totals = np.sum(arr, axis=0)  # axis=0: collapse the rows, one total per column
row_totals = np.sum(arr, axis=1)     # axis=1: collapse the columns, one total per row
for total in (grand_total, column_totals, row_totals):
    print(total)

[[0 1]
 [2 3]
 [4 5]
 [6 7]
 [8 9]]
45
[20 25]
[ 1  5  9 13 17]

all and any

import numpy as np
arr = np.random.randn(2,3)
print (arr)

print (np.any(arr > 0))
# any: True if at least one element satisfies the condition
print (np.all(arr > 0))
# all: True only if every element satisfies the condition
# The triple-quoted string below is a bare expression, not a comment, so an
# interactive session echoes its value after the snippet runs.
# What its notes say: any/all reduce a whole group of values to a single
# Boolean answer, and (per the author) can also be applied to a pandas DataFrame.
'''
·The purpose is to judge whether or not a group of data===It's kind of Boolean
·This can also be applied to pandas Medium DataFrame in
'''

[[-1.020184   -0.48466272 -0.8496271 ]
 [ 0.88815825 -0.81911857  0.64570539]]
True
False





'\n·The purpose is to judge whether or not a group of data===It's kind of Boolean\n·This can also be applied to pandas Medium DataFrame in\n'

unique

# np.unique returns the distinct values of the array as a sorted 1-D array;
# the 2-D layout of the input is not kept.
rows = [[1, 2, 1], [2, 3, 4]]
arr = np.array(rows)
print(arr)
distinct_values = np.unique(arr)
print(distinct_values)

[[1 2 1]
 [2 3 4]]
[1 2 3 4]

Nietzsche's lyricism

Published 15 original articles, won praise 1, visited 39

Private letter follow

Posted by V_dirt_God on Fri, 13 Mar 2020 09:23:37 -0700

Programmer Group