Common functions of Numpy
1. Read matrix file
xxx,xxx,xxx,xxx
xxx,xxx,xxx,xxx
xxx,xxx,xxx,xxx
It consists of data items in several rows and columns. The number of items in each row must be equal, the type of data items in each column must be the same, and there is a clear separator between data items.
np.loadtxt(
file path,
delimiter = delimiter string,
usecols = set of selected columns,
unpack = whether to split the result by column (default False),
dtype = target type (default float),
converters = converter dictionary) ->
A two-dimensional array (unpack=False) or multiple one-dimensional arrays (unpack=True)
# -*- coding: utf-8 -*- from __future__ import unicode_literals import datetime as dt import numpy as np import matplotlib.pyplot as mp import matplotlib.dates as md # Converter function to change day in day month year format to year month day format def dmy2ymd(dmy): # Convert UTF-8 encoded byte string to UCS-4 encoded string dmy = str(dmy, encoding='utf-8') ''' d, m, y = dmy.split('-') ymd = y + "-" + m + "-" + d ''' # Resolve date string in day month year format to datetime # Object of type, and then the date sub object of type date date = dt.datetime.strptime( dmy, '%d-%m-%Y').date() # Format date object of type date # String in the form of year month day ymd = date.strftime('%Y-%m-%d') return ymd # Read Apple's # Stock price: opening price, highest price, lowest price and closing price dates, opening_prices, highest_prices, \ lowest_prices, closing_prices = np.loadtxt( '../../data/aapl.csv', delimiter=",", usecols=(1, 3, 4, 5, 6), unpack=True, dtype='M8[D], f8, f8, f8, f8', converters={1: dmy2ymd}) mp.figure('Candlestick', facecolor='lightgray') mp.title('Candlestick', fontsize=20) mp.xlabel('Date', fontsize=14) mp.ylabel('Price', fontsize=14) ax = mp.gca() # Major scale represents Monday of each week ax.xaxis.set_major_locator( md.WeekdayLocator(byweekday=md.MO)) # Sub scale means every day ax.xaxis.set_minor_locator(md.DayLocator()) # Set label format of major scale: day month (abbreviated in English) year ax.xaxis.set_major_formatter( md.DateFormatter('%d %b %Y')) mp.tick_params(labelsize=10) mp.grid(axis='y', linestyle=':') # Numpy.datetime64[D]-> # Matplotlib.dates.datetime.datetime dates = dates.astype(md.datetime.datetime) rise = closing_prices - opening_prices >= 0.01 fall = opening_prices - closing_prices >= 0.01 fc = np.zeros(dates.size, dtype='3f4') ec = np.zeros(dates.size, dtype='3f4') fc[rise], fc[fall] = (1, 1, 1), (0, 0.5, 0) ec[rise], ec[fall] = (1, 0, 0), (0, 0.5, 0) mp.bar(dates, highest_prices - lowest_prices, 0, lowest_prices, color=fc, 
edgecolor=ec) mp.bar(dates, closing_prices - opening_prices, 0.8, opening_prices, color=fc, edgecolor=ec) mp.gcf().autofmt_xdate() mp.show()
Mask array: a boolean array used to select the elements at its True positions
2. Arithmetic mean
Sample: S = [S1, S2,..., Sn]
Arithmetic mean: m = (s1+s2+...+sn)/n
s1 = s + d1
s2 = s + d2
...
sn = s + dn
m = s + (d1+d2+...+dn)/n
n -> ∞: (d1+d2+...+dn)/n -> 0
The arithmetic mean is an unbiased estimate of the true value when the number of samples is large enough.
np.mean(sample array) -> arithmetic mean
sample_array.mean() -> arithmetic mean
# -*- coding: utf-8 -*- from __future__ import unicode_literals import numpy as np closing_prices = np.loadtxt( '../../data/aapl.csv', delimiter=',', usecols=(6), unpack=True) mean = 0 for closing_price in closing_prices: mean += closing_price mean /= closing_prices.size print(mean) mean = np.mean(closing_prices) print(mean) mean = closing_prices.mean() print(mean)
3. Weighted average
Sample: S = [s1, s2, ..., sn]
Weight: W = [w1, w2, ..., wn]
Weighted average:
a = (s1w1+s2w2+...+snwn)/(w1+w2+...+wn)
The arithmetic mean is the weighted average with equal weights.
np.average(sample array, weights=weight array)
-> weighted average
# -*- coding: utf-8 -*- from __future__ import unicode_literals import numpy as np closing_prices, volumes = np.loadtxt( '../../data/aapl.csv', delimiter=',', usecols=(6, 7), unpack=True) vwap, wsum = 0, 0 for closing_price, volume in zip( closing_prices, volumes): vwap += closing_price * volume wsum += volume vwap /= wsum print(vwap) vwap = np.average(closing_prices, weights=volumes) print(vwap)
Time: early -------- > late
Price: 10... 52 48 51 50
Weight: low -------- > High
?
# -*- coding: utf-8 -*- from __future__ import unicode_literals import datetime as dt import numpy as np def dmy2days(dmy): dmy = str(dmy, encoding='utf-8') date = dt.datetime.strptime( dmy, '%d-%m-%Y').date() days = (date - dt.date.min).days return days days, closing_prices = np.loadtxt( '../../data/aapl.csv', delimiter=',', usecols=(1, 6), unpack=True, converters={1: dmy2days}) twap, wsum = 0, 0 for closing_price, day in zip( closing_prices, days): twap += closing_price * day wsum += day twap /= wsum print(twap) twap = np.average(closing_prices, weights=days) print(twap)
Time - > value
4. Extreme values (maximum and minimum)
np.max() \ return the maximum or
np.min() / minimum element of an array
np.argmax() \ return the subscript (index) of the maximum
np.argmin() / or minimum element of an array
np.maximum() \ collect the maximum or minimum values of the corresponding
np.minimum() / positions in two arrays into a new array
np.ptp() - the range of an array -- the difference between the maximum element and the minimum element
# -*- coding: utf-8 -*- from __future__ import unicode_literals import numpy as np # Generating 9 random numbers in [10, 100) interval which obey uniform distribution a = np.random.randint(10, 100, 9).reshape(3, 3) print(a) b, c = np.max(a), np.min(a) print(b, c) d, e = np.argmax(a), np.argmin(a) print(d, e) names = np.array(['zhangfei', 'zhaoyun', 'guanyu']) scores = np.array([70, 90, 80]) print(names[np.argmax(scores)]) f = np.random.randint(10, 100, 9).reshape(3, 3) print(f) g, h = np.maximum(a, f), np.minimum(a, f) print(g, h, sep='\n') i = np.ptp(a) print(i)
Price fluctuation range
# -*- coding: utf-8 -*- from __future__ import unicode_literals import numpy as np highest_prices, lowest_prices = np.loadtxt( '../../data/aapl.csv', delimiter=',', usecols=(4, 5), unpack=True) max_highest_price, min_lowest_price = \ highest_prices[0], lowest_prices[0] for highest_price, lowest_price in zip( highest_prices[1:], lowest_prices[1:]): if max_highest_price < highest_price: max_highest_price = highest_price if min_lowest_price > lowest_price: min_lowest_price = lowest_price print(max_highest_price - min_lowest_price) print(np.max(highest_prices) - np.min(lowest_prices))
Price fluctuation range
# -*- coding: utf-8 -*- from __future__ import unicode_literals import numpy as np highest_prices, lowest_prices = np.loadtxt( '../../data/aapl.csv', delimiter=',', usecols=(4, 5), unpack=True) max_highest_price, min_highest_price, \ max_lowest_price, min_lowest_price = \ highest_prices[0], highest_prices[0], \ lowest_prices[0], lowest_prices[0] for highest_price, lowest_price in zip( highest_prices[1:], lowest_prices[1:]): if max_highest_price < highest_price: max_highest_price = highest_price if min_highest_price > highest_price: min_highest_price = highest_price if max_lowest_price < lowest_price: max_lowest_price = lowest_price if min_lowest_price > lowest_price: min_lowest_price = lowest_price print(max_highest_price - min_highest_price, max_lowest_price - min_lowest_price) print(np.ptp(highest_prices), np.ptp(lowest_prices))
5. median
5000 3000 4000 6000 1 10000000000
1 3000 4000 5000 6000 10000000000
\____/
|
4500
(a[(6-1)/2] + a[6/2]) / 2
1 3000 4000 5000 10000000000
|
4000
(a[(5-1)/2] + a[5/2]) / 2
General formula: (a[(L-1)/2] + a[L/2]) / 2, where L is the number of elements and the subscripts use integer division
np.median(array) -> median
# -*- coding: utf-8 -*- from __future__ import unicode_literals import numpy as np closing_prices = np.loadtxt( '../../data/aapl.csv', delimiter=',', usecols=(6), unpack=True) sorted_prices = np.msort(closing_prices) l = len(sorted_prices) median = (sorted_prices[int((l - 1) / 2)] + sorted_prices[int(l / 2)]) / 2 print(median) median = np.median(closing_prices) print(median)
6. standard deviation
Sample: S = [s1, s2, ..., sn]
Mean: m = (s1+s2+...+sn)/n -> estimate of the true value
Deviation: D = [d1, d2, ..., dn], di = si - m
Squared deviation: Q = [q1, q2, ..., qn], qi = di^2
(population) variance: v = (q1+q2+...+qn)/n
(population) standard deviation: std = sqrt(v) -> root mean square error, indicating how far the samples deviate from the true value; used as an indicator of the dispersion of a set of random quantities
(sample) variance: v' = (q1+q2+...+qn)/(n-1)
(sample) standard deviation: std' = sqrt(v')
np.std(sample array, ddof=delta degrees of freedom (default 0)) -> standard deviation
# -*- coding: utf-8 -*- from __future__ import unicode_literals import numpy as np closing_prices = np.loadtxt( '../../data/aapl.csv', delimiter=',', usecols=(6), unpack=True) # mean value mean = closing_prices.mean() # Deviation devs = closing_prices - mean # Population variance pvar = (devs ** 2).sum() / devs.size # Overall standard deviation pstd = np.sqrt(pvar) # Sample variance svar = (devs ** 2).sum() / (devs.size - 1) # Sample standard deviation sstd = np.sqrt(svar) print(pstd, sstd) pstd = np.std(closing_prices) sstd = np.std(closing_prices, ddof=1) print(pstd, sstd)
7. Weekly data
Mon Tue Wed Thu Fri
xxx xxx xxx xxx xxx
xxx xxx xxx xxx xxx
...
np.where(condition) -> the array of subscripts of the elements that meet the condition
np.take(array, subscript array) -> the sub-array of the elements of the array at the given subscripts
array[mask array] -> the sub-array of the elements of the array at the positions where the mask array is True
Calculate weekly average
# -*- coding: utf-8 -*- from __future__ import unicode_literals import datetime as dt import numpy as np def dmy2wday(dmy): dmy = str(dmy, encoding='utf-8') date = dt.datetime.strptime( dmy, '%d-%m-%Y').date() wday = date.weekday() # 0-6 for Monday to Sunday return wday wdays, closing_prices = np.loadtxt( '../../data/aapl.csv', delimiter=',', usecols=(1, 6), unpack=True, converters={1: dmy2wday}) ave_closing_prices = np.zeros(5) for wday in range(len(ave_closing_prices)): ''' ave_closing_prices[wday] = np.take( closing_prices, np.where(wdays == wday)).mean() ave_closing_prices[wday] = closing_prices[ np.where(wdays == wday)].mean() ''' ave_closing_prices[wday] = closing_prices[ wdays == wday].mean() for wday, ave_closing_price in zip( ['MON', 'TUE', 'WED', 'THU', 'FRI'], ave_closing_prices): print(wday, np.round(ave_closing_price, 2))
np.apply_along_axis(processing function, axis, array)
The n-dimensional array is cut into one-dimensional slices along the given axis, each slice is passed as the argument to the processing function, and the return values are recombined into an array that is returned
# -*- coding: utf-8 -*- from __future__ import unicode_literals import numpy as np def foo(a): return a.sum() a = np.arange(1, 10).reshape(3, 3) print(a) b = np.apply_along_axis(foo, 0, a) print(b) c = np.apply_along_axis(foo, 1, a) print(c)
Statistics of weekly opening and closing prices
# -*- coding: utf-8 -*- from __future__ import unicode_literals import datetime as dt import numpy as np def dmy2wday(dmy): dmy = str(dmy, encoding='utf-8') date = dt.datetime.strptime( dmy, '%d-%m-%Y').date() wday = date.weekday() # 0-6 for Monday to Sunday return wday wdays, opening_prices, highest_prices, \ lowest_prices, closing_prices = np.loadtxt( '../../data/aapl.csv', delimiter=',', usecols=(1, 3, 4, 5, 6), unpack=True, converters={1: dmy2wday}) wdays = wdays[:16] opening_prices = opening_prices[:16] highest_prices = highest_prices[:16] lowest_prices = lowest_prices[:16] closing_prices = closing_prices[:16] first_monday = np.where(wdays == 0)[0][0] last_friday = np.where(wdays == 4)[0][-1] indices = np.arange(first_monday, last_friday + 1) indices = np.array(np.split(indices, 3)) def week_summary(indices): opening_price = opening_prices[indices[0]] highest_price = np.take( highest_prices, indices).max() lowest_price = np.take( lowest_prices, indices).min() closing_price = closing_prices[indices[-1]] return opening_price, highest_price, \ lowest_price, closing_price summaries = np.apply_along_axis( week_summary, 1, indices) np.savetxt('../../data/summary.csv', summaries, delimiter=',', fmt='%g')
8. One dimensional convolution
a = [1 2 3 4 5]
b = [6 7 8]
c = a @ b = [6 19 40 61 82 67 40] - full convolution
[19 40 61 82 67] - same dimensional convolution (same)
[40 61 82] - valid convolution
6 19 40 61 82 67 40
0 0 1 2 3 4 5 0 0
8 7 6
8 7 6
8 7 6
8 7 6
8 7 6
8 7 6
8 7 6
c = np.convolve(a, b, mode)
a: the array being convolved
b: the convolution kernel array
mode: the convolution type - 'full' (default), 'same' or 'valid'
# -*- coding: utf-8 -*- from __future__ import unicode_literals import numpy as np a = np.array([1, 2, 3, 4, 5]) b = np.array([6, 7, 8]) print(np.convolve(a, b)) print(np.convolve(a, b, 'same')) print(np.convolve(a, b, 'valid'))
9. Moving average
a b c d e f g h i j k l m n
^^^ ^^
[1/5 1/5 1/5 1/5 1/5]
A B C D E -> S=A+B+C+D+E
(aA + bB + cC + dD +eE)/S
aA/S + bB/S +cC/S + dD/S + eE/S
[A/S B/S C/S D/S E/S]
# -*- coding: utf-8 -*- from __future__ import unicode_literals import datetime as dt import numpy as np import matplotlib.pyplot as mp import matplotlib.dates as md # Converter function to change day in day month year format to year month day format def dmy2ymd(dmy): # Convert UTF-8 encoded byte string to UCS-4 encoded string dmy = str(dmy, encoding='utf-8') ''' d, m, y = dmy.split('-') ymd = y + "-" + m + "-" + d ''' # Resolve date string in day month year format to datetime # Object of type, and then the date sub object of type date date = dt.datetime.strptime( dmy, '%d-%m-%Y').date() # Format date object of type date # String in the form of year month day ymd = date.strftime('%Y-%m-%d') return ymd # Read Apple's # Stock price: opening price, highest price, lowest price and closing price dates, closing_prices = np.loadtxt( '../../data/aapl.csv', delimiter=",", usecols=(1, 6), unpack=True, dtype='M8[D], f8', converters={1: dmy2ymd}) sma51 = np.zeros(closing_prices.size - 4) for i in range(sma51.size): sma51[i] = closing_prices[i:i + 5].mean() sma52 = np.convolve(closing_prices, np.ones(5) / 5, 'valid') sma10 = np.convolve(closing_prices, np.ones(10) / 10, 'valid') weights = np.exp(np.linspace(-1, 0, 5)) weights /= weights.sum() ema5 = np.convolve(closing_prices, weights[::-1], 'valid') mp.figure('Moving Average', facecolor='lightgray') mp.title('Moving Average', fontsize=20) mp.xlabel('Date', fontsize=14) mp.ylabel('Price', fontsize=14) ax = mp.gca() # Major scale represents Monday of each week ax.xaxis.set_major_locator( md.WeekdayLocator(byweekday=md.MO)) # Sub scale means every day ax.xaxis.set_minor_locator(md.DayLocator()) # Set label format of major scale: day month (abbreviated in English) year ax.xaxis.set_major_formatter( md.DateFormatter('%d %b %Y')) mp.tick_params(labelsize=10) mp.grid(linestyle=':') # Numpy.datetime64[D]-> # Matplotlib.dates.datetime.datetime dates = dates.astype(md.datetime.datetime) mp.plot(dates, closing_prices, c='lightgray', 
label='Closing Price') mp.plot(dates[4:], sma51, c='orangered', label='SMA-51') mp.plot(dates[4:], sma52, c='orangered', alpha=0.3, linewidth=6, label='SMA-52') mp.plot(dates[9:], sma10, c='dodgerblue', label='SMA-10') mp.plot(dates[4:], ema5, c='limegreen', label='EMA-5') mp.legend() mp.gcf().autofmt_xdate() mp.show()
For more courses, please follow SkrEric's programming class on wechat