Linear regression from scratch [^1]
```python
%matplotlib inline
import random
import torch
from d2l import torch as d2l
```
Construct the dataset
$$y = Xw + b + \epsilon, \qquad w = [2, -3.4]^T, \quad b = 4.2$$
```python
def synthetic_data(w, b, num_examples):
    """Generate y = Xw + b + Gaussian noise."""
    X = torch.normal(0, 1, (num_examples, len(w)))
    y = torch.matmul(X, w) + b
    y += torch.normal(0, 0.01, y.shape)
    return X, y.reshape((-1, 1))
```
```python
true_w = torch.tensor([2, -3.4])
true_b = 4.2
features, labels = synthetic_data(true_w, true_b, 1000)
```
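As a quick sanity check (a small addition, not in the original notes), the shapes of the generated tensors and the first example can be inspected:

```python
print(features.shape, labels.shape)  # torch.Size([1000, 2]) torch.Size([1000, 1])
print(features[0], labels[0])        # one (x, y) pair from the synthetic dataset
```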
```python
d2l.set_figsize()
d2l.plt.scatter(features[:, 1].numpy(), labels.numpy(), 1)
```
Define a function data_iter that reads a minibatch of size batch_size
```python
def data_iter(batch_size, features, labels):
    num_examples = len(features)
    indices = list(range(num_examples))  # generate an index for each sample
    random.shuffle(indices)              # shuffle the indices
    for i in range(0, num_examples, batch_size):
        batch_indices = torch.tensor(indices[i:min(i + batch_size, num_examples)])
        yield features[batch_indices], labels[batch_indices]  # yield one minibatch by index
```
```python
batch_size = 10
```
Read the first minibatch of samples and print it
```python
for X, y in data_iter(batch_size, features, labels):
    print(X, '\n', y)
    break
```
```
tensor([[ 1.9921, -0.6007],
        [ 0.5512, -0.4175],
        [-0.8084, -1.1460],
        [-0.3088, -0.1136],
        [-1.7418, -0.4826],
        [ 0.8992,  1.9673],
        [ 0.5621,  1.2874],
        [-0.4990,  1.4661],
        [ 0.7456,  1.6236],
        [ 0.0331,  0.2446]])
 tensor([[10.2283],
        [ 6.7240],
        [ 6.4990],
        [ 3.9876],
        [ 2.3645],
        [-0.6909],
        [ 0.9396],
        [-1.7839],
        [ 0.1708],
        [ 3.4304]])
```
Define and initialize the model parameters
Random initialization
```python
w = torch.normal(0, 0.01, size=(2, 1), requires_grad=True)
b = torch.zeros(1, requires_grad=True)
```
Define model
```python
def linreg(X, w, b):
    """The linear regression model."""
    return torch.matmul(X, w) + b
```
Define loss function
```python
def squared_loss(y_hat, y):
    """Squared loss."""
    return (y_hat - y.reshape(y_hat.shape)) ** 2 / 2
```
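For reference (a small addition to the original notes), the per-example loss implemented above is

$$l^{(i)}(w, b) = \frac{1}{2}\bigl(\hat{y}^{(i)} - y^{(i)}\bigr)^2$$

where the factor of 1/2 simply makes the derivative cleaner.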
Define optimization algorithm
- Randomly select an initial value $w_0$
- Repeatedly update the parameters for $t = 1, 2, 3, \ldots$:

$$w_t = w_{t-1} - \eta \frac{\partial l}{\partial w_{t-1}}$$
```python
def sgd(params, lr, batch_size):
    """Minibatch stochastic gradient descent."""
    with torch.no_grad():  # no gradient tracking needed for the update itself, just addition and subtraction
        for param in params:
            param -= lr * param.grad / batch_size
            param.grad.zero_()
```
Training
Specify the hyperparameters
```python
lr = 0.001
num_epochs = 3
net = linreg
loss = squared_loss
```
```python
for epoch in range(num_epochs):
    for X, y in data_iter(batch_size, features, labels):
        l = loss(net(X, w, b), y)    # here l is a vector of length batch_size
        l.sum().backward()           # sum to a scalar before computing gradients
        sgd([w, b], lr, batch_size)  # update the parameters with minibatch SGD
    with torch.no_grad():
        train_l = loss(net(features, w, b), labels)
        print(f'epoch {epoch + 1}, loss {float(train_l.mean()):f}')
```
```
epoch 1, loss 14.073989
epoch 2, loss 11.410689
epoch 3, loss 9.251449
```
```python
print(f'estimated w: {w.reshape(true_w.shape)}')
print(f'estimated b: {b}')
```
```
estimated w: tensor([ 0.5704, -0.9181], grad_fn=<ViewBackward>)
estimated b: tensor([1.1257], requires_grad=True)
```
```python
print(f'estimation error of w: {true_w - w.reshape(true_w.shape)}')
print(f'estimation error of b: {true_b - b}')
```
```
estimation error of w: tensor([ 1.4296, -2.4819], grad_fn=<SubBackward0>)
estimation error of b: tensor([3.0743], grad_fn=<RsubBackward1>)
```

The error is still large here because the learning rate of 0.001 is small and only 3 epochs were run; with a larger learning rate (e.g. 0.03, as used in the PyTorch implementation below), the estimates converge much closer to the true parameters.
Linear regression implemented with PyTorch's high-level APIs
```python
import numpy as np
import torch
from torch.utils import data
from d2l import torch as d2l
```
```python
true_w = torch.tensor([2, -3.4])
true_b = 4.2
features, labels = d2l.synthetic_data(true_w, true_b, 1000)
```
Use the PyTorch data iterator
```python
def load_array(data_arrays, batch_size, is_train=True):
    """Construct a PyTorch data iterator."""
    dataset = data.TensorDataset(*data_arrays)
    return data.DataLoader(dataset, batch_size, shuffle=is_train)  # shuffle the data each epoch when training
```
```python
batch_size = 10
data_iter = load_array((features, labels), batch_size)
data_iter
```
<torch.utils.data.dataloader.DataLoader at 0x7fb099b3bd90>
next(iter(data_iter))
```
[tensor([[ 0.8754,  0.8060],
         [-0.5434, -1.1498],
         [-0.4844, -0.1654],
         [ 1.1868, -0.8638],
         [ 1.0601, -0.0980],
         [ 0.0553,  0.1669],
         [ 1.5849, -0.6559],
         [ 0.7097,  0.8507],
         [-0.4372,  1.2454],
         [-0.5772, -0.5400]]),
 tensor([[ 3.2221],
         [ 7.0248],
         [ 3.8014],
         [ 9.5018],
         [ 6.6535],
         [ 3.7361],
         [ 9.5985],
         [ 2.7213],
         [-0.9234],
         [ 4.8920]])]
```
Model definition
```python
torch.nn.Linear(in_features, out_features, bias=True, device=None, dtype=None)
```
- Parameters (a quick shape check follows this list)
  - in_features – size of each input sample
  - out_features – size of each output sample
  - bias – whether the layer learns an additive bias term. Default: True
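As a small illustration of these parameters (a sketch, not part of the original notes), a `Linear(2, 1)` layer maps a batch of 2-dimensional inputs to 1-dimensional outputs:

```python
import torch
from torch import nn

layer = nn.Linear(2, 1)  # in_features=2, out_features=1, bias=True by default
x = torch.rand(5, 2)     # a batch of 5 examples, each with 2 features
print(layer(x).shape)    # torch.Size([5, 1])
```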
```python
from torch import nn

net = nn.Sequential(nn.Linear(2, 1))
```
Parameter initialization
```python
net[0].weight.data.normal_(0, 0.01)
net[0].bias.data.fill_(0)
```
tensor([0.])
```python
loss = nn.MSELoss()
trainer = torch.optim.SGD(net.parameters(), lr=0.03)  # lr is the learning rate
```
```python
num_epochs = 3
for epoch in range(num_epochs):
    for X, y in data_iter:
        l = loss(net(X), y)
        trainer.zero_grad()
        l.backward()
        trainer.step()
    l = loss(net(features), labels)
    print(f'epoch {epoch + 1}, loss {l:f}')
```
```
epoch 1, loss 0.000228
epoch 2, loss 0.000095
epoch 3, loss 0.000095
```
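The trained parameters can also be compared against the true values, mirroring the from-scratch section; a minimal check, assuming the `net`, `true_w`, and `true_b` defined above:

```python
w = net[0].weight.data
b = net[0].bias.data
print('estimation error of w:', true_w - w.reshape(true_w.shape))
print('estimation error of b:', true_b - b)
```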
```python
import torch
from torch import nn

class MLP(nn.Module):
    # Declare layers with model parameters; here, two fully connected layers
    def __init__(self, **kwargs):
        # Call the constructor of the parent class nn.Module to perform the
        # necessary initialization, so other arguments can also be passed
        # when constructing an instance
        super(MLP, self).__init__(**kwargs)
        self.hidden = nn.Linear(784, 256)
        self.act = nn.ReLU()
        self.output = nn.Linear(256, 10)

    # Define the forward computation of the model, i.e. how to compute the
    # required output from the input x
    def forward(self, x):
        o = self.act(self.hidden(x))
        return self.output(o)
```
```python
X = torch.rand(2, 784)
net = MLP()
print(net)
```
```
MLP(
  (hidden): Linear(in_features=784, out_features=256, bias=True)
  (act): ReLU()
  (output): Linear(in_features=256, out_features=10, bias=True)
)
```
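To confirm the forward pass works (a small check, not in the original notes), the random input `X` defined above can be fed through the network:

```python
print(net(X).shape)  # torch.Size([2, 10]): 2 examples, 10 output units
```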
Supplementary Python notes
Python iterators
Description
Iteration is one of Python's most powerful features and a common way to access the elements of a collection.
An iterator is an object that remembers its current position in a traversal.
An iterator starts from the first element of a collection and is advanced until all elements have been visited. Iterators can only move forward, never backward.
Iterators have two basic methods: iter() and next()
iter()
Creates an iterator object from an iterable
next()
Returns the next element of the iterator
```python
lst = [1, 2, 3, 4]
it = iter(lst)  # create an iterator object
it
>>> <list_iterator at 0x1c4dffd9c40>
```
```python
print(next(it))  # print the next element from the iterator
>>> 1
print(next(it))
>>> 2
```
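Once the iterator is exhausted, a further `next()` call raises `StopIteration`; a small illustration (not in the original notes), continuing from the iterator above:

```python
print(next(it))  # 3
print(next(it))  # 4
next(it)         # raises StopIteration: every element has already been visited
```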
Python generators
Description
In Python, a function that uses yield is called a generator.
Unlike an ordinary function, a generator returns an iterator and can only be used for iteration. Put simply, a generator is an iterator.
While a generator runs, each time it reaches a yield the function pauses, saves all of its current state, and returns the yielded value; the next call to next() resumes execution from that point.
Calling a generator function returns an iterator object
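A minimal sketch (not in the original notes) of a generator that pauses at each `yield` and resumes on the next `next()` call:

```python
def count_up_to(n):
    """Yield 1, 2, ..., n, pausing at each yield."""
    i = 1
    while i <= n:
        yield i  # pause here and return i; resume from this line on the next next() call
        i += 1

gen = count_up_to(3)  # calling the generator function returns an iterator
print(next(gen))      # 1
print(next(gen))      # 2
print(next(gen))      # 3
```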
Reference
[^1]: Li Mu, Dive into Deep Learning, v2.0.