# Torch.nn.Linear

Implements y = xA^T + b.

```python
torch.manual_seed(42)
linear = torch.nn.Linear(in_features=2,   # in_features matches the inner dimension of the input
                         out_features=6)  # out_features sets the size of the output dimension
x = tensor_A
output = linear(x)
x.shape, output, output.shape
```
other operations
```python
tensor = torch.arange(10, 100, 10)     # tensor([10, 20, 30, 40, 50, 60, 70, 80, 90])
tensor.argmax()                        # 8  (index of the maximum value)
tensor.argmin()                        # 0  (index of the minimum value)
tensor.type(torch.float16)             # tensor([10., 20., 30., 40., 50., 60., 70., 80., 90.], dtype=torch.float16)
tensor.reshape(new_shape)              # -1 asks PyTorch to infer that dimension automatically
tensor.view(new_shape)                 # returns a view of the same data with a new shape
torch.stack(tensors, dim=0)            # concatenates a sequence of tensors along a new dimension (dim)
torch.squeeze(tensor)                  # removes all dimensions of size 1
torch.unsqueeze(tensor, dim)           # adds a dimension of size 1 at position dim
torch.clamp(tensor, min=min, max=max)  # limits values to the range [min, max]
tensor.permute(2, 0, 1)                # e.g. torch.Size([224, 224, 3]) -> torch.Size([3, 224, 224])
x.unsqueeze_(dim)                      # trailing underscore -> in-place operation
```
random seed

```python
torch.manual_seed(seed=RANDOM_SEED)
torch.random.manual_seed(seed=RANDOM_SEED)
```
Variable
```python
from torch.autograd import Variable  # exposes .data, .grad, .grad_fn

x_tensor = torch.randn(10, 5)
y_tensor = torch.randn(10, 5)
x = Variable(x_tensor, requires_grad=True)
y = Variable(y_tensor, requires_grad=True)
z = torch.sum(x + y)
print(z.data)     # -2.1379
print(z.grad_fn)  # <SumBackward0 object at 0x10da636a0>
z.backward()
```
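Variable has been merged into the Tensor class since PyTorch 0.4, so the same example can be written with plain tensors and requires_grad; a minimal equivalent sketch:

```python
import torch

x = torch.randn(10, 5, requires_grad=True)
y = torch.randn(10, 5, requires_grad=True)
z = torch.sum(x + y)

print(z.data)     # the underlying values
print(z.grad_fn)  # <SumBackward0 object at ...>
z.backward()      # fills x.grad and y.grad (all ones here, since dz/dx = dz/dy = 1)
```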
GPU
```python
# Pick the best available device
if torch.cuda.is_available():
    device = "cuda"  # use NVIDIA GPU (if available)
elif torch.backends.mps.is_available():
    device = "mps"   # use Apple Silicon GPU (if available)
else:
    device = "cpu"   # default to CPU if no GPU is available

tensor.to(device)            # move a tensor to the chosen device
tensor_on_gpu.cpu().numpy()  # NumPy only works with CPU tensors, so move back to CPU first
```
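Models follow the same pattern: call .to(device) on the module and make sure the data lives on the same device. A minimal sketch (the model and tensor here are illustrative, not from the notes):

```python
import torch

device = "cuda" if torch.cuda.is_available() else "cpu"

model = torch.nn.Linear(2, 6).to(device)  # moves the model's parameters to the device
X = torch.randn(8, 2).to(device)          # data must be on the same device as the model

y = model(X)                              # works because everything is on `device`
print(next(model.parameters()).device)    # confirm where the parameters live
y_cpu = y.detach().cpu().numpy()          # back to CPU before converting to NumPy
```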
Neural network
torch.nn
Contains all of the building blocks for computational graphs
(essentially a series of computations executed in a particular way).
torch.nn.Parameter
Stores tensors that can be used with nn.Module. If requires_grad=True, gradients
(used for updating model parameters via gradient descent) are calculated
automatically; this is often referred to as "autograd".
torch.nn.Module
The base class for all neural network modules; all the building blocks for
neural networks are subclasses of it. If you're building a neural network in
PyTorch, your models should subclass nn.Module and implement a forward() method.
torch.optim
Contains various optimization algorithms (these tell the model parameters
stored in nn.Parameter how best to change during gradient descent in order to
reduce the loss).
def forward()
All nn.Module subclasses require a forward() method; it defines the computation
performed on the data passed to the particular nn.Module (e.g. the linear
regression formula y = xA^T + b above). A minimal sketch combining these pieces follows below.
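To see how nn.Parameter, nn.Module, forward() and torch.optim fit together, here is a minimal linear-regression sketch (the class and attribute names are illustrative, not from the notes):

```python
import torch
from torch import nn

class LinearRegressionModel(nn.Module):
    def __init__(self):
        super().__init__()
        # nn.Parameter registers these tensors with the module, so they appear
        # in model.parameters() and receive gradients during backpropagation
        self.weights = nn.Parameter(torch.randn(1))
        self.bias = nn.Parameter(torch.randn(1))

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # the computation applied to the input: y = weights * x + bias
        return self.weights * x + self.bias

model = LinearRegressionModel()
print(list(model.parameters()))                         # the two nn.Parameter tensors
optimizer = torch.optim.SGD(params=model.parameters(),  # torch.optim consumes those parameters
                            lr=0.01)
```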
```python
loss_fn = nn.L1Loss()  # MAE loss is the same as L1Loss
optimizer = torch.optim.SGD(params=model_0.parameters(),
                            lr=0.01)  # lr = learning rate
```
| Step | Step name | What does it do? | Code example |
| --- | --- | --- | --- |
| 1 | Forward pass | The model goes through all of the training data once, performing its forward() function calculations. | `model(x_train)` |
| 2 | Calculate the loss | The model's outputs (predictions) are compared to the ground truth and evaluated to see how wrong they are. | `loss = loss_fn(y_pred, y_train)` |
| 3 | Zero gradients | The optimizer's gradients are set to zero (they are accumulated by default) so they can be recalculated for the specific training step. | `optimizer.zero_grad()` |
| 4 | Perform backpropagation on the loss | Computes the gradient of the loss with respect to every model parameter to be updated (each parameter with requires_grad=True). | `loss.backward()` |
| 5 | Update the optimizer (gradient descent) | Update the parameters with requires_grad=True with respect to the loss gradients in order to improve them. | `optimizer.step()` |
Training example
```python
for epoch in range(epochs):
    model.train()                   # put the model in training mode
    y_pred = model(X_train)         # 1. forward pass
    loss = loss_fn(y_pred, y_true)  # 2. calculate the loss
    optimizer.zero_grad()           # 3. zero the gradients
    loss.backward()                 # 4. backpropagation
    optimizer.step()                # 5. gradient descent step
```
test
| Step name | What does it do? | Code example |
| --- | --- | --- |
| Forward pass | The model goes through all of the testing data once, performing its forward() function calculations. | `model(x_test)` |
| Calculate the loss | The model's outputs (predictions) are compared to the ground truth and evaluated to see how wrong they are. | `loss = loss_fn(y_pred, y_test)` |
| Calculate evaluation metrics (optional) | Alongside the loss value you may want to calculate other evaluation metrics such as accuracy on the test set. | Custom functions (see the sketch below) |
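The custom function referenced in the last row is typically an accuracy helper; a minimal sketch (the name accuracy_fn is an assumption, not from the notes):

```python
import torch

def accuracy_fn(y_true: torch.Tensor, y_pred: torch.Tensor) -> float:
    """Return accuracy as a percentage: how many predictions equal the labels."""
    correct = torch.eq(y_true, y_pred).sum().item()
    return (correct / len(y_pred)) * 100

# usage: convert raw model outputs (e.g. logits) to class labels first
# acc = accuracy_fn(y_true=y_test, y_pred=test_pred.argmax(dim=1))
```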
Inference and save model
Inference
```python
model_0.eval()                # set the model in evaluation mode
with torch.inference_mode():  # disable gradient tracking for faster inference
    y_preds = model_0(X_test)
```
torch.save
Saves a serialized object to disk using Python's pickle utility.
Models, tensors and various other Python objects like dictionaries can
be saved using torch.save.
torch.load
Uses pickle's unpickling features to deserialize and load pickled
Python object files (like models, tensors or dictionaries) into memory.
You can also set which device to load the object to (CPU, GPU etc).
torch.nn.Module.load_state_dict (recommended)
Loads a model's parameter dictionary (model.state_dict()) using a
saved state_dict() object.
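A short sketch of the recommended save/load workflow (the file path and model names are assumptions; LinearRegressionModel reuses the sketch class from above):

```python
from pathlib import Path
import torch

# save only the state_dict (the learned parameters), not the whole model object
MODEL_PATH = Path("models/01_model_0.pth")
MODEL_PATH.parent.mkdir(parents=True, exist_ok=True)
torch.save(obj=model_0.state_dict(), f=MODEL_PATH)

# load: create a fresh instance of the same model class, then load the saved parameters into it
loaded_model_0 = LinearRegressionModel()
loaded_model_0.load_state_dict(torch.load(f=MODEL_PATH))
loaded_model_0.eval()
```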
```python
torch.manual_seed(42)
epochs = 100  # set the number of epochs

# Create empty loss lists to track values
train_loss_values = []
test_loss_values = []
epoch_count = []

for epoch in range(epochs):
    ### Training
    model_0.train()                  # put model in training mode (the default state of a model)

    y_pred = model_0(X_train)        # 1. Forward pass on train data using the forward() method inside
    loss = loss_fn(y_pred, y_train)  # 2. Calculate the loss (how different are our model's predictions to the ground truth)
    optimizer.zero_grad()            # 3. Zero the gradients of the optimizer
    loss.backward()                  # 4. Backpropagate the loss
    optimizer.step()                 # 5. Progress the optimizer

    ### Testing
    model_0.eval()                   # put the model in evaluation mode
    with torch.inference_mode():
        test_pred = model_0(X_test)  # 1. Forward pass on test data

        # 2. Calculate loss on test data
        # (predictions come out as torch.float, so compare against tensors of the same dtype)
        test_loss = loss_fn(test_pred, y_test.type(torch.float))

    # Print out what's happening
    if epoch % 10 == 0:
        epoch_count.append(epoch)
        train_loss_values.append(loss.detach().numpy())
        test_loss_values.append(test_loss.detach().numpy())
        print(f"Epoch: {epoch} | MAE Train Loss: {loss} | MAE Test Loss: {test_loss}")
```
```python
# Put data on the available device
# (without this, an error will occur: not all model/data on the same device)
X_train = X_train.to(device)
X_test = X_test.to(device)
y_train = y_train.to(device)
y_test = y_test.to(device)

for epoch in range(epochs):
    ### Training
    model_1.train()                  # train mode is on by default after construction

    y_pred = model_1(X_train)        # 1. Forward pass
    loss = loss_fn(y_pred, y_train)  # 2. Calculate loss
    optimizer.zero_grad()            # 3. Zero grad optimizer
    loss.backward()                  # 4. Loss backward
    optimizer.step()                 # 5. Step the optimizer

    ### Testing
    model_1.eval()                   # put the model in evaluation mode for testing (inference)
    with torch.inference_mode():
        test_pred = model_1(X_test)             # 1. Forward pass
        test_loss = loss_fn(test_pred, y_test)  # 2. Calculate the loss

    if epoch % 100 == 0:
        print(f"Epoch: {epoch} | Train loss: {loss} | Test loss: {test_loss}")
```
```python
# Create a neural net class
class Net(nn.Module):
    def __init__(self, num_classes=3):
        super(Net, self).__init__()

        # Our images are RGB, so input channels = 3. We'll apply 12 filters in the first convolutional layer
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=12, kernel_size=3, stride=1, padding=1)

        # We'll apply max pooling with a kernel size of 2
        self.pool = nn.MaxPool2d(kernel_size=2)

        # A second convolutional layer takes 12 input channels and generates 12 outputs
        self.conv2 = nn.Conv2d(in_channels=12, out_channels=12, kernel_size=3, stride=1, padding=1)

        # A third convolutional layer takes 12 inputs and generates 24 outputs
        self.conv3 = nn.Conv2d(in_channels=12, out_channels=24, kernel_size=3, stride=1, padding=1)

        # A dropout layer drops 20% of the features to help prevent overfitting
        self.drop = nn.Dropout2d(p=0.2)

        # Our 128x128 image tensors will be pooled twice with a kernel size of 2. 128/2/2 is 32.
        # So our feature tensors are now 32x32, and we've generated 24 of them.
        # We need to flatten these and feed them to a fully-connected layer
        # to map them to the probability for each class.
        self.fc = nn.Linear(in_features=32 * 32 * 24, out_features=num_classes)

    def forward(self, x):
        # Use a relu activation function after layer 1 (convolution 1 and pool)
        x = F.relu(self.pool(self.conv1(x)))

        # Use a relu activation function after layer 2 (convolution 2 and pool)
        x = F.relu(self.pool(self.conv2(x)))

        # Select some features to drop after the 3rd convolution to prevent overfitting
        x = F.relu(self.drop(self.conv3(x)))

        # Only drop the features if this is a training pass
        x = F.dropout(x, training=self.training)

        # Flatten
        x = x.view(-1, 32 * 32 * 24)

        # Feed to fully-connected layer to predict class
        x = self.fc(x)

        # Return log_softmax tensor
        return F.log_softmax(x, dim=1)
```
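A quick shape sanity check for the network above (a sketch; batch size 4 is arbitrary, the 128x128 input size follows the comments in the class):

```python
import torch

model = Net(num_classes=3)
dummy_batch = torch.randn(4, 3, 128, 128)  # batch of 4 RGB images, 128x128 pixels
out = model(dummy_batch)
print(out.shape)                           # torch.Size([4, 3]): log-probabilities per class
```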
```python
import torch
import torch.autograd as autograd  # computation graph / autograd utilities
import torch.nn as nn              # neural network building blocks
import torch.nn.functional as F    # functional ops (activations, dropout, log_softmax, ...)
import torch.optim as optim        # optimizers
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence  # padded-sequence helpers for RNNs
```