A notebook that applies an FFN (Feed-Forward Neural Network) to a regression problem. We will use the Friedman2 dataset from sklearn.
Based on: Kaggle Notebook for Iris Classification - PyTorch for Iris Dataset - Iris Classification
import matplotlib.pyplot as plt
from sklearn.datasets import make_friedman2
X, y = make_friedman2(n_samples=200, random_state=42, noise=0.5)
print(X.shape)
print(y.shape)
(200, 4)
(200,)
plt.figure(figsize=(5,5))
plt.scatter(X[:,0], y)
plt.show()

plt.hist(y)
(array([64., 29., 30., 19., 23., 20., 5., 2., 5., 3.]),
array([ 13.8686335 , 177.60233879, 341.33604407, 505.06974936,
668.80345464, 832.53715992, 996.27086521, 1160.00457049,
1323.73827578, 1487.47198106, 1651.20568635]),
<BarContainer object of 10 artists>)
# split and scale.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
# Friedman2 response variable has a huge dynamic range. The maximum value is above 1500.
scaler = StandardScaler()

print(y_train.shape)
y_tmp = y_train.reshape(-1, 1)
y_train = scaler.fit_transform(y_tmp)
print(y_train.shape)
y_tmp = y_test.reshape(-1, 1)
y_test = scaler.transform(y_tmp)
print(y_test.shape)
(160,)
(160, 1)
(40, 1)
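The fitted target scaler stores the statistics it learned from y_train, which shows how the large dynamic range gets compressed (a quick inspection sketch, reusing the scaler fitted above):

# mean and standard deviation learned from the training targets
print(scaler.mean_)   # roughly the centre of the y_train values
print(scaler.scale_)  # roughly their standard deviation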
Exercise
Run the regression without scaling the response variable. What do you expect to happen?
# load the libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
# convert numpy arrays into torch tensors
X_train = torch.FloatTensor(X_train)
X_test = torch.FloatTensor(X_test)
y_train = torch.FloatTensor(y_train)
y_test = torch.FloatTensor(y_test)
# define the model.
# the model is exactly the same as the model we saw earlier
class MLP(nn.Module):
    # define nn
    def __init__(self, input_dim=4, output_dim=3, hidden_dim=[128, 64]):
        super(MLP, self).__init__()
        self.input = nn.Linear(input_dim, hidden_dim[0])
        self.hidden = nn.Linear(hidden_dim[0], hidden_dim[1])
        self.out = nn.Linear(hidden_dim[1], output_dim)
        self.relu = nn.ReLU()

    def forward(self, X):
        X = self.relu(self.input(X))
        X = self.relu(self.hidden(X))
        X = self.out(X)
        return X
input_dim = 4
output_dim = 1
hidden_dim = [64, 64]
model = MLP(input_dim=input_dim, output_dim=output_dim, hidden_dim=hidden_dim)
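A quick shape check (a sketch with a dummy batch, not part of the dataset) confirms the network maps 4 input features to a single regression output:

# pass a dummy batch of 8 samples through the untrained model
dummy = torch.randn(8, input_dim)
print(model(dummy).shape)  # expected: torch.Size([8, 1])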
learning_rate = 0.01
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
Since this is a regression problem, we use MSE loss. Practically, this is the only change we have to make so far. Of course, how we evaluate the model predictions also has to change!
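As a small illustration (a sketch, not part of the training code), nn.MSELoss simply averages the squared differences between predictions and targets:

# nn.MSELoss() is equivalent to the mean of squared errors
pred = torch.tensor([[0.5], [1.0]])
target = torch.tensor([[0.0], [2.0]])
print(nn.MSELoss()(pred, target))      # tensor(0.6250)
print(((pred - target) ** 2).mean())   # tensor(0.6250)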
def train_network(model, optimizer, criterion, X_train, y_train, X_test, y_test, num_epochs, train_losses, test_losses):
    for epoch in range(num_epochs):
        # clear out the gradients from the last step loss.backward()
        optimizer.zero_grad()

        # forward feed
        output_train = model(X_train)

        # calculate the loss
        loss_train = criterion(output_train, y_train)

        # backward propagation: calculate gradients
        loss_train.backward()

        # update the weights
        optimizer.step()

        output_test = model(X_test)
        loss_test = criterion(output_test, y_test)

        train_losses[epoch] = loss_train.item()
        test_losses[epoch] = loss_test.item()

        if (epoch + 1) % 50 == 0:
            print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {loss_train.item():.4f}, Test Loss: {loss_test.item():.4f}")
import numpy as np
num_epochs = 1000
train_losses = np.zeros(num_epochs)
test_losses = np.zeros(num_epochs)

train_network(model, optimizer, criterion, X_train, y_train, X_test, y_test, num_epochs, train_losses, test_losses)
Epoch 50/1000, Train Loss: 0.0040, Test Loss: 0.0068
Epoch 100/1000, Train Loss: 0.0010, Test Loss: 0.0037
Epoch 150/1000, Train Loss: 0.0005, Test Loss: 0.0032
Epoch 200/1000, Train Loss: 0.0003, Test Loss: 0.0028
Epoch 250/1000, Train Loss: 0.0002, Test Loss: 0.0026
Epoch 300/1000, Train Loss: 0.0001, Test Loss: 0.0025
Epoch 350/1000, Train Loss: 0.0001, Test Loss: 0.0025
Epoch 400/1000, Train Loss: 0.0001, Test Loss: 0.0024
Epoch 450/1000, Train Loss: 0.0016, Test Loss: 0.0037
Epoch 500/1000, Train Loss: 0.0000, Test Loss: 0.0024
Epoch 550/1000, Train Loss: 0.0000, Test Loss: 0.0023
Epoch 600/1000, Train Loss: 0.0008, Test Loss: 0.0030
Epoch 650/1000, Train Loss: 0.0000, Test Loss: 0.0023
Epoch 700/1000, Train Loss: 0.0000, Test Loss: 0.0022
Epoch 750/1000, Train Loss: 0.0000, Test Loss: 0.0022
Epoch 800/1000, Train Loss: 0.0002, Test Loss: 0.0024
Epoch 850/1000, Train Loss: 0.0000, Test Loss: 0.0022
Epoch 900/1000, Train Loss: 0.0000, Test Loss: 0.0022
Epoch 950/1000, Train Loss: 0.0000, Test Loss: 0.0021
Epoch 1000/1000, Train Loss: 0.0000, Test Loss: 0.0024
plt.figure(figsize=(5,5))
plt.plot(train_losses, label='train loss')
plt.plot(test_losses, label='test loss')
plt.legend()
plt.show()
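Since the losses shrink by several orders of magnitude, a log-scale view can make the later epochs easier to compare (an optional sketch reusing the arrays above):

# same curves on a log scale, which separates the very small late-epoch losses
plt.plot(train_losses, label='train loss')
plt.plot(test_losses, label='test loss')
plt.yscale('log')
plt.legend()
plt.show()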
predictions_train = []
predictions_test = []
with torch.no_grad():
    predictions_train = model(X_train)
    predictions_test = model(X_test)
print(predictions_train.shape)
print(type(predictions_train))
print(y_train.shape)
print(type(y_train))
torch.Size([160, 1])
<class 'torch.Tensor'>
torch.Size([160, 1])
<class 'torch.Tensor'>
yt = y_test.numpy()
print(type(yt))
print(yt.shape)

yh = predictions_test.numpy()
print(type(yh))
print(yh.shape)
<class 'numpy.ndarray'>
(40, 1)
<class 'numpy.ndarray'>
(40, 1)
from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import root_mean_squared_error as rmse
print('mse is: ', mse(yt, yh))
print('rmse is: ', rmse(yt, yh))
mse is: 0.00236044
rmse is: 0.04858436
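Beyond MSE and RMSE, an R² score gives a scale-free sense of fit (a hedged addition using sklearn's r2_score, which is not used in the cells above):

from sklearn.metrics import r2_score
print('r2 is: ', r2_score(yt, yh))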
plt.scatter(yt,yh)
residuals = yt - yh
plt.hist(residuals)
plt.show()
import pandas as pd
pd.DataFrame(residuals).plot(kind='density')
Compared to FFNs for classification, for regression we used MSE loss, keeping everything else the same (code-wise). When making predictions, we simply used the outputs of the model as-is.
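If we want the error in the original Friedman2 units rather than standardized units, the target scaling can be inverted on both the true values and the predictions (a sketch; scaler here is the target scaler fitted earlier and rmse is the alias imported above):

# back-transform to the original target scale and report RMSE there
yt_orig = scaler.inverse_transform(yt)
yh_orig = scaler.inverse_transform(yh)
print('rmse (original units): ', rmse(yt_orig, yh_orig))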