XOR with hand-coded gradient-descent updates. The generic squared-error loss used below can be swapped for a margin loss for which only sign(output) must be correct (see the commented-out alternative in the training loop).
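A minimal stand-alone illustration of that margin criterion (the concrete values are made up for the example):

# margin loss: zero once output*target exceeds the margin 0.75,
# i.e. once the sign is correct with sufficient margin
import torch
output = torch.tensor([0.9])   # example prediction
target = torch.tensor([1.0])   # example target
loss = torch.relu(0.75 - output*target).pow(2).sum()
print(loss.item())             # 0.0: sign and margin correct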
#!/usr/bin/env python3
import torch
import math
import matplotlib.pyplot as plt
#
# tanh layer
#
class MyLayer(torch.nn.Module):                    # inheritance
    def __init__(self, dim1, dim2):                # constructor
        super().__init__()
        self.weights = torch.randn(dim1, dim2, requires_grad=True)
        self.bias = torch.randn(dim1, requires_grad=True)

    def forward(self, x):                          # define forward pass
        return torch.tanh(torch.matmul(self.weights, x) - self.bias)

    def update(self, eps, nBatch):                 # updating weights / bias
        with torch.no_grad():
            self.weights -= eps*self.weights.grad/nBatch
            self.bias -= eps*self.bias.grad/nBatch
            self.weights.grad = None
            self.bias.grad = None
#
# main
#
dimOutput = 1                                      # only 1 implemented
dimHidden = 2
dimInput = 2                                       # only 2 implemented
nBatch = 4                                         # only 4 implemented
nEpoch = 1000
learningRate = 4.0e-2
myLayerObject = MyLayer(dimHidden, dimInput)       # instantiation
myOutputObject = MyLayer(1, dimHidden)

# XOR for 2 inputs
booleanInput = torch.tensor([[ 1.0,  1.0],
                             [ 1.0, -1.0],
                             [-1.0,  1.0],
                             [-1.0, -1.0]])
booleanValue = torch.tensor([[-1.0],
                             [ 1.0],
                             [ 1.0],
                             [-1.0]])
print(booleanInput)
print(booleanValue)
#
# training loop
#
for iEpoch in range(nEpoch):                       # training loop
    for iBatch in range(nBatch):                   # a batch for every epoch
        thisInput = booleanInput[iBatch]
        thisTarget = booleanValue[iBatch]
        #
        hidden = myLayerObject(thisInput)          # forward pass (implicit)
        output = myOutputObject(hidden)
        loss = (output - thisTarget).pow(2).sum()  # generic loss function
        #--- alternative loss function
        #--- ** just the sign has to be correct; may work only
        #--- for some initial conditions (qualitatively)
        # loss = torch.relu(0.75 - output*thisTarget).pow(2).sum()
        loss.backward()                            # gradients accumulate over the batch
        if iEpoch > (nEpoch - 9):                  # print the last epochs
            print(f'{thisInput.tolist()[0]:7.3f}', end="")
            print(f'{thisInput.tolist()[1]:7.3f}', end="")
            print(f'{thisTarget.tolist()[0]:7.3f} ||', end="")
            print(f'{output.tolist()[0]:7.3f}')
            if iBatch == (nBatch - 1):
                print()
    #
    myLayerObject.update(learningRate, nBatch)     # gradients have
    myOutputObject.update(learningRate, nBatch)    # been summed up
# end of training
Two library calls used in the next script: torch.linspace (the name says it all: evenly spaced values over an interval) and np.random.randint (random integers in a given range).
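A quick stand-alone demo of both calls (the concrete arguments are illustrative):

import torch
import numpy as np
print(torch.linspace(-3.0, 3.0, 5))   # tensor([-3.0000, -1.5000, 0.0000, 1.5000, 3.0000])
print(np.random.randint(20, size=4))  # e.g. [ 3 17  5 11]; drawn from [0, 20)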
#!/usr/bin/env python3
import torch
import math
import numpy as np
import matplotlib.pyplot as plt
#
# global variables
#
dimOutput = 1           # only 1 implemented
dimHidden = 40
dimInput = 1            # only 1 implemented
nData = 20              # number of training pairs
nBatch = 20
nEpoch = 1000
learningRate = 4.0e-2
xMax = 3.0              # for data / plotting
#
# general layer
#
class MyLayer(torch.nn.Module):                    # inheritance
    def __init__(self, dim1, dim2):                # constructor
        super().__init__()
        self.weights = torch.zeros(dim1, dim2, requires_grad=True)
        self.bias = torch.zeros(dim1, requires_grad=True)
        mySigma = 1.0/math.sqrt(dim2)              # scaling of weights
        torch.nn.init.normal_(self.weights, mean=0.0, std=mySigma)

    def forward(self, x):                          # tanh unit
        return torch.tanh(torch.matmul(self.weights, x) - self.bias)

    def forward_linear(self, x):                   # linear unit
        return torch.matmul(self.weights, x) - self.bias

    def update(self, eps):
        with torch.no_grad():
            self.weights -= eps*self.weights.grad
            self.bias -= eps*self.bias.grad
            self.weights.grad = None
            self.bias.grad = None
#
# target: Bell curve and beyond
#
def target_curve(x):
    return torch.exp(-0.5*x.pow(2)) / math.sqrt(2.0*math.pi)
    # return torch.sin(x.pow(2)) + torch.cos(x)
#
# fixed training data
#
dataInput = torch.zeros((nData, dimInput))
dataInput[:, 0] = torch.linspace(-xMax, xMax, nData)
dataValue = target_curve(dataInput[:, 0])
# print("\n# dataInput", dataInput.shape, "\n", dataInput)
# print("\n# dataValue", dataValue.shape, "\n", dataValue)
#
# instantiate model, define forward pass
#
layerHidden = MyLayer(dimHidden, dimInput)
layerOutput = MyLayer(dimOutput, dimHidden)

def modelForward(myInput):
    hidden = layerHidden(myInput)                  # forward pass (implicit)
    return layerOutput.forward_linear(hidden)      # linear output units
#
# training loop
#
for iEpoch in range(nEpoch):                       # training loop
    randIntArray = np.random.randint(nData, size=nBatch)  # random sampling
    # print("\n# randIntArray\n", randIntArray)
    #
    for iBatch in range(nBatch):
        batchInput = dataInput[randIntArray[iBatch], :]
        batchValue = dataValue[randIntArray[iBatch]]
        output = modelForward(batchInput)          # forward pass
        trainingLoss = (output - batchValue).pow(2).sum()
        trainingLoss.backward()                    # backward pass
        layerHidden.update(learningRate/nBatch)    # updating
        layerOutput.update(learningRate/nBatch)    # gradients
        # print("# ", iBatch, trainingLoss.tolist())
    tenPercent = int(nEpoch/10)
    if (iEpoch % tenPercent == 0):
        print(f'{iEpoch:7d} {trainingLoss:9.5f}')
#
# testing
#
nPlot = 100
xPlot = [-xMax + iPlot*2.0*xMax/nPlot for iPlot in range(nPlot)]
yPlot = [0.0 for _ in range(nPlot)]
zPlot = [0.0 for _ in range(nPlot)]
testInput = torch.zeros(dimInput)
for iPlot in range(nPlot):
    testInput[0] = xPlot[iPlot]
    testOutput = modelForward(testInput)           # forward pass with test data
    yPlot[iPlot] = target_curve(testInput[0]).item()
    zPlot[iPlot] = testOutput[0].item()
if False:                                          # debugging output
    for iPlot in range(nPlot):
        print(xPlot[iPlot], yPlot[iPlot], zPlot[iPlot])
xPoints = [dataInput[ii, 0] for ii in range(nData)]
yPoints = [dataValue[ii] for ii in range(nData)]
#
# plotting
#
plt.plot(xPlot, yPlot, 'k', label="data curve")
plt.plot(xPoints, yPoints, '.r', label="data points", markersize=8)
plt.plot(xPlot, zPlot, '--b', label="inference", linewidth=3.0)
plt.legend()
plt.xlabel('input activity')
plt.ylabel('output activity')
plt.savefig('foo.svg')
plt.show()
#!/usr/bin/env python3
import torch
import math
import numpy as np
import matplotlib.pyplot as plt
#
# global variables
#
dimOutput = 1           # only 1 implemented
dimHidden = 40
dimInput = 1            # only 1 implemented
nHidden = 2             # number of hidden layers
nData = 20              # number of training pairs
nBatch = 20
nEpoch = 2000
learningRate = 4.0e-2   # eta
momentum_mu = 0.8       # for momentum updating
xMax = 3.0              # for data / plotting
#
# general layer
#
class MyLayer(torch.nn.Module):
    def __init__(self, dim1, dim2, mu=0.0):
        super().__init__()
        self.weights = torch.zeros(dim1, dim2, requires_grad=True)
        self.bias = torch.zeros(dim1, requires_grad=True)
        mySigma = 1.0/math.sqrt(dim2)              # scaling of weights
        torch.nn.init.normal_(self.weights, mean=0.0, std=mySigma)
        self.weights_v = torch.zeros(dim1, dim2)   # associated
        self.bias_v = torch.zeros(dim1)            # velocities
        self.mu = mu                               # momentum update parameter, in [0,1]

    def forward(self, x):                          # tanh unit
        return torch.tanh(torch.matmul(self.weights, x) - self.bias)

    def forward_linear(self, x):                   # linear unit
        return torch.matmul(self.weights, x) - self.bias

    def update(self, eps):
        with torch.no_grad():
            self.weights_v = self.mu*self.weights_v - eps*self.weights.grad  # update
            self.bias_v = self.mu*self.bias_v - eps*self.bias.grad           # velocities
            self.weights += self.weights_v
            self.bias += self.bias_v
            self.weights.grad = None
            self.bias.grad = None
#
# target: Bell curve and beyond
#
def target_curve(x):
    # return torch.exp(-0.5*x.pow(2)) / math.sqrt(2.0*math.pi)
    return torch.sin(x.pow(2)) + torch.cos(x)
#
# fixed training data
#
dataInput = torch.zeros((nData, dimInput))
dataInput[:, 0] = torch.linspace(-xMax, xMax, nData)
dataValue = target_curve(dataInput[:, 0])
# print("\n# dataInput\n", dataInput)
# print("\n# dataValue\n", dataValue)
#
# instantiate model, define forward pass
#
allHidden = [None for iH in range(nHidden)]
allHidden[0] = MyLayer(dimHidden, dimInput, momentum_mu)
for iH in range(1, nHidden):
    allHidden[iH] = MyLayer(dimHidden, dimHidden, momentum_mu)
layerOutput = MyLayer(dimOutput, dimHidden, momentum_mu)

def modelForward(myInput):
    hidden = allHidden[0](myInput)                 # input -> first hidden
    for iH in range(1, nHidden):
        hidden = allHidden[iH](hidden)
    return layerOutput.forward_linear(hidden)      # linear output units
#
# training loop
#
for iEpoch in range(nEpoch):                       # training loop
    randIntArray = np.random.randint(nData, size=nBatch)
    # print("\n# randIntArray\n", randIntArray)
    for iBatch in range(nBatch):
        batchInput = dataInput[randIntArray[iBatch], :]
        batchValue = dataValue[randIntArray[iBatch]]
        output = modelForward(batchInput)          # forward pass
        trainingLoss = (output - batchValue).pow(2).sum()
        trainingLoss.backward()                    # backward pass
        for iH in range(nHidden):
            allHidden[iH].update(learningRate/nBatch)
        layerOutput.update(learningRate/nBatch)
    if (iEpoch % int(nEpoch/20) == 0):
        print(f'{iEpoch:7d} {trainingLoss:9.5f}')
#
# testing
#
nPlot = 100
xPlot = [-xMax + iPlot*2.0*xMax/nPlot for iPlot in range(nPlot)]
yPlot = [0.0 for _ in range(nPlot)]
zPlot = [0.0 for _ in range(nPlot)]
testInput = torch.zeros(dimInput)
for iPlot in range(nPlot):
    testInput[0] = xPlot[iPlot]
    testOutput = modelForward(testInput)           # forward pass with test data
    yPlot[iPlot] = target_curve(testInput[0]).item()
    zPlot[iPlot] = testOutput[0].item()
if False:                                          # debugging output
    for iPlot in range(nPlot):
        print(xPlot[iPlot], yPlot[iPlot], zPlot[iPlot])
xPoints = [dataInput[ii, 0] for ii in range(nData)]
yPoints = [dataValue[ii] for ii in range(nData)]
#
# plotting
#
plt.plot(xPlot, yPlot, 'k', label="data curve")
plt.plot(xPoints, yPoints, '.r', label="data points", markersize=8)
plt.plot(xPlot, zPlot, '--b', label="inference", linewidth=3.0)
plt.legend()
plt.xlabel('input activity')
plt.ylabel('output activity')
plt.savefig('foo.svg')
plt.show()
Changes for the optimizer-based version below: weights and biases are registered as torch.nn.Parameter objects, as in self.weights = torch.nn.Parameter(torch.zeros(dim1,dim2)), so that a PyTorch optimizer such as optimOut = torch.optim.Adam(layerOut.parameters(), lr=learningRate) can manage them; calling optimOut.step() and optimOut.zero_grad() then replaces the hand-written update() routine in the layer module.
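A minimal sketch of this pattern in isolation (the toy module and its dimensions are made up for illustration):

import torch

class ToyLayer(torch.nn.Module):
    def __init__(self, dim1, dim2):
        super().__init__()
        # nn.Parameter registers the tensors with the module,
        # so layer.parameters() can hand them to an optimizer
        self.weights = torch.nn.Parameter(torch.randn(dim1, dim2))
        self.bias = torch.nn.Parameter(torch.zeros(dim1))
    def forward(self, x):
        return torch.matmul(self.weights, x) - self.bias

layer = ToyLayer(1, 2)
optim = torch.optim.Adam(layer.parameters(), lr=1.0e-3)
loss = layer(torch.tensor([1.0, -1.0])).pow(2).sum()
loss.backward()       # gradients for all registered parameters
optim.step()          # one Adam update
optim.zero_grad()     # reset gradients for the next pass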
#!/usr/bin/env python3
import torch
import math
import numpy as np
import matplotlib.pyplot as plt
#
# global variables
#
dimOutput = 1           # only 1 implemented
dimHidden = 40
dimInput = 1            # only 1 implemented
nHidden = 2             # at least one
nData = 20              # number of training pairs
nBatch = 20
nEpoch = 1000
learningRate = 1.0e-3   # eta
xMax = 3.0              # for data / plotting
#
# general layer
#
class MyLayer(torch.nn.Module):
    def __init__(self, dim1, dim2):
        super().__init__()
        self.weights = torch.nn.Parameter(torch.zeros(dim1, dim2))
        self.bias = torch.nn.Parameter(torch.zeros(dim1))  # to be adapted
        mySigma = 1.0/math.sqrt(dim2)              # scaling of weights
        torch.nn.init.normal_(self.weights, mean=0.0, std=mySigma)

    def forward(self, x):                          # tanh unit
        return torch.tanh(torch.matmul(self.weights, x) - self.bias)

    def forward_linear(self, x):                   # linear unit
        return torch.matmul(self.weights, x) - self.bias
#
# target: Bell curve and beyond
#
def target_curve(x):
    # return torch.exp(-0.5*x.pow(2)) / math.sqrt(2.0*math.pi)
    return torch.sin(x.pow(2)) + torch.cos(x)
#
# fixed training data
#
dataInput = torch.zeros((nData, dimInput))
dataInput[:, 0] = torch.linspace(-xMax, xMax, nData)
dataValue = target_curve(dataInput[:, 0])
#
# instantiate model
#
allHidden = [None for iH in range(nHidden)]
allHidden[0] = MyLayer(dimHidden, dimInput)
for iH in range(1, nHidden):
    allHidden[iH] = MyLayer(dimHidden, dimHidden)
layerOut = MyLayer(dimOutput, dimHidden)
#
# instantiate optimizer
# SGD: stochastic gradient descent
#
allOptim = [None for iH in range(nHidden)]
for iH in range(nHidden):
    allOptim[iH] = torch.optim.SGD(allHidden[iH].parameters(),
                                   lr=learningRate, momentum=0.7)
optimOut = torch.optim.Adam(layerOut.parameters(), lr=learningRate)
#
# define forward pass
#
def modelForward(myInput):
    hidden = allHidden[0](myInput)
    for iH in range(1, nHidden):
        hidden = allHidden[iH](hidden)
    return layerOut.forward_linear(hidden)
#
# training loop
#
for iEpoch in range(nEpoch):                       # training loop
    randIntArray = np.random.randint(nData, size=nBatch)
    for iBatch in range(nBatch):
        batchInput = dataInput[randIntArray[iBatch], :]
        batchValue = dataValue[randIntArray[iBatch]]
        output = modelForward(batchInput)          # forward pass
        trainingLoss = (output - batchValue).pow(2).sum()
        trainingLoss.backward()                    # backward pass
        for iH in range(nHidden):
            allOptim[iH].step()                    # adapting parameters
            allOptim[iH].zero_grad()               # zero gradients
        optimOut.step()
        optimOut.zero_grad()
    if (iEpoch % int(nEpoch/20) == 0):
        print(f'{iEpoch:7d} {trainingLoss:9.5f}')
#
# testing
#
nPlot = 100
xPlot = [-xMax + iPlot*2.0*xMax/nPlot for iPlot in range(nPlot)]
yPlot = [0.0 for _ in range(nPlot)]
zPlot = [0.0 for _ in range(nPlot)]
testInput = torch.zeros(dimInput)
for iPlot in range(nPlot):
    testInput[0] = xPlot[iPlot]
    testOutput = modelForward(testInput)           # forward pass with test data
    yPlot[iPlot] = target_curve(testInput[0]).item()
    zPlot[iPlot] = testOutput[0].item()
if False:                                          # debugging output
    for iPlot in range(nPlot):
        print(xPlot[iPlot], yPlot[iPlot], zPlot[iPlot])
xPoints = [dataInput[ii, 0] for ii in range(nData)]
yPoints = [dataValue[ii] for ii in range(nData)]
#
# plotting
#
plt.plot(xPlot, yPlot, 'k', label="data curve")
plt.plot(xPoints, yPoints, '.r', label="data points", markersize=8)
plt.plot(xPlot, zPlot, '--b', label="inference", linewidth=3.0)
plt.legend()
plt.xlabel('input activity')
plt.ylabel('output activity')
plt.savefig('foo.svg')
plt.show()