% NeuralNetBackpropExMult.m
%
% By: Neil E. Cotter
% 11/03/2010
%
% Sigmoidal 2-layer neural net trained with backprop to learn y = x1 * x2.
% Output neuron is linear (no squasher).
%
% No inputs required. Creates plots of desired function, learned function, err.
%
% Default values that user may change:
% N                  = # neurons in 1st layer
% num_train_patterns = number of training patterns to run
% eta                = learning rate

% Set number of neurons in 1st layer.
if ~exist('N', 'var')
    N = 25;
end

% Set number of training patterns.
if ~exist('num_train_patterns', 'var')
    num_train_patterns = 20000;
end

% Set learning rate.
if ~exist('eta', 'var')
    eta = 0.1;
end

% Set domain to [-1,1] x [-1,1].
x1_min = -1;
x1_max = 1;
x2_min = -1;
x2_max = 1;

% Initialize 1st-layer neurons with random weights.
% Column 1 is the threshold (bias) weight; columns 2 and 3 weight x1 and x2.
for neuron_index = 1:N
    for synapse_index = 0:2
        w_1(neuron_index, synapse_index+1) = randn(1,1);
    end
end

% Initialize output neuron with random weights.
for synapse_index = 0:N
    w_2(1, synapse_index+1) = randn(1,1);
end

% Create figure showing decision boundaries before learning.
figure(5)
% Clear previous plot.
hold off
% Plot 1st-layer decision boundaries in green.
NeuralNetDecisionBoundaryPlot(w_1, 'g-', [-1,1,-1,1])
% Plot 2nd-layer decision boundary in red.
hold on
NeuralNetDecisionBoundaryPlot(w_2, 'r-', [-1,1,-1,1])

% Reset learning stats.
err_tally = 0;

% Train the network.
for pattern_iter = 1:num_train_patterns
    % Select random inputs within domain. Uniform distribution.
    x0 = 1; % Constant input plays role of threshold.
    x1 = 2*rand(1,1) - 1;
    x2 = 2*rand(1,1) - 1;
    xvec = [x0, x1, x2];

    % Calculate the neuron outputs for 1st layer.
    for neuron_index = 1:N
        % Calculate weights * inputs.
        net_1(neuron_index) = w_1(neuron_index, :) * xvec';
        % Calculate output of 1st-layer neuron (sigmoid squasher).
        o_1(neuron_index) = 1/(1 + exp(-net_1(neuron_index)));
    end

    % Calculate output of output neuron (linear, no squasher).
    net_2 = w_2 * [1, o_1]';

    % Desired output is y_d = x1 * x2.
    y_d = x1 * x2;

    % Calculate error in output.
    err = y_d - net_2;

    % Propagate error back to output neuron synapses.
    delta_w_2(1,1) = eta * 1 * err;
    for synapse_index = 1:N
        delta_w_2(1, synapse_index+1) = eta * o_1(synapse_index) * err;
    end

    % Propagate error back to 1st-layer neuron synapses.
    % Compute partial derivative back to output of 1st layer.
    dnet2_do_1 = w_2;

    % Compute weight updates for 1st layer.
    % Chain rule: sigmoid derivative is o*(1 - o); dnet2_do_1(neuron_index+1)
    % skips the output neuron's bias weight.
    for neuron_index = 1:N
        for synapse_index = 0:2
            delta_w_1(neuron_index, synapse_index+1) ...
                = eta * xvec(synapse_index+1) ...
                * o_1(neuron_index) * (1 - o_1(neuron_index)) ...
                * dnet2_do_1(neuron_index+1) * err;
        end
    end

    % Update the output neuron weights.
    w_2 = w_2 + delta_w_2;

    % Clamp weight values so they don't go to infinity.
    w_2(w_2 > 10) = 10;
    w_2(w_2 < -10) = -10;

    %{
    % Check for exploding weights in 1st layer.
    sum(sum(w_1))
    if isnan(sum(sum(w_1)))
        return
    end
    if abs(sum(sum(w_1))) > 100
        return
    end
    %}

    % Update the 1st-layer weights.
    w_1 = w_1 + delta_w_1;

    % Clamp weight values so they don't go to infinity.
    w_1(w_1 > 10) = 10;
    w_1(w_1 < -10) = -10;

    % Check whether err got smaller (for inspection; not used below).
    new_net_2 = NeuralNetBackpropOutput(w_1, w_2, xvec);
    new_err = y_d - new_net_2;

    % Gather err stats as learning proceeds.
    err_tally = err_tally + err;

    % Number of err pts to total for one plotted pt.
    num_err_per_pt = 1;

    % Start new running total every num_err_per_pt pts.
    if mod(pattern_iter, num_err_per_pt) == 0
        % Save current total in array.
        err_vec(floor(pattern_iter/num_err_per_pt)) = err_tally;
        % Reset total err.
        err_tally = 0;
    end
end
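% ---------------------------------------------------------------------
% Optional sanity check (added sketch, not part of the original file):
% estimate the RMS error of the trained net over a fresh batch of
% random points, using a vectorized version of the same forward pass
% as in the training loop above (sigmoid hidden layer, linear output
% neuron). The batch size num_test is an arbitrary choice.
num_test = 1000;
x_test = [ones(num_test,1), 2*rand(num_test,2) - 1]; % Rows are [1, x1, x2].
o_1_test = 1 ./ (1 + exp(-(x_test * w_1')));         % num_test x N hidden outputs.
y_test = [ones(num_test,1), o_1_test] * w_2';        % Linear output neuron.
rms_err = sqrt(mean((x_test(:,2).*x_test(:,3) - y_test).^2)) % Display RMS err.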
% Plot results: running total of signed err vs. training pattern number.
figure(1)
plot(1:length(err_vec), err_vec, 'r-')

% Compute network outputs over a mesh grid.
% Create the x1,x2 pts.
x1_limits = [-1,1];
x2_limits = [-1,1];
num_pts_x1_and_x2 = 101;
x1_spacing = (x1_limits(2) - x1_limits(1))/(num_pts_x1_and_x2 - 1);
x2_spacing = (x2_limits(2) - x2_limits(1))/(num_pts_x1_and_x2 - 1);
x1vec = x1_limits(1) : x1_spacing : x1_limits(2);
x2vec = x2_limits(1) : x2_spacing : x2_limits(2);
[xx1,xx2] = meshgrid(x1vec, x2vec);

% Compute network output, desired output, and err for each mesh pt.
for x1_index = 1:length(x1vec)
    for x2_index = 1:length(x2vec)
        x1 = xx1(x1_index, x2_index);
        x2 = xx2(x1_index, x2_index);
        xvec = [1, x1, x2];
        % Calculate output of output neuron.
        net_2plot(x1_index, x2_index) = NeuralNetBackpropOutput(w_1, w_2, xvec);
        % Desired output is y_d = x1 * x2.
        y_dplot(x1_index, x2_index) = x1 * x2;
    end
end

% Calculate error in output.
errplot = y_dplot - net_2plot;

% Plot the desired output surface.
figure(2)
surf(xx1, xx2, y_dplot)

% Plot the network output surface.
figure(3)
surf(xx1, xx2, net_2plot)

% Plot the error surface.
figure(4)
surf(xx1, xx2, errplot)

% Create figure showing decision boundaries after learning.
figure(6)
% Clear previous plot.
hold off
% Plot 1st-layer decision boundaries in green.
NeuralNetDecisionBoundaryPlot(w_1, 'g-', [-1,1,-1,1])
% Plot 2nd-layer decision boundary in red.
hold on
NeuralNetDecisionBoundaryPlot(w_2, 'r-', [-1,1,-1,1])
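% ---------------------------------------------------------------------
% NeuralNetBackpropOutput and NeuralNetDecisionBoundaryPlot are
% separate files in the original distribution and are not reproduced
% here. As an aid for readers who lack them, the local function below
% is a minimal sketch of what NeuralNetBackpropOutput presumably
% computes, inferred from the forward pass in the training loop above;
% the actual helper may differ in detail. (Local functions at the end
% of a script require MATLAB R2016b or later and shadow same-named
% files on the path, so delete this if you have the original file.)
function net_2 = NeuralNetBackpropOutput(w_1, w_2, xvec)
    % 1st layer: weighted sums pushed through sigmoid squashers.
    o_1 = 1 ./ (1 + exp(-(w_1 * xvec')));   % N x 1 hidden outputs.
    % Output neuron: linear combination with a bias input of 1.
    net_2 = w_2 * [1; o_1];
end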