Una forma sencilla y rápida de aprender JAVA, observando y deduciendo cómo se comporta el lenguaje a través de ejemplos prácticos.

domingo, 7 de julio de 2019

Perceptrón multicapa. Cálculo real de todas las variables partiendo de un dataset básico.

Tras los posts anteriores (visualizaciones esquemáticas de fórmulas), el siguiente paso consiste en realizar el cálculo real de todas las variables (salidas, errores y pesos actualizados) partiendo de un dataset (D) de un solo registro.



Código Java:

package perceptron_p;

import java.util.ArrayList;
import java.util.List;

/**
 * Worked example of a single training step of a multilayer perceptron.
 *
 * <p>The network topology is given as a list of neuron counts per layer
 * (2-3-2-1 here). Starting from a one-record dataset and random weights, the
 * program prints every intermediate value of one iteration: the forward pass
 * (outputs {@code Y}), the backward pass (errors {@code E}) and the adjusted
 * weights {@code W} and bias weights {@code WB}.
 *
 * <p>Array layout used throughout: {@code W[k][j][i]} is the weight from
 * neuron {@code i} of layer {@code k-1} to neuron {@code j} of layer
 * {@code k}; {@code WB[k][j]}, {@code Y[k][j]} and {@code E[k][j]} are the
 * bias weight, output and error of neuron {@code j} of layer {@code k}.
 * Index 0 of the layer dimension (the input layer) is allocated but unused.
 *
 * <p>The "micro" sign is written as the Unicode escape {@code \u005Cu00b5}
 * inside the format strings so the source compiles identically regardless of
 * the file encoding; the printed text is unchanged.
 */
public class Perceptron_p {

   /**
    * Runs one forward pass, one backward pass and one weight adjustment,
    * printing each computed value to standard output.
    *
    * @param args ignored
    */
   public static void main(String[] args) {

      // Sketch of the network: number of neurons in each layer.
      List<Integer> nN = new ArrayList<>();
      nN.add(2);
      nN.add(3);
      nN.add(2);
      nN.add(1);

      double learningRate = 0.001;
      int r = 0; // dataset record (row) being processed
      int nK = nN.size(); // number of layers = 4
      int nE = nN.get(0); // number of inputs = 2
      int nS = nN.get(nK - 1); // number of outputs = 1

      // Widest layer; used to size the rectangular arrays below.
      int max = nN.stream().mapToInt(Integer::intValue).max().getAsInt();

      // Dataset with a single record: nE input columns followed by nS target columns.
      double[][] D = new double[1][nE + nS];
      D[0][0] = 0.48d; // input 1
      D[0][1] = 0.33d; // input 2
      D[0][2] = 0.81d; // output 1

      // Java zero-initialises arrays, so Y and E need no explicit reset.
      double[][][] W = new double[nK][max][max];
      double[][] WB = new double[nK][max];
      double[][] Y = new double[nK][max];
      double[][] E = new double[nK][max];

      initWeights(nN, W, WB);
      forward(nN, D, r, W, WB, Y);
      backward(nN, D, r, W, Y, E);
      adjustWeights(nN, learningRate, D, r, W, WB, Y, E);
   }

   /** Fills W and WB with values from Math.random() and prints each one. */
   private static void initWeights(List<Integer> nN, double[][][] W, double[][] WB) {
      System.out.println("Inicializar pesos con valores aleatorios:\n");
      for (int k = 1; k < nN.size(); k++) {
         for (int j = 0; j < nN.get(k); j++) {
            for (int i = 0; i < nN.get(k - 1); i++) {
               W[k][j][i] = Math.random();
               System.out.format("W[%d][%d][%d] = %f\n", k, j, i, W[k][j][i]);
            }
            WB[k][j] = Math.random();
            System.out.format("WB[%d][%d] = %f\n\n", k, j, WB[k][j]);
         }
      }
   }

   /**
    * Forward pass: computes Y[k][j] = F(sum_i(in[i] * W[k][j][i]) - WB[k][j])
    * layer by layer, where the inputs of layer 1 are the dataset columns of
    * record r and the inputs of any later layer are the previous layer's outputs.
    */
   private static void forward(List<Integer> nN, double[][] D, int r,
         double[][][] W, double[][] WB, double[][] Y) {
      System.out.println("Forward:\n");
      for (int k = 1; k < nN.size(); k++) {
         boolean firstLayer = (k == 1);
         double[] in = firstLayer ? D[r] : Y[k - 1];
         // Only the label differs between the first layer and the rest.
         String inName = firstLayer ? "D" : "Y";
         int inRow = firstLayer ? r : k - 1;

         for (int j = 0; j < nN.get(k); j++) {
            StringBuilder terms = new StringBuilder();
            double net = 0.0;
            for (int i = 0; i < nN.get(k - 1); i++) {
               terms.append(String.format("( %s[%d][%d] * W[%d][%d][%d] ) + ", inName, inRow, i, k, j, i));
               net += in[i] * W[k][j][i];
            }
            // The bias enters as a fixed input of -1 weighted by WB[k][j].
            Y[k][j] = F(net - WB[k][j], 0);
            System.out.format("Y[%d][%d] = F{ %s( -1 * WB[%d][%d] ) } = ", k, j, terms, k, j);
            System.out.println(Y[k][j]);
         }
         System.out.println("");
      }
   }

   /**
    * Backward pass: the output-layer error is (target - output); the error of
    * each earlier neuron is the sum of the next layer's errors weighted by
    * the connecting weights. Neurons are visited from last to first, which
    * fixes both the printing order and the summation order.
    */
   private static void backward(List<Integer> nN, double[][] D, int r,
         double[][][] W, double[][] Y, double[][] E) {
      System.out.println("Backward:\n");
      int last = nN.size() - 1;
      int nE = nN.get(0);

      for (int j = 0; j < nN.get(last); j++) {
         // Target columns follow the nE input columns in the dataset row.
         E[last][j] = D[r][nE + j] - Y[last][j];
         System.out.format("E[%d][%d] = ( D[%d][%d] - Y[%d][%d] ) = ", last, j, r, nE + j, last, j);
         System.out.println(E[last][j]);
      }
      System.out.println("");

      for (int k = last - 1; k > 0; k--) {
         for (int j = nN.get(k) - 1; j >= 0; j--) {
            StringBuilder terms = new StringBuilder();
            double sum = 0.0;
            for (int i = nN.get(k + 1) - 1; i >= 0; i--) {
               terms.append(String.format("( E[%d][%d] * W[%d][%d][%d] ) + ", k + 1, i, k + 1, i, j));
               sum += E[k + 1][i] * W[k + 1][i][j];
            }
            E[k][j] = sum;
            // The trailing "0 " closes the dangling " + " of the last term.
            System.out.format("E[%d][%d] = %s0  = ", k, j, terms);
            System.out.println(sum);
         }
         System.out.println("");
      }
   }

   /**
    * Weight adjustment: W[k][j][i] += rate * E[k][j] * F'(Y[k][j]) * in[i]
    * and WB[k][j] += rate * E[k][j] * F'(Y[k][j]) * (-1), for every layer.
    *
    * <p>The derivative (op 1) is used for the bias weights of every layer.
    * An earlier version passed an unsupported op code for layers 2 and up,
    * which evaluated to 0 and left those bias weights untouched.
    */
   private static void adjustWeights(List<Integer> nN, double learningRate, double[][] D, int r,
         double[][][] W, double[][] WB, double[][] Y, double[][] E) {
      System.out.println("Ajuste pesos:\n");
      for (int k = 1; k < nN.size(); k++) {
         boolean firstLayer = (k == 1);
         double[] in = firstLayer ? D[r] : Y[k - 1];
         int inRow = firstLayer ? r : k - 1;
         // The two label variants are reproduced exactly (note the spacing inside F'(...)).
         String weightFormat = firstLayer
               ? "W'[%d][%d][%d] = W[%d][%d][%d] + ( \u00b5 * E[%d][%d] * F'(Y[%d][%d]) * D[%d][%d] ) = "
               : "W'[%d][%d][%d] = W[%d][%d][%d] + ( \u00b5 * E[%d][%d] * F'( Y[%d][%d] ) * Y[%d][%d] ) = ";

         for (int j = 0; j < nN.get(k); j++) {
            // Common factor of every update for neuron j; hoisted out of the i loop.
            double step = learningRate * E[k][j] * F(Y[k][j], 1);
            for (int i = 0; i < nN.get(k - 1); i++) {
               W[k][j][i] += step * in[i];
               System.out.format(weightFormat, k, j, i, k, j, i, k, j, k, j, inRow, i);
               System.out.println(W[k][j][i]);
            }
            // Bias input is the constant -1, hence the subtraction.
            WB[k][j] -= step;
            System.out.format("WB'[%d][%d] = WB[%d][%d] + ( \u00b5 * E[%d][%d] * F'(Y[%d][%d]) * (%d) ) = ", k, j, k, j, k, j, k, j, -1);
            System.out.println(WB[k][j] + "\n");
         }
         System.out.println("");
      }
   }

   /**
    * Activation function and its derivative.
    *
    * <p>NOTE(review): callers pass the already-activated output Y[k][j] with
    * op 1, so the derivative is evaluated at Y rather than computed as
    * Y * (1 - Y). This is kept because it matches the printed formulas
    * F'(Y[k][j]); confirm that this is the intended definition.
    *
    * @param n  point at which to evaluate
    * @param op 0 for the sigmoid 1 / (1 + e^-n); 1 for sigmoid(n) * (1 - sigmoid(n))
    * @return the selected function evaluated at n
    * @throws IllegalArgumentException if op is neither 0 nor 1, so that a
    *         mistyped op code fails loudly instead of silently yielding 0
    */
   private static double F(double n, int op) {
      double sigmoid = 1.0 / (1.0 + Math.exp(-n));
      switch (op) {
         case 0:
            return sigmoid;
         case 1:
            return sigmoid * (1.0 - sigmoid);
         default:
            throw new IllegalArgumentException("Unknown op: " + op);
      }
   }

}



Resultado:

run:
Inicializar pesos con valores aleatorios:

W[1][0][0] = 0,835049
W[1][0][1] = 0,308429
WB[1][0] = 0,052288

W[1][1][0] = 0,217787
W[1][1][1] = 0,348034
WB[1][1] = 0,588298

W[1][2][0] = 0,961801
W[1][2][1] = 0,796138
WB[1][2] = 0,044188

W[2][0][0] = 0,994101
W[2][0][1] = 0,211034
W[2][0][2] = 0,711812
WB[2][0] = 0,952732

W[2][1][0] = 0,423756
W[2][1][1] = 0,953356
W[2][1][2] = 0,249125
WB[2][1] = 0,973105

W[3][0][0] = 0,842807
W[3][0][1] = 0,794736
WB[3][0] = 0,290240

Forward:

Y[1][0] = F{ ( D[0][0] * W[1][0][0] ) + ( D[0][1] * W[1][0][1] ) + ( -1 * WB[1][0] ) } = 0.6107146316470909
Y[1][1] = F{ ( D[0][0] * W[1][1][0] ) + ( D[0][1] * W[1][1][1] ) + ( -1 * WB[1][1] ) } = 0.40880474360914854
Y[1][2] = F{ ( D[0][0] * W[1][2][0] ) + ( D[0][1] * W[1][2][1] ) + ( -1 * WB[1][2] ) } = 0.6637837549590786

Y[2][0] = F{ ( Y[1][0] * W[2][0][0] ) + ( Y[1][1] * W[2][0][1] ) + ( Y[1][2] * W[2][0][2] ) + ( -1 * WB[2][0] ) } = 0.5530844896875817
Y[2][1] = F{ ( Y[1][0] * W[2][1][0] ) + ( Y[1][1] * W[2][1][1] ) + ( Y[1][2] * W[2][1][2] ) + ( -1 * WB[2][1] ) } = 0.46028147962310223

Y[3][0] = F{ ( Y[2][0] * W[3][0][0] ) + ( Y[2][1] * W[3][0][1] ) + ( -1 * WB[3][0] ) } = 0.6322090580829307

Backward:

E[3][0] = ( D[0][2] - Y[3][0] ) = 0.17779094191706935

E[2][1] = ( E[3][0] * W[3][0][1] ) + 0  = 0.14129683962211012
E[2][0] = ( E[3][0] * W[3][0][0] ) + 0  = 0.1498433870202291

E[1][2] = ( E[2][1] * W[2][1][2] ) + ( E[2][0] * W[2][0][2] ) + 0  = 0.1418609770496571
E[1][1] = ( E[2][1] * W[2][1][1] ) + ( E[2][0] * W[2][0][1] ) + 0  = 0.16632825764793913
E[1][0] = ( E[2][1] * W[2][1][0] ) + ( E[2][0] * W[2][0][0] ) + 0  = 0.2088347664166445

Ajuste pesos:

W'[1][0][0] = W[1][0][0] + ( µ * E[1][0] * F'(Y[1][0]) * D[0][0] ) = 0.8350717767495889
W'[1][0][1] = W[1][0][1] + ( µ * E[1][0] * F'(Y[1][0]) * D[0][1] ) = 0.3084447906938308
WB'[1][0] = WB[1][0] + ( µ * E[1][0] * F'(Y[1][0]) * (-1) ) = 0.05224031629899943

W'[1][1][0] = W[1][1][0] + ( µ * E[1][1] * F'(Y[1][1]) * D[0][0] ) = 0.21780595558207116
W'[1][1][1] = W[1][1][1] + ( µ * E[1][1] * F'(Y[1][1]) * D[0][1] ) = 0.34804701774866154
WB'[1][1] = WB[1][1] + ( µ * E[1][1] * F'(Y[1][1]) * (-1) ) = 0.5882576504954621

W'[1][2][0] = W[1][2][0] + ( µ * E[1][2] * F'(Y[1][2]) * D[0][0] ) = 0.9618160101133663
W'[1][2][1] = W[1][2][1] + ( µ * E[1][2] * F'(Y[1][2]) * D[0][1] ) = 0.7961484238865005
WB'[1][2] = WB[1][2] + ( µ * E[1][2] * F'(Y[1][2]) * (-1) ) = 0.04415614986229021


W'[2][0][0] = W[2][0][0] + ( µ * E[2][0] * F'( Y[2][0] ) * Y[1][0] ) = 0.9941217290526374
W'[2][0][1] = W[2][0][1] + ( µ * E[2][0] * F'( Y[2][0] ) * Y[1][1] ) = 0.21104869817169436
W'[2][0][2] = W[2][0][2] + ( µ * E[2][0] * F'( Y[2][0] ) * Y[1][2] ) = 0.7118355488714299
WB'[2][0] = WB[2][0] + ( µ * E[2][0] * F'(Y[2][0]) * (-1) ) = 0.9527319900854492

W'[2][1][0] = W[2][1][0] + ( µ * E[2][1] * F'( Y[2][1] ) * Y[1][0] ) = 0.4237764313332067
W'[2][1][1] = W[2][1][1] + ( µ * E[2][1] * F'( Y[2][1] ) * Y[1][1] ) = 0.953369304782218
W'[2][1][2] = W[2][1][2] + ( µ * E[2][1] * F'( Y[2][1] ) * Y[1][2] ) = 0.2491472992670435
WB'[2][1] = WB[2][1] + ( µ * E[2][1] * F'(Y[2][1]) * (-1) ) = 0.9731049511389623


W'[3][0][0] = W[3][0][0] + ( µ * E[3][0] * F'( Y[3][0] ) * Y[2][0] ) = 0.8428289253648974
W'[3][0][1] = W[3][0][1] + ( µ * E[3][0] * F'( Y[3][0] ) * Y[2][1] ) = 0.7947544171128877
WB'[3][0] = WB[3][0] + ( µ * E[3][0] * F'(Y[3][0]) * (-1) ) = 0.29024004297076156


BUILD SUCCESSFUL (total time: 2 seconds)



Con la tecnología de Blogger.