Tras los posts anteriores (visualizaciones esquemáticas de las fórmulas), el siguiente paso consiste en realizar el cálculo real de todas las variables (salidas, errores y pesos actualizados) partiendo de un dataset (D) de un solo registro.
Código Java:
package perceptron_p;
import java.util.ArrayList;
import java.util.List;
/**
 * Step-by-step trace of one training iteration (forward pass, error
 * back-propagation and weight adjustment) of a small multilayer perceptron
 * with layer sizes 2-3-2-1, using a dataset that holds a single record.
 *
 * <p>Every intermediate value is printed together with the symbolic formula
 * that produced it, so the output can be followed by hand.
 */
public class Perceptron_p {

    /**
     * Runs the traced training iteration and prints it to standard output.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        // Network sketch: number of neurons per layer (layer 0 is the input layer).
        List<Integer> nN = new ArrayList<>();
        nN.add(2);
        nN.add(3);
        nN.add(2);
        nN.add(1);

        double learningRate = 0.001; // shown as "µ" in the printed formulas
        int r = 0; // index of the dataset record being processed
        int nK = nN.size(); // number of layers = 4
        int nE = nN.get(0); // number of inputs = 2
        int nS = nN.get(nK - 1); // number of outputs = 1
        int max = nN.stream().mapToInt(i -> i).max().getAsInt(); // widest layer

        // Dataset with a single record: two inputs followed by one expected output.
        double[][] D = new double[1][3];
        D[0][0] = 0.48d; // input 1
        D[0][1] = 0.33d; // input 2
        D[0][2] = 0.81d; // expected output 1

        // W[k][j][i]: weight from neuron i of layer k-1 to neuron j of layer k.
        // WB[k][j]:   bias weight of neuron j of layer k (its bias input is fixed at -1).
        // NET[k][j]:  net input (weighted sum incl. bias) of that neuron.
        // Y[k][j]:    activated output of that neuron.
        // E[k][j]:    error assigned to that neuron.
        // Java zero-initialises the arrays, so NET, Y and E need no explicit reset.
        double[][][] W = new double[nK][max][max];
        double[][] WB = new double[nK][max];
        double[][] NET = new double[nK][max];
        double[][] Y = new double[nK][max];
        double[][] E = new double[nK][max];

        System.out.println("Inicializar pesos con valores aleatorios:\n");
        for (int k = 1; k < nK; k++) {
            for (int j = 0; j < nN.get(k); j++) {
                for (int i = 0; i < nN.get(k - 1); i++) {
                    W[k][j][i] = Math.random();
                    System.out.format("W[%d][%d][%d] = %f\n", k, j, i, W[k][j][i]);
                }
                WB[k][j] = Math.random();
                System.out.format("WB[%d][%d] = %f\n\n", k, j, WB[k][j]);
            }
        }

        // 0.0: forward pass, first hidden layer (fed directly from the dataset).
        System.out.println("Forward:\n");
        int k = 1;
        for (int j = 0; j < nN.get(k); j++) {
            StringBuilder terms = new StringBuilder();
            double sum = 0.0;
            for (int i = 0; i < nE; i++) {
                terms.append(String.format("( D[%d][%d] * W[%d][%d][%d] ) + ", r, i, k, j, i));
                sum += D[r][i] * W[k][j][i];
            }
            NET[k][j] = sum + (-1 * WB[k][j]);
            Y[k][j] = F(NET[k][j], 0);
            System.out.format("Y[%d][%d] = F{ %s( -1 * WB[%d][%d] ) } = ", k, j, terms, k, j);
            System.out.println(Y[k][j]);
        }

        // 0.1: forward pass, remaining layers (fed from the previous layer's outputs).
        System.out.println("");
        for (k = 2; k < nK; k++) {
            for (int j = 0; j < nN.get(k); j++) {
                StringBuilder terms = new StringBuilder();
                double sum = 0.0;
                for (int i = 0; i < nN.get(k - 1); i++) {
                    terms.append(String.format("( Y[%d][%d] * W[%d][%d][%d] ) + ", k - 1, i, k, j, i));
                    sum += Y[k - 1][i] * W[k][j][i];
                }
                System.out.format("Y[%d][%d] = F{ %s( -1 * WB[%d][%d] ) } = ", k, j, terms, k, j);
                NET[k][j] = sum + (-1 * WB[k][j]);
                Y[k][j] = F(NET[k][j], 0);
                System.out.println(Y[k][j]);
            }
            System.out.println("");
        }

        // 1.0: output-layer error = expected value - obtained value.
        System.out.println("Backward:\n");
        k = nK - 1;
        for (int j = 0; j < nS; j++) {
            E[k][j] = D[r][nE + j] - Y[k][j];
            System.out.format("E[%d][%d] = ( D[%d][%d] - Y[%d][%d] ) = ", k, j, r, nE + j, k, j);
            System.out.println(E[k][j]);
        }

        // 1.1: propagate the error backwards through the (not yet updated) weights.
        // Both loops run from the highest index down so the trace lists neurons
        // and terms in the same order as before.
        System.out.println("");
        for (k = k - 1; k > 0; k--) {
            for (int j = nN.get(k); j > 0; j--) {
                StringBuilder terms = new StringBuilder();
                double sum = 0.0;
                for (int i = nN.get(k + 1); i > 0; i--) {
                    terms.append(String.format("( E[%d][%d] * W[%d][%d][%d] ) + ", k + 1, i - 1, k + 1, i - 1, j - 1));
                    sum += E[k + 1][i - 1] * W[k + 1][i - 1][j - 1];
                }
                System.out.format("E[%d][%d] = %s%s = ", k, j - 1, terms, "0 ");
                System.out.println(sum);
                E[k][j - 1] = sum;
            }
            System.out.println("");
        }

        // 2.0: weight adjustment, first hidden layer (inputs come from the dataset).
        System.out.println("Ajuste pesos:\n");
        k = 1;
        for (int j = 0; j < nN.get(k); j++) {
            // The sigmoid derivative is evaluated at the neuron's net input; feeding
            // it the already-activated output would apply the sigmoid twice.
            double delta = learningRate * E[k][j] * F(NET[k][j], 1);
            for (int i = 0; i < nE; i++) {
                W[k][j][i] += (delta * D[r][i]);
                System.out.format("W'[%d][%d][%d] = W[%d][%d][%d] + ( µ * E[%d][%d] * F'(Y[%d][%d]) * D[%d][%d] ) = ", k, j, i, k, j, i, k, j, k, j, r, i);
                System.out.println(W[k][j][i]);
            }
            WB[k][j] += (delta * -1);
            System.out.format("WB'[%d][%d] = WB[%d][%d] + ( µ * E[%d][%d] * F'(Y[%d][%d]) * (%d) ) = ", k, j, k, j, k, j, k, j, -1);
            System.out.println(WB[k][j] + "\n");
        }

        // 2.1: weight adjustment, remaining layers (inputs are the previous layer's outputs).
        System.out.println("");
        for (k = 2; k < nK; k++) {
            for (int j = 0; j < nN.get(k); j++) {
                double delta = learningRate * E[k][j] * F(NET[k][j], 1);
                for (int i = 0; i < nN.get(k - 1); i++) {
                    W[k][j][i] += (delta * Y[k - 1][i]);
                    System.out.format("W'[%d][%d][%d] = W[%d][%d][%d] + ( µ * E[%d][%d] * F'( Y[%d][%d] ) * Y[%d][%d] ) = ", k, j, i, k, j, i, k, j, k, j, k - 1, i);
                    System.out.println(W[k][j][i]);
                }
                // The bias uses the same derivative (op 1) as the regular weights.
                WB[k][j] += (delta * (-1));
                System.out.format("WB'[%d][%d] = WB[%d][%d] + ( µ * E[%d][%d] * F'(Y[%d][%d]) * (%d) ) = ", k, j, k, j, k, j, k, j, -1);
                System.out.println(WB[k][j] + "\n");
            }
            System.out.println("");
        }
    }

    /**
     * Activation function and its derivative.
     *
     * @param n  net input (pre-activation value) of the neuron
     * @param op {@code 0} for the sigmoid, {@code 1} for the sigmoid's derivative
     * @return the sigmoid of {@code n}, or its derivative evaluated at {@code n}
     * @throws IllegalArgumentException if {@code op} is neither 0 nor 1
     */
    private static double F(double n, int op) {
        double sigmoid = 1.0 / (1.0 + Math.exp(-n));
        switch (op) {
            case 0: // sigmoid
                return sigmoid;
            case 1: // derivative of the sigmoid
                return sigmoid * (1 - sigmoid);
            default:
                // Fail loudly: silently returning 0 here would zero out a weight update.
                throw new IllegalArgumentException("Unknown op: " + op);
        }
    }
}
Resultado:
run:
Inicializar pesos con valores aleatorios:
W[1][0][0] = 0,835049
W[1][0][1] = 0,308429
WB[1][0] = 0,052288
W[1][1][0] = 0,217787
W[1][1][1] = 0,348034
WB[1][1] = 0,588298
W[1][2][0] = 0,961801
W[1][2][1] = 0,796138
WB[1][2] = 0,044188
W[2][0][0] = 0,994101
W[2][0][1] = 0,211034
W[2][0][2] = 0,711812
WB[2][0] = 0,952732
W[2][1][0] = 0,423756
W[2][1][1] = 0,953356
W[2][1][2] = 0,249125
WB[2][1] = 0,973105
W[3][0][0] = 0,842807
W[3][0][1] = 0,794736
WB[3][0] = 0,290240
Forward:
Y[1][0] = F{ ( D[0][0] * W[1][0][0] ) + ( D[0][1] * W[1][0][1] ) + ( -1 * WB[1][0] ) } = 0.6107146316470909
Y[1][1] = F{ ( D[0][0] * W[1][1][0] ) + ( D[0][1] * W[1][1][1] ) + ( -1 * WB[1][1] ) } = 0.40880474360914854
Y[1][2] = F{ ( D[0][0] * W[1][2][0] ) + ( D[0][1] * W[1][2][1] ) + ( -1 * WB[1][2] ) } = 0.6637837549590786
Y[2][0] = F{ ( Y[1][0] * W[2][0][0] ) + ( Y[1][1] * W[2][0][1] ) + ( Y[1][2] * W[2][0][2] ) + ( -1 * WB[2][0] ) } = 0.5530844896875817
Y[2][1] = F{ ( Y[1][0] * W[2][1][0] ) + ( Y[1][1] * W[2][1][1] ) + ( Y[1][2] * W[2][1][2] ) + ( -1 * WB[2][1] ) } = 0.46028147962310223
Y[3][0] = F{ ( Y[2][0] * W[3][0][0] ) + ( Y[2][1] * W[3][0][1] ) + ( -1 * WB[3][0] ) } = 0.6322090580829307
Backward:
E[3][0] = ( D[0][2] - Y[3][0] ) = 0.17779094191706935
E[2][1] = ( E[3][0] * W[3][0][1] ) + 0 = 0.14129683962211012
E[2][0] = ( E[3][0] * W[3][0][0] ) + 0 = 0.1498433870202291
E[1][2] = ( E[2][1] * W[2][1][2] ) + ( E[2][0] * W[2][0][2] ) + 0 = 0.1418609770496571
E[1][1] = ( E[2][1] * W[2][1][1] ) + ( E[2][0] * W[2][0][1] ) + 0 = 0.16632825764793913
E[1][0] = ( E[2][1] * W[2][1][0] ) + ( E[2][0] * W[2][0][0] ) + 0 = 0.2088347664166445
Ajuste pesos:
W'[1][0][0] = W[1][0][0] + ( µ * E[1][0] * F'(Y[1][0]) * D[0][0] ) = 0.8350717767495889
W'[1][0][1] = W[1][0][1] + ( µ * E[1][0] * F'(Y[1][0]) * D[0][1] ) = 0.3084447906938308
WB'[1][0] = WB[1][0] + ( µ * E[1][0] * F'(Y[1][0]) * (-1) ) = 0.05224031629899943
W'[1][1][0] = W[1][1][0] + ( µ * E[1][1] * F'(Y[1][1]) * D[0][0] ) = 0.21780595558207116
W'[1][1][1] = W[1][1][1] + ( µ * E[1][1] * F'(Y[1][1]) * D[0][1] ) = 0.34804701774866154
WB'[1][1] = WB[1][1] + ( µ * E[1][1] * F'(Y[1][1]) * (-1) ) = 0.5882576504954621
W'[1][2][0] = W[1][2][0] + ( µ * E[1][2] * F'(Y[1][2]) * D[0][0] ) = 0.9618160101133663
W'[1][2][1] = W[1][2][1] + ( µ * E[1][2] * F'(Y[1][2]) * D[0][1] ) = 0.7961484238865005
WB'[1][2] = WB[1][2] + ( µ * E[1][2] * F'(Y[1][2]) * (-1) ) = 0.04415614986229021
W'[2][0][0] = W[2][0][0] + ( µ * E[2][0] * F'( Y[2][0] ) * Y[1][0] ) = 0.9941217290526374
W'[2][0][1] = W[2][0][1] + ( µ * E[2][0] * F'( Y[2][0] ) * Y[1][1] ) = 0.21104869817169436
W'[2][0][2] = W[2][0][2] + ( µ * E[2][0] * F'( Y[2][0] ) * Y[1][2] ) = 0.7118355488714299
WB'[2][0] = WB[2][0] + ( µ * E[2][0] * F'(Y[2][0]) * (-1) ) = 0.9527319900854492
W'[2][1][0] = W[2][1][0] + ( µ * E[2][1] * F'( Y[2][1] ) * Y[1][0] ) = 0.4237764313332067
W'[2][1][1] = W[2][1][1] + ( µ * E[2][1] * F'( Y[2][1] ) * Y[1][1] ) = 0.953369304782218
W'[2][1][2] = W[2][1][2] + ( µ * E[2][1] * F'( Y[2][1] ) * Y[1][2] ) = 0.2491472992670435
WB'[2][1] = WB[2][1] + ( µ * E[2][1] * F'(Y[2][1]) * (-1) ) = 0.9731049511389623
W'[3][0][0] = W[3][0][0] + ( µ * E[3][0] * F'( Y[3][0] ) * Y[2][0] ) = 0.8428289253648974
W'[3][0][1] = W[3][0][1] + ( µ * E[3][0] * F'( Y[3][0] ) * Y[2][1] ) = 0.7947544171128877
WB'[3][0] = WB[3][0] + ( µ * E[3][0] * F'(Y[3][0]) * (-1) ) = 0.29024004297076156
BUILD SUCCESSFUL (total time: 2 seconds)
Una forma sencilla y rápida de aprender JAVA, observando y deduciendo cómo se comporta el lenguaje a través de ejemplos prácticos.
domingo, 7 de julio de 2019
Suscribirse a:
Entradas (Atom)
Con la tecnología de Blogger.