diff --git a/.gitignore b/.gitignore
index cd531cf..13f78e0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -52,3 +52,4 @@
 Module.symvers
 Mkfile.old
 dkms.conf
+csv/
diff --git a/headers/file.h b/headers/file.h
new file mode 100644
index 0000000..bd3f747
--- /dev/null
+++ b/headers/file.h
@@ -0,0 +1,17 @@
+/* See LICENSE file for copyright and license details. */
+
+#ifndef _file_h_
+#define _file_h_
+
+typedef struct {
+	FILE *file;
+	char *file_name;
+} File;
+
+File *new_file(char *file_name);
+void free_file(File *f);
+double get_entry(File *f, int row, int col, char sep);
+int get_columns_num(File *f, int row, char sep);
+int get_rows_num(File *f);
+
+#endif
diff --git a/headers/layer.h b/headers/layer.h
new file mode 100644
index 0000000..583f47c
--- /dev/null
+++ b/headers/layer.h
@@ -0,0 +1,17 @@
+/* See LICENSE file for copyright and license details. */
+
+#ifndef _layer_h_
+#define _layer_h_
+
+#include "../headers/perceptron.h"
+
+typedef struct {
+	int p_num;
+	Perceptron **p;
+} Layer;
+
+Layer *new_layer(int p_num, int i_num);
+Layer *new_input_layer(int p_num);
+void free_layer(Layer *l);
+
+#endif
diff --git a/headers/neural_net.h b/headers/neural_net.h
new file mode 100644
index 0000000..39ae484
--- /dev/null
+++ b/headers/neural_net.h
@@ -0,0 +1,16 @@
+/* See LICENSE file for copyright and license details. */
+
+#ifndef _neural_net_h_
+#define _neural_net_h_
+
+#include "layer.h"
+
+typedef struct {
+	int l_num;
+	Layer **l;
+} NeuralNet;
+
+NeuralNet *new_neural_net(char *c_file_name, char *i_file_name);
+void free_neural_net(NeuralNet *n);
+
+#endif
diff --git a/headers/perceptron.h b/headers/perceptron.h
new file mode 100644
index 0000000..e655704
--- /dev/null
+++ b/headers/perceptron.h
@@ -0,0 +1,15 @@
+/* See LICENSE file for copyright and license details. */
+
+#ifndef _perceptron_h_
+#define _perceptron_h_
+
+typedef struct {
+	int i_num;
+	double *weights;
+	double output;
+} Perceptron;
+
+Perceptron *new_perceptron(int i_num);
+void free_perceptron(Perceptron *p);
+
+#endif
diff --git a/headers/training.h b/headers/training.h
new file mode 100644
index 0000000..f80f988
--- /dev/null
+++ b/headers/training.h
@@ -0,0 +1,9 @@
+/* See LICENSE file for copyright and license details. */
+
+#ifndef _training_h_
+#define _training_h_
+
+void train_by_backpropagation(NeuralNet *n, int runs, double t_factor,
+		char *i_name, char *t_name);
+
+#endif
diff --git a/src/file.c b/src/file.c
new file mode 100644
index 0000000..b498128
--- /dev/null
+++ b/src/file.c
@@ -0,0 +1,99 @@
+/* See LICENSE file for copyright and license details. */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "../headers/file.h"
+
+#define BUF_SIZE 1024
+
+File
+*new_file(char *file_name)
+{
+	File *f;
+
+	if ((f = malloc(sizeof(File))) == NULL) {
+		fprintf(stderr, "memory for file was not allocated.\n");
+		exit(EXIT_FAILURE);
+	}
+
+	f->file_name = file_name;
+	//fprintf(stderr, "%s\n", f->file_name);
+
+	if ((f->file = fopen(f->file_name, "r")) == NULL) {
+		fprintf(stderr, "file %s was not opened.\n",
+				f->file_name);
+		exit(EXIT_FAILURE);
+	}
+
+	return f;
+}
+
+void
+free_file(File *f)
+{
+	if (fclose(f->file) == EOF) {
+		fprintf(stderr, "file %s was not closed.\n",
+				f->file_name);
+		exit(EXIT_FAILURE);
+	}
+	free(f);
+}
+
+double
+get_entry(File *f, int row, int col, char sep)
+{
+	int i;
+	char buf[BUF_SIZE];
+	double x;
+
+	rewind(f->file);
+
+	for (i=0; i<row; i++)
+		fgets(buf, BUF_SIZE, f->file);
+
+	for (i=0; i<col; i++) {
+		while (fgetc(f->file) != sep)
+			;
+	}
+
+	fscanf(f->file, "%lf", &x);
+	return x;
+}
+
+int
+get_columns_num(File *f, int row, char sep)
+{
+	int i, count;
+	char c, buf[BUF_SIZE];
+
+	c = '0';
+	count = 1;
+	rewind(f->file);
+
+	for (i=0; i<row; i++)
+		fgets(buf, BUF_SIZE, f->file);
+
+	while (c != '\n') {
+		c = fgetc(f->file);
+		if (c == sep)
+			count++;
+	}
+
+	return count;
+}
+
+int
+get_rows_num(File *f)
+{
+	char buf[BUF_SIZE];
+	int count;
+
+	count = 0;
+	rewind(f->file);
+
+	while (fgets(buf, BUF_SIZE, f->file))
+		count++;
+
+	return count;
+}
diff --git a/src/layer.c b/src/layer.c
new file mode 100644
index 0000000..486d6c6
--- /dev/null
+++ b/src/layer.c
@@ -0,0 +1,66 @@
+/* See LICENSE file for copyright and license details. */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "../headers/layer.h"
+
+Layer
+*new_layer(int p_num, int i_num)
+{
+	int i;
+	Layer *l;
+
+	l = malloc(sizeof(Layer));
+
+	l->p_num = p_num;
+
+	l->p = calloc(l->p_num, sizeof(Perceptron *));
+
+	for (i=0; i<l->p_num; i++) {
+		l->p[i] = new_perceptron(i_num);
+	}
+
+	if (l==NULL || l->p==NULL) {
+		fprintf(stderr, "memory for layer was not allocated.\n");
+		exit(EXIT_FAILURE);
+	}
+
+	return l;
+}
+
+Layer
+*new_input_layer(int p_num)
+{
+	int i;
+	Layer *l;
+
+	l = malloc(sizeof(Layer));
+
+	l->p_num = p_num;
+
+	l->p = calloc(l->p_num, sizeof(Perceptron *));
+
+	for (i=0; i<l->p_num; i++) {
+		l->p[i] = new_perceptron(1);
+	}
+
+	if (l==NULL || l->p==NULL) {
+		fprintf(stderr, "memory for layer was not allocated.\n");
+		exit(EXIT_FAILURE);
+	}
+
+	return l;
+}
+
+void
+free_layer(Layer *l)
+{
+	int i;
+
+	for (i=0; i<l->p_num; i++) {
+		free_perceptron(l->p[i]);
+	}
+	free(l->p);
+	free(l);
+}
diff --git a/src/main.c b/src/main.c
new file mode 100644
index 0000000..025a2d7
--- /dev/null
+++ b/src/main.c
@@ -0,0 +1,22 @@
+/* See LICENSE file for copyright and license details. */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "../headers/file.h"
+#include "../headers/neural_net.h"
+#include "../headers/training.h"
+
+#define N 100000
+
+int
+main(int argc, char *argv[])
+{
+	NeuralNet *n = new_neural_net(argv[1], argv[2]);
+
+	train_by_backpropagation(n, N, 1, argv[2], argv[3]);
+
+	free_neural_net(n);
+
+	return EXIT_SUCCESS;
+}
diff --git a/src/neural_net.c b/src/neural_net.c
new file mode 100644
index 0000000..55691a7
--- /dev/null
+++ b/src/neural_net.c
@@ -0,0 +1,59 @@
+/* See LICENSE file for copyright and license details. */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "../headers/file.h"
+#include "../headers/neural_net.h"
+
+NeuralNet
+*new_neural_net(char *c_file_name, char *i_file_name)
+{
+	int i;
+	NeuralNet *n;
+	File *c_file = new_file(c_file_name);
+	File *i_file = new_file(i_file_name);
+
+	n = malloc(sizeof(NeuralNet));
+	n->l_num = get_columns_num(c_file, 0, ',');
+	n->l = calloc(n->l_num, sizeof(Layer *));
+	int p_num[n->l_num];
+	int i_num[n->l_num];
+
+	for (i=0; i<n->l_num-1; i++) {
+		p_num[i] = get_entry(c_file, 0, i, ',') + 1;
+	}
+	p_num[n->l_num-1] = get_entry(c_file, 0, n->l_num-1, ',');
+
+	i_num[0] = get_columns_num(i_file, 0, ',');
+	for (i=1; i<n->l_num; i++) {
+		i_num[i] = p_num[i-1];
+	}
+
+	free_file(c_file);
+	free_file(i_file);
+
+	n->l[0] = new_input_layer(p_num[0]);
+	for (i=1; i<n->l_num; i++) {
+		n->l[i] = new_layer(p_num[i], i_num[i]);
+	}
+
+	if (n==NULL || n->l==NULL) {
+		fprintf(stderr, "memory for neural net was not allocated.\n");
+		exit(EXIT_FAILURE);
+	}
+
+	return n;
+}
+
+void
+free_neural_net(NeuralNet *n)
+{
+	int i;
+
+	for (i=0; i<n->l_num; i++) {
+		free_layer(n->l[i]);
+	}
+	free(n->l);
+	free(n);
+}
diff --git a/src/perceptron.c b/src/perceptron.c
new file mode 100644
index 0000000..dab850f
--- /dev/null
+++ b/src/perceptron.c
@@ -0,0 +1,30 @@
+/* See LICENSE file for copyright and license details. */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "../headers/perceptron.h"
+
+Perceptron
+*new_perceptron(int i_num)
+{
+	Perceptron *p;
+
+	p = malloc(sizeof(Perceptron));
+	p->i_num = i_num;
+	p->weights = calloc(i_num, sizeof(double));
+
+	if (p==NULL || p->weights==NULL) {
+		fprintf(stderr, "memory for perceptron was not allocated.\n");
+		exit(EXIT_FAILURE);
+	}
+
+	return p;
+}
+
+void
+free_perceptron(Perceptron *p)
+{
+	free(p->weights);
+	free(p);
+}
diff --git a/src/training.c b/src/training.c
new file mode 100644
index 0000000..3ec0780
--- /dev/null
+++ b/src/training.c
@@ -0,0 +1,278 @@
+/* See LICENSE file for copyright and license details. */
+
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+
+#include "../headers/file.h"
+#include "../headers/neural_net.h"
+
+typedef struct {
+	File *i_file;
+	File *t_file;
+	double t_factor;
+	int rows_num;
+	double *output_deltas;
+	double **small_deltas;
+	double ***weight_deltas;
+} TrainingInfo;
+
+static TrainingInfo *new_training_info(NeuralNet *n, char *i_name,
+		char *t_name, double t_factor);
+static void free_training_info(NeuralNet *n, TrainingInfo *t);
+static void init_inputs(NeuralNet *n);
+static void init_weights(NeuralNet *n);
+static void set_input_layer(NeuralNet *n, TrainingInfo *t, int i_file_row);
+static void calculate_outputs(NeuralNet *n);
+static void calculate_output_deltas(NeuralNet *n, TrainingInfo *t, int row);
+static void calculate_small_deltas(NeuralNet *n, TrainingInfo *t);
+static void calculate_weight_deltas(NeuralNet *n, TrainingInfo *t);
+static void calculate_weights(NeuralNet *n, TrainingInfo *t);
+static void print_weights(NeuralNet *n);
+
+void
+train_by_backpropagation(NeuralNet *n, int runs, double t_factor, char *i_name,
+		char *t_name)
+{
+	int i, j;
+	TrainingInfo *t;
+
+	t = new_training_info(n, i_name, t_name, t_factor);
+	init_inputs(n);
+	init_weights(n);
+
+	for (i=0; i<runs; i++) {
+		for (j=0; j<t->rows_num; j++) {
+			set_input_layer(n, t, j);
+			calculate_outputs(n);
+			calculate_output_deltas(n, t, j);
+			calculate_small_deltas(n, t);
+			calculate_weight_deltas(n, t);
+			calculate_weights(n, t);
+		}
+	}
+	print_weights(n);
+
+	free_training_info(n, t);
+}
+
+static TrainingInfo
+*new_training_info(NeuralNet *n, char *i_name, char *t_name, double t_factor)
+{
+	int i, j;
+	TrainingInfo *t;
+
+	t = malloc(sizeof(TrainingInfo));
+	t->i_file = new_file(i_name);
+	t->t_file = new_file(t_name);
+	t->t_factor = t_factor;
+	t->rows_num = get_rows_num(t->i_file);
+	t->output_deltas = calloc(n->l[n->l_num-1]->p_num, sizeof(double));
+	t->small_deltas = calloc(n->l_num, sizeof(double *));
+	for (i=0; i<n->l_num; i++) {
+		t->small_deltas[i] = calloc(n->l[i]->p_num,
+				sizeof(double));
+	}
+	t->weight_deltas = calloc(n->l_num, sizeof(double **));
+	for (i=1; i<n->l_num; i++) {
+		t->weight_deltas[i] = calloc(n->l[i]->p_num, sizeof(double *));
+		for (j=0; j<n->l[i]->p_num; j++) {
+			t->weight_deltas[i][j] = calloc(n->l[i-1]->p_num,
+					sizeof(double));
+		}
+	}
+
+	return t;
+}
+
+static void
+free_training_info(NeuralNet *n, TrainingInfo *t)
+{
+	int i, j;
+
+	free_file(t->i_file);
+	free_file(t->t_file);
+
+	free(t->output_deltas);
+	for (i=1; i<n->l_num; i++) {
+		free(t->small_deltas[i]);
+	}
+	free(t->small_deltas);
+	for (i=1; i<n->l_num; i++) {
+		for (j=0; j<n->l[i]->p_num; j++) {
+			free(t->weight_deltas[i][j]);
+		}
+		free(t->weight_deltas[i]);
+	}
+	free(t->weight_deltas);
+	free(t);
+}
+
+static void
+init_inputs(NeuralNet *n)
+{
+	int i;
+
+	for (i=0; i<n->l_num-1; i++) {
+		n->l[i]->p[0]->output = 1;
+	}
+}
+
+static void
+init_weights(NeuralNet *n)
+{
+	int i, j, k;
+
+	srand(time(NULL));
+	for (i=1; i<n->l_num; i++) {
+		for (j=1; j<n->l[i]->p_num; j++) {
+			for (k=0; k<n->l[i]->p[j]->i_num; k++) {
+				n->l[i]->p[j]->weights[k] = (double) rand()
+					/ RAND_MAX;
+			}
+		}
+	}
+}
+
+static void
+set_input_layer(NeuralNet *n, TrainingInfo *t, int i_file_row)
+{
+	int i;
+
+	for (i=1; i<n->l[0]->p_num-1; i++) {
+		n->l[0]->p[i]->output =
+			get_entry(t->i_file, i_file_row, i, ',');
+	}
+}
+
+static void
+calculate_outputs(NeuralNet *n)
+{
+	int i, j, k;
+	double x;
+
+	for (i=1; i<n->l_num; i++) {
+		for (j=1; j<n->l[i]->p_num; j++) {
+			x = 0;
+			for (k=0; k<n->l[i]->p[j]->i_num; k++) {
+				x -= n->l[i]->p[j]->weights[k] *
+					n->l[i-1]->p[k]->output;
+			}
+			n->l[i]->p[j]->output = 1 / (1 + exp(x));
+		}
+	}
+}
+
+static void
+calculate_output_deltas(NeuralNet *n, TrainingInfo *t, int row)
+{
+	int i;
+
+	for (i=0; i<n->l[n->l_num-1]->p_num; i++) {
+		t->output_deltas[i] = get_entry(t->t_file, row, i, ',')
+			- n->l[n->l_num-1]->p[i]->output;
+	}
+}
+
+static void
+calculate_small_deltas(NeuralNet *n, TrainingInfo *t)
+{
+	int i, j, k;
+	double sum;
+
+	for (i=0; i<n->l[n->l_num-1]->p_num; i++) {
+		t->small_deltas[n->l_num-1][i] = t->output_deltas[i]
+			* (1 - n->l[n->l_num-1]->p[i]->output)
+			* n->l[n->l_num-1]->p[i]->output;
+	}
+
+	sum = 0;
+	for (i=0; i<n->l[n->l_num-2]->p_num; i++) {
+		sum += t->small_deltas[n->l_num-1][i];
+	}
+	for (i=1; i<n->l[n->l_num-2]->p_num; i++) {
+		for (j=0; j<n->l[n->l_num-1]->p_num; j++) {
+			t->small_deltas[n->l_num-2][i-1]
+				= (1 - n->l[n->l_num-2]->p[j]->output)
+				* n->l[n->l_num-2]->p[j]->output
+				* sum
+				* n->l[n->l_num-1]->p[j]->weights[i];
+		}
+	}
+
+	for (i=n->l_num-3; i>0; i--) {
+		sum = 0;
+		for (j=1; j<n->l[i+1]->p_num; j++) {
+			sum += t->small_deltas[i+1][j-1];
+		}
+		for (j=0; j<n->l[i]->p_num; j++) {
+			for (k=1; k<n->l[i+1]->p_num; k++) {
+				t->small_deltas[i][j]
+					= (1 - n->l[i]->p[k]->output)
+					* n->l[i]->p[k]->output
+					* sum
+					* n->l[i+1]->p[k]->weights[j];
+			}
+		}
+	}
+}
+
+static void
+calculate_weight_deltas(NeuralNet *n, TrainingInfo *t)
+{
+	int i, j, k;
+
+	for (i=n->l_num-1; i>0; i--) {
+		for (j=0; j<n->l[i]->p_num; j++) {
+			for (k=0; k<n->l[i-1]->p_num; k++) {
+				t->weight_deltas[i][j][k] = t->t_factor
+					* t->small_deltas[i][j]
+					* n->l[i-1]->p[k]->output;
+			}
+		}
+	}
+}
+
+static void
+calculate_weights(NeuralNet *n, TrainingInfo *t)
+{
+	int i, j, k;
+
+	for (i=n->l_num-1; i>0; i--) {
+		for (j=0; j<n->l[i]->p_num; j++) {
+			for (k=0; k<n->l[i-1]->p_num; k++) {
+				n->l[i]->p[j]->weights[k]
+					= n->l[i]->p[j]->weights[k]
+					- t->weight_deltas[i][j][k];
+			}
+		}
+	}
+}
+
+static void
+print_weights(NeuralNet *n)
+{
+	int i, j, k;
+
+	for (i=1; i<n->l_num-1; i++) {
+		fprintf(stderr, "L %d", i);
+		for (j=1; j<n->l[i]->p_num; j++) {
+			fprintf(stderr, "\tP %d\n", j);
+			for (k=0; k<n->l[i-1]->p_num; k++) {
+				printf("%lf,", n->l[i]->p[j]->weights[k]);
+			}
+			printf("\n");
+		}
+		printf("\n");
+	}
+	fprintf(stderr, "L %d", i);
+	for (i=0; i<n->l[n->l_num-1]->p_num; i++) {
+		fprintf(stderr, "\tP %d\n", i);
+		for (j=0; j<n->l[n->l_num-2]->p_num; j++) {
+			printf("%lf,", n->l[n->l_num-1]->p[i]->weights[j]);
+		}
+		printf("\n");
+	}
+	printf("\n");
+}