Algorithms_in_C++ 1.0.0
Set of algorithms implemented in C++.
Loading...
Searching...
No Matches
kohonen_som_topology.cpp File Reference

Kohonen self organizing map (topological map) More...

#include <algorithm>
#include <array>
#include <cerrno>
#include <cmath>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <fstream>
#include <iostream>
#include <valarray>
#include <vector>
Include dependency graph for kohonen_som_topology.cpp:

Namespaces

namespace  machine_learning
 A* search algorithm
 

Functions

double _random (double a, double b)
 
int save_2d_data (const char *fname, const std::vector< std::valarray< double > > &X)
 
void get_min_2d (const std::vector< std::valarray< double > > &X, double *val, int *x_idx, int *y_idx)
 
int machine_learning::save_u_matrix (const char *fname, const std::vector< std::vector< std::valarray< double > > > &W)
 
double machine_learning::update_weights (const std::valarray< double > &X, std::vector< std::vector< std::valarray< double > > > *W, std::vector< std::valarray< double > > *D, double alpha, int R)
 
void machine_learning::kohonen_som (const std::vector< std::valarray< double > > &X, std::vector< std::vector< std::valarray< double > > > *W, double alpha_min)
 
void test_2d_classes (std::vector< std::valarray< double > > *data)
 
void test1 ()
 
void test_3d_classes1 (std::vector< std::valarray< double > > *data)
 
void test2 ()
 
void test_3d_classes2 (std::vector< std::valarray< double > > *data)
 
void test3 ()
 
double get_clock_diff (clock_t start_t, clock_t end_t)
 
int main (int argc, char **argv)
 

Variables

constexpr double machine_learning::MIN_DISTANCE = 1e-4
 

Detailed Description

Kohonen self organizing map (topological map)

Author
Krishna Vedala

This example implements a powerful unsupervised learning algorithm called as a self organizing map. The algorithm creates a connected network of weights that closely follows the given data points. This thus creates a topological map of the given data i.e., it maintains the relationship between varipus data points in a much higher dimesional space by creating an equivalent in a 2-dimensional space. Trained topological maps for the test cases in the program

Note
This C++ version of the program is considerable slower than its C counterpart
The compiled code is much slower when compiled with MS Visual C++ 2019 than with GCC on windows
See also
kohonen_som_trace.cpp

Function Documentation

◆ get_clock_diff()

double get_clock_diff ( clock_t start_t,
clock_t end_t )

Convert clock cycle difference to time in seconds

Parameters
[in]start_tstart clock
[in]end_tend clock
Returns
time difference in seconds
577 {
578 return static_cast<double>(end_t - start_t) / CLOCKS_PER_SEC;
579}

◆ main()

int main ( int argc,
char ** argv )

Main function

582 {
583#ifdef _OPENMP
584 std::cout << "Using OpenMP based parallelization\n";
585#else
586 std::cout << "NOT using OpenMP based parallelization\n";
587#endif
588
589 std::srand(std::time(nullptr));
590
591 std::clock_t start_clk = std::clock();
592 test1();
593 auto end_clk = std::clock();
594 std::cout << "Test 1 completed in " << get_clock_diff(start_clk, end_clk)
595 << " sec\n";
596
597 start_clk = std::clock();
598 test2();
599 end_clk = std::clock();
600 std::cout << "Test 2 completed in " << get_clock_diff(start_clk, end_clk)
601 << " sec\n";
602
603 start_clk = std::clock();
604 test3();
605 end_clk = std::clock();
606 std::cout << "Test 3 completed in " << get_clock_diff(start_clk, end_clk)
607 << " sec\n";
608
610 << "(Note: Calculated times include: creating test sets, training "
611 "model and writing files to disk.)\n\n";
612 return 0;
613}
T clock(T... args)
void test2()
Definition kohonen_som_topology.cpp:451
void test1()
Definition kohonen_som_topology.cpp:369
double get_clock_diff(clock_t start_t, clock_t end_t)
Definition kohonen_som_topology.cpp:577
void test3()
Definition kohonen_som_topology.cpp:537
T srand(T... args)
T time(T... args)
Here is the call graph for this function:

◆ test1()

void test1 ( )

Test that creates a random set of points distributed in four clusters in circumference of a circle and trains an SOM that finds that circular pattern. The following CSV files are created to validate the execution:

  • test1.csv: random test samples points with a circular pattern
  • w11.csv: initial random map
  • w12.csv: trained SOM map
369 {
370 int j = 0, N = 300;
371 int features = 2;
372 int num_out = 30;
375 for (int i = 0; i < std::max(num_out, N); i++) {
376 // loop till max(N, num_out)
377 if (i < N) { // only add new arrays if i < N
378 X[i] = std::valarray<double>(features);
379 }
380 if (i < num_out) { // only add new arrays if i < num_out
381 W[i] = std::vector<std::valarray<double>>(num_out);
382 for (int k = 0; k < num_out; k++) {
383 W[i][k] = std::valarray<double>(features);
384#ifdef _OPENMP
385#pragma omp for
386#endif
387 for (j = 0; j < features; j++) {
388 // preallocate with random initial weights
389 W[i][k][j] = _random(-10, 10);
390 }
391 }
392 }
393 }
394
395 test_2d_classes(&X); // create test data around circumference of a circle
396 save_2d_data("test1.csv", X); // save test data points
397 save_u_matrix("w11.csv", W); // save initial random weights
398 kohonen_som(X, &W, 1e-4); // train the SOM
399 save_u_matrix("w12.csv", W); // save the resultant weights
400}
double k(double x)
Another test function.
Definition composite_simpson_rule.cpp:117
constexpr uint32_t N
A struct to represent sparse table for min() as their invariant function, for the given array A....
Definition sparse_table.cpp:47
int save_2d_data(const char *fname, const std::vector< std::valarray< double > > &X)
Definition kohonen_som_topology.cpp:65
double _random(double a, double b)
Definition kohonen_som_topology.cpp:53
void test_2d_classes(std::vector< std::valarray< double > > *data)
Definition kohonen_som_topology.cpp:330
T max(T... args)
Here is the call graph for this function:

◆ test2()

void test2 ( )

Test that creates a random set of points distributed in 4 clusters in 3D space and trains an SOM that finds the topological pattern. The following CSV files are created to validate the execution:

  • test2.csv: random test samples points with a lamniscate pattern
  • w21.csv: initial random map
  • w22.csv: trained SOM map
451 {
452 int j = 0, N = 300;
453 int features = 3;
454 int num_out = 30;
457 for (int i = 0; i < std::max(num_out, N); i++) {
458 // loop till max(N, num_out)
459 if (i < N) { // only add new arrays if i < N
460 X[i] = std::valarray<double>(features);
461 }
462 if (i < num_out) { // only add new arrays if i < num_out
463 W[i] = std::vector<std::valarray<double>>(num_out);
464 for (int k = 0; k < num_out; k++) {
465 W[i][k] = std::valarray<double>(features);
466#ifdef _OPENMP
467#pragma omp for
468#endif
469 for (j = 0; j < features; j++) {
470 // preallocate with random initial weights
471 W[i][k][j] = _random(-10, 10);
472 }
473 }
474 }
475 }
476
477 test_3d_classes1(&X); // create test data around circumference of a circle
478 save_2d_data("test2.csv", X); // save test data points
479 save_u_matrix("w21.csv", W); // save initial random weights
480 kohonen_som(X, &W, 1e-4); // train the SOM
481 save_u_matrix("w22.csv", W); // save the resultant weights
482}
void test_3d_classes1(std::vector< std::valarray< double > > *data)
Definition kohonen_som_topology.cpp:411
Here is the call graph for this function:

◆ test3()

void test3 ( )

Test that creates a random set of points distributed in eight clusters in 3D space and trains an SOM that finds the topological pattern. The following CSV files are created to validate the execution:

  • test3.csv: random test samples points with a circular pattern
  • w31.csv: initial random map
  • w32.csv: trained SOM map
537 {
538 int j = 0, N = 500;
539 int features = 3;
540 int num_out = 30;
543 for (int i = 0; i < std::max(num_out, N); i++) {
544 // loop till max(N, num_out)
545 if (i < N) { // only add new arrays if i < N
546 X[i] = std::valarray<double>(features);
547 }
548 if (i < num_out) { // only add new arrays if i < num_out
549 W[i] = std::vector<std::valarray<double>>(num_out);
550 for (int k = 0; k < num_out; k++) {
551 W[i][k] = std::valarray<double>(features);
552#ifdef _OPENMP
553#pragma omp for
554#endif
555 for (j = 0; j < features; j++) {
556 // preallocate with random initial weights
557 W[i][k][j] = _random(-10, 10);
558 }
559 }
560 }
561 }
562
563 test_3d_classes2(&X); // create test data around circumference of a circle
564 save_2d_data("test3.csv", X); // save test data points
565 save_u_matrix("w31.csv", W); // save initial random weights
566 kohonen_som(X, &W, 1e-4); // train the SOM
567 save_u_matrix("w32.csv", W); // save the resultant weights
568}
void test_3d_classes2(std::vector< std::valarray< double > > *data)
Definition kohonen_som_topology.cpp:493
Here is the call graph for this function:

◆ test_2d_classes()

void test_2d_classes ( std::vector< std::valarray< double > > * data)

Creates a random set of points distributed in four clusters in 3D space with centroids at the points

  • \((0,5, 0.5, 0.5)\)
  • \((0,5,-0.5, -0.5)\)
  • \((-0,5, 0.5, 0.5)\)
  • \((-0,5,-0.5, -0.5)\)
Parameters
[out]datamatrix to store data in
330 {
331 const int N = data->size();
332 const double R = 0.3; // radius of cluster
333 int i = 0;
334 const int num_classes = 4;
335 std::array<std::array<double, 2>, num_classes> centres = {
336 // centres of each class cluster
337 std::array<double, 2>({.5, .5}), // centre of class 1
338 std::array<double, 2>({.5, -.5}), // centre of class 2
339 std::array<double, 2>({-.5, .5}), // centre of class 3
340 std::array<double, 2>({-.5, -.5}) // centre of class 4
341 };
342
343#ifdef _OPENMP
344#pragma omp for
345#endif
346 for (i = 0; i < N; i++) {
347 // select a random class for the point
348 int cls = std::rand() % num_classes;
349
350 // create random coordinates (x,y,z) around the centre of the class
351 data[0][i][0] = _random(centres[cls][0] - R, centres[cls][0] + R);
352 data[0][i][1] = _random(centres[cls][1] - R, centres[cls][1] + R);
353
354 /* The follosing can also be used
355 for (int j = 0; j < 2; j++)
356 data[i][j] = _random(centres[class][j] - R, centres[class][j] + R);
357 */
358 }
359}
int data[MAX]
test data
Definition hash_search.cpp:24
T rand(T... args)
Here is the call graph for this function:

◆ test_3d_classes1()

void test_3d_classes1 ( std::vector< std::valarray< double > > * data)

Creates a random set of points distributed in four clusters in 3D space with centroids at the points

  • \((0,5, 0.5, 0.5)\)
  • \((0,5,-0.5, -0.5)\)
  • \((-0,5, 0.5, 0.5)\)
  • \((-0,5,-0.5, -0.5)\)
Parameters
[out]datamatrix to store data in
411 {
412 const size_t N = data->size();
413 const double R = 0.3; // radius of cluster
414 int i = 0;
415 const int num_classes = 4;
416 const std::array<std::array<double, 3>, num_classes> centres = {
417 // centres of each class cluster
418 std::array<double, 3>({.5, .5, .5}), // centre of class 1
419 std::array<double, 3>({.5, -.5, -.5}), // centre of class 2
420 std::array<double, 3>({-.5, .5, .5}), // centre of class 3
421 std::array<double, 3>({-.5, -.5 - .5}) // centre of class 4
422 };
423
424#ifdef _OPENMP
425#pragma omp for
426#endif
427 for (i = 0; i < N; i++) {
428 // select a random class for the point
429 int cls = std::rand() % num_classes;
430
431 // create random coordinates (x,y,z) around the centre of the class
432 data[0][i][0] = _random(centres[cls][0] - R, centres[cls][0] + R);
433 data[0][i][1] = _random(centres[cls][1] - R, centres[cls][1] + R);
434 data[0][i][2] = _random(centres[cls][2] - R, centres[cls][2] + R);
435
436 /* The follosing can also be used
437 for (int j = 0; j < 3; j++)
438 data[i][j] = _random(centres[class][j] - R, centres[class][j] + R);
439 */
440 }
441}
Here is the call graph for this function:

◆ test_3d_classes2()

void test_3d_classes2 ( std::vector< std::valarray< double > > * data)

Creates a random set of points distributed in four clusters in 3D space with centroids at the points

  • \((0,5, 0.5, 0.5)\)
  • \((0,5,-0.5, -0.5)\)
  • \((-0,5, 0.5, 0.5)\)
  • \((-0,5,-0.5, -0.5)\)
Parameters
[out]datamatrix to store data in
493 {
494 const size_t N = data->size();
495 const double R = 0.2; // radius of cluster
496 int i = 0;
497 const int num_classes = 8;
498 const std::array<std::array<double, 3>, num_classes> centres = {
499 // centres of each class cluster
500 std::array<double, 3>({.5, .5, .5}), // centre of class 1
501 std::array<double, 3>({.5, .5, -.5}), // centre of class 2
502 std::array<double, 3>({.5, -.5, .5}), // centre of class 3
503 std::array<double, 3>({.5, -.5, -.5}), // centre of class 4
504 std::array<double, 3>({-.5, .5, .5}), // centre of class 5
505 std::array<double, 3>({-.5, .5, -.5}), // centre of class 6
506 std::array<double, 3>({-.5, -.5, .5}), // centre of class 7
507 std::array<double, 3>({-.5, -.5, -.5}) // centre of class 8
508 };
509
510#ifdef _OPENMP
511#pragma omp for
512#endif
513 for (i = 0; i < N; i++) {
514 // select a random class for the point
515 int cls = std::rand() % num_classes;
516
517 // create random coordinates (x,y,z) around the centre of the class
518 data[0][i][0] = _random(centres[cls][0] - R, centres[cls][0] + R);
519 data[0][i][1] = _random(centres[cls][1] - R, centres[cls][1] + R);
520 data[0][i][2] = _random(centres[cls][2] - R, centres[cls][2] + R);
521
522 /* The follosing can also be used
523 for (int j = 0; j < 3; j++)
524 data[i][j] = _random(centres[class][j] - R, centres[class][j] + R);
525 */
526 }
527}
Here is the call graph for this function: