d5/db0/adaline__learning_8cpp_source.html

#include <array>

#include <cassert>

#include <climits>

#include <cmath>

#include <cstdlib>

#include <ctime>

#include <iostream>

#include <numeric>

#include <vector>


constexpr int MAX_ITER = 500;  // INT_MAX


namespace machine_learning {


class adaline {

 public:


    explicit adaline(int num_features, const double eta = 0.01f,

                     const double accuracy = 1e-5)

        : eta(eta), accuracy(accuracy) {

        if (eta <= 0) {

            std::cerr << "learning rate should be positive and nonzero"

                      << std::endl;

            std::exit(EXIT_FAILURE);

        }


        weights = std::vector<double>(

            num_features +

            1);  // additional weight is for the constant bias term


        // initialize with random weights in the range [-50, 49]

        for (double &weight : weights) weight = 1.f;

        // weights[i] = (static_cast<double>(std::rand() % 100) - 50);

    }


    friend std::ostream &operator<<(std::ostream &out, const adaline &ada) {

        out << "<";

        for (int i = 0; i < ada.weights.size(); i++) {

            out << ada.weights[i];

            if (i < ada.weights.size() - 1) {

                out << ", ";

            }

        }

        out << ">";

        return out;

    }


    int predict(const std::vector<double> &x, double *out = nullptr) {

        if (!check_size_match(x)) {

            return 0;

        }


        double y = weights.back();  // assign bias value


        // for (int i = 0; i < x.size(); i++) y += x[i] * weights[i];

        y = std::inner_product(x.begin(), x.end(), weights.begin(), y);


        if (out != nullptr) {  // if out variable is provided

            *out = y;

        }


        return activation(y);  // quantizer: apply ADALINE threshold function

    }


    double fit(const std::vector<double> &x, const int &y) {

        if (!check_size_match(x)) {

            return 0;

        }


        /* output of the model with current weights */

        int p = predict(x);

        int prediction_error = y - p;  // error in estimation

        double correction_factor = eta * prediction_error;


        /* update each weight, the last weight is the bias term */

        for (int i = 0; i < x.size(); i++) {

            weights[i] += correction_factor * x[i];

        }

        weights[x.size()] += correction_factor;  // update bias


        return correction_factor;

    }


    template <size_t N>


    void fit(std::array<std::vector<double>, N> const &X,

             std::array<int, N> const &Y) {

        double avg_pred_error = 1.f;


        int iter = 0;

        for (iter = 0; (iter < MAX_ITER) && (avg_pred_error > accuracy);

             iter++) {

            avg_pred_error = 0.f;


            // perform fit for each sample

            for (int i = 0; i < N; i++) {

                double err = fit(X[i], Y[i]);

                avg_pred_error += std::abs(err);

            }

            avg_pred_error /= N;


            // Print updates every 200th iteration

            // if (iter % 100 == 0)

            std::cout << "\tIter " << iter << ": Training weights: " << *this

                      << "\tAvg error: " << avg_pred_error << std::endl;

        }


        if (iter < MAX_ITER) {

            std::cout << "Converged after " << iter << " iterations."

                      << std::endl;

        } else {

            std::cout << "Did not converge after " << iter << " iterations."

                      << std::endl;

        }

    }


    int activation(double x) { return x > 0 ? 1 : -1; }


 private:


    bool check_size_match(const std::vector<double> &x) {

        if (x.size() != (weights.size() - 1)) {

            std::cerr << __func__ << ": "

                      << "Number of features in x does not match the feature "

                         "dimension in model!"

                      << std::endl;

            return false;

        }

        return true;

    }


    const double eta;

    const double accuracy;

    std::vector<double> weights;

};


}  // namespace machine_learning


using machine_learning::adaline;


void test1(double eta = 0.01) {

    adaline ada(2, eta);  // 2 features


    const int N = 10;  // number of sample points


    std::array<std::vector<double>, N> X = {

        std::vector<double>({0, 1}),   std::vector<double>({1, -2}),

        std::vector<double>({2, 3}),   std::vector<double>({3, -1}),

        std::vector<double>({4, 1}),   std::vector<double>({6, -5}),

        std::vector<double>({-7, -3}), std::vector<double>({-8, 5}),

        std::vector<double>({-9, 2}),  std::vector<double>({-10, -15})};

    std::array<int, N> y = {1,  -1, 1, -1, -1,

                            -1, 1,  1, 1,  -1};  // corresponding y-values


    std::cout << "------- Test 1 -------" << std::endl;

    std::cout << "Model before fit: " << ada << std::endl;


    ada.fit<N>(X, y);

    std::cout << "Model after fit: " << ada << std::endl;


    int predict = ada.predict({5, -3});

    std::cout << "Predict for x=(5,-3): " << predict;

    assert(predict == -1);

    std::cout << " ...passed" << std::endl;


    predict = ada.predict({5, 8});

    std::cout << "Predict for x=(5,8): " << predict;

    assert(predict == 1);

    std::cout << " ...passed" << std::endl;

}


void test2(double eta = 0.01) {

    adaline ada(2, eta);  // 2 features


    const int N = 50;  // number of sample points


    std::array<std::vector<double>, N> X;

    std::array<int, N> Y{};  // corresponding y-values


    // generate sample points in the interval

    // [-range2/100 , (range2-1)/100]

    int range = 500;          // sample points full-range

    int range2 = range >> 1;  // sample points half-range

    for (int i = 0; i < N; i++) {

        double x0 = (static_cast<double>(std::rand() % range) - range2) / 100.f;

        double x1 = (static_cast<double>(std::rand() % range) - range2) / 100.f;

        X[i] = std::vector<double>({x0, x1});

        Y[i] = (x0 + 3. * x1) > -1 ? 1 : -1;

    }


    std::cout << "------- Test 2 -------" << std::endl;

    std::cout << "Model before fit: " << ada << std::endl;


    ada.fit(X, Y);

    std::cout << "Model after fit: " << ada << std::endl;


    int N_test_cases = 5;

    for (int i = 0; i < N_test_cases; i++) {

        double x0 = (static_cast<double>(std::rand() % range) - range2) / 100.f;

        double x1 = (static_cast<double>(std::rand() % range) - range2) / 100.f;


        int predict = ada.predict({x0, x1});


        std::cout << "Predict for x=(" << x0 << "," << x1 << "): " << predict;


        int expected_val = (x0 + 3. * x1) > -1 ? 1 : -1;

        assert(predict == expected_val);

        std::cout << " ...passed" << std::endl;

    }

}


void test3(double eta = 0.01) {

    adaline ada(6, eta);  // 2 features


    const int N = 100;  // number of sample points


    std::array<std::vector<double>, N> X;

    std::array<int, N> Y{};  // corresponding y-values


    // generate sample points in the interval

    // [-range2/100 , (range2-1)/100]

    int range = 200;          // sample points full-range

    int range2 = range >> 1;  // sample points half-range

    for (int i = 0; i < N; i++) {

        double x0 = (static_cast<double>(std::rand() % range) - range2) / 100.f;

        double x1 = (static_cast<double>(std::rand() % range) - range2) / 100.f;

        double x2 = (static_cast<double>(std::rand() % range) - range2) / 100.f;

        X[i] = std::vector<double>({x0, x1, x2, x0 * x0, x1 * x1, x2 * x2});

        Y[i] = ((x0 * x0) + (x1 * x1) + (x2 * x2)) <= 1.f ? 1 : -1;

    }


    std::cout << "------- Test 3 -------" << std::endl;

    std::cout << "Model before fit: " << ada << std::endl;


    ada.fit(X, Y);

    std::cout << "Model after fit: " << ada << std::endl;


    int N_test_cases = 5;

    for (int i = 0; i < N_test_cases; i++) {

        double x0 = (static_cast<double>(std::rand() % range) - range2) / 100.f;

        double x1 = (static_cast<double>(std::rand() % range) - range2) / 100.f;

        double x2 = (static_cast<double>(std::rand() % range) - range2) / 100.f;


        int predict = ada.predict({x0, x1, x2, x0 * x0, x1 * x1, x2 * x2});


        std::cout << "Predict for x=(" << x0 << "," << x1 << "," << x2

                  << "): " << predict;


        int expected_val = ((x0 * x0) + (x1 * x1) + (x2 * x2)) <= 1.f ? 1 : -1;

        assert(predict == expected_val);

        std::cout << " ...passed" << std::endl;

    }

}


int main(int argc, char **argv) {

    std::srand(std::time(nullptr));  // initialize random number generator


    double eta = 0.1;  // default value of eta

    if (argc == 2) {   // read eta value from commandline argument if present

        eta = strtof(argv[1], nullptr);

    }


    test1(eta);


    std::cout << "Press ENTER to continue..." << std::endl;

    std::cin.get();


    test2(eta);


    std::cout << "Press ENTER to continue..." << std::endl;

    std::cin.get();


    test3(eta);


    return 0;

}


adaline::adaline
adaline(int num_features, const double eta=0.01f, const double accuracy=1e-5)
Definition adaline_learning.cpp:55

machine_learning::adaline
Definition adaline_learning.cpp:46

machine_learning::adaline::activation
int activation(double x)
Definition adaline_learning.cpp:186

machine_learning::adaline::adaline
adaline(int num_features, const double eta=0.01f, const double accuracy=1e-5)
Definition adaline_learning.cpp:55

machine_learning::adaline::eta
const double eta
learning rate of the algorithm
Definition adaline_learning.cpp:207

machine_learning::adaline::weights
std::vector< double > weights
weights of the neural network
Definition adaline_learning.cpp:209

machine_learning::adaline::fit
double fit(const std::vector< double > &x, const int &y)
Definition adaline_learning.cpp:119

machine_learning::adaline::fit
void fit(std::array< std::vector< double >, N > const &X, std::array< int, N > const &Y)
Definition adaline_learning.cpp:145

machine_learning::adaline::accuracy
const double accuracy
model fit convergence accuracy
Definition adaline_learning.cpp:208

machine_learning::adaline::predict
int predict(const std::vector< double > &x, double *out=nullptr)
Definition adaline_learning.cpp:95

machine_learning::adaline::check_size_match
bool check_size_match(const std::vector< double > &x)
Definition adaline_learning.cpp:196

machine_learning::adaline::operator<<
friend std::ostream & operator<<(std::ostream &out, const adaline &ada)
Definition adaline_learning.cpp:76

test2
static void test2()
Self-implementations, 2nd test.
Definition dsu_path_compression.cpp:187

test1
static void test1()
Self-test implementations, 1st test.
Definition dsu_path_compression.cpp:170

main
int main()
Main function.
Definition generate_parentheses.cpp:110

MAX_ITER
constexpr int MAX_ITER
Definition adaline_learning.cpp:40

test3
static void test3()
Definition hamiltons_cycle.cpp:122

machine_learning
A* search algorithm