TheAlgorithms/C++ 1.0.0
All the algorithms implemented in C++
Loading...
Searching...
No Matches
ordinary_least_squares_regressor.cpp
Go to the documentation of this file.
1
12#include <cassert>
13#include <cmath> // for std::abs
14#include <iomanip> // for print formatting
15#include <iostream>
16#include <vector>
17
/**
 * @brief Stream-insertion for a 2D matrix: prints one row per line, each
 * element left-justified in a fixed-width, space-padded column.
 * @tparam T element type of the matrix
 * @param out stream to write to
 * @param v matrix to print
 * @return the same stream, so insertions can be chained
 */
template <typename T>
std::ostream &operator<<(std::ostream &out,
                         std::vector<std::vector<T>> const &v) {
    const int cell_width = 10;
    const char pad = ' ';

    for (const auto &matrix_row : v) {
        for (const auto &element : matrix_row) {
            out << std::left << std::setw(cell_width) << std::setfill(pad)
                << element;
        }
        out << std::endl;
    }

    return out;
}
37
/**
 * @brief Stream-insertion for a 1D vector: prints all elements on one line,
 * each left-justified in a fixed-width, space-padded column.
 * @tparam T element type of the vector
 * @param out stream to write to
 * @param v vector to print
 * @return the same stream, so insertions can be chained
 */
template <typename T>
std::ostream &operator<<(std::ostream &out, std::vector<T> const &v) {
    const int cell_width = 15;
    const char pad = ' ';

    for (const auto &element : v) {
        out << std::left << std::setw(cell_width) << std::setfill(pad)
            << element;
    }

    return out;
}
53
58template <typename T>
59inline bool is_square(std::vector<std::vector<T>> const &A) {
60 // Assuming A is square matrix
61 size_t N = A.size();
62 for (size_t i = 0; i < N; i++) {
63 if (A[i].size() != N) {
64 return false;
65 }
66 }
67 return true;
68}
69
/**
 * @brief Matrix-matrix multiplication.
 * @tparam T element type of both matrices
 * @param A left operand (N_A x K)
 * @param B right operand (K x N_B)
 * @return the N_A x N_B product; on dimension mismatch or an empty operand,
 * prints to std::cerr and returns a matrix of N_A empty rows
 */
template <typename T>
std::vector<std::vector<T>> operator*(std::vector<std::vector<T>> const &A,
                                      std::vector<std::vector<T>> const &B) {
    // Number of rows in A
    size_t N_A = A.size();

    std::vector<std::vector<T>> result(N_A);

    // Guard first: A[0]/B[0] below would be undefined behavior on an
    // empty operand.
    if (A.empty() || B.empty()) {
        std::cerr << "Empty matrix operand!" << std::endl;
        return result;
    }

    // Number of columns in B
    size_t N_B = B[0].size();

    if (A[0].size() != B.size()) {
        std::cerr << "Number of columns in A != Number of rows in B ("
                  << A[0].size() << ", " << B.size() << ")" << std::endl;
        return result;
    }

    for (size_t row = 0; row < N_A; row++) {
        std::vector<T> v(N_B);
        for (size_t col = 0; col < N_B; col++) {
            v[col] = static_cast<T>(0);
            // dot product of A's row with B's column
            for (size_t j = 0; j < B.size(); j++) {
                v[col] += A[row][j] * B[j][col];
            }
        }
        result[row] = v;
    }

    return result;
}
106
/**
 * @brief Matrix-vector multiplication.
 * @tparam T element type of the operands
 * @param A left operand (N_A x K matrix)
 * @param B right operand (vector of K elements)
 * @return the N_A-element product; on dimension mismatch or an empty
 * matrix, prints to std::cerr and returns a zero-initialized vector
 */
template <typename T>
std::vector<T> operator*(std::vector<std::vector<T>> const &A,
                         std::vector<T> const &B) {
    // Number of rows in A
    size_t N_A = A.size();

    std::vector<T> result(N_A);

    // Guard first: A[0] below would be undefined behavior for an empty
    // matrix.
    if (A.empty()) {
        std::cerr << "Empty matrix operand!" << std::endl;
        return result;
    }

    if (A[0].size() != B.size()) {
        std::cerr << "Number of columns in A != Number of rows in B ("
                  << A[0].size() << ", " << B.size() << ")" << std::endl;
        return result;
    }

    for (size_t row = 0; row < N_A; row++) {
        result[row] = static_cast<T>(0);
        // dot product of A's row with B
        for (size_t j = 0; j < B.size(); j++) result[row] += A[row][j] * B[j];
    }

    return result;
}
132
137template <typename T>
138std::vector<float> operator*(float const scalar, std::vector<T> const &A) {
139 // Number of rows in A
140 size_t N_A = A.size();
141
142 std::vector<float> result(N_A);
143
144 for (size_t row = 0; row < N_A; row++) {
145 result[row] += A[row] * static_cast<float>(scalar);
146 }
147
148 return result;
149}
150
155template <typename T>
156std::vector<float> operator*(std::vector<T> const &A, float const scalar) {
157 // Number of rows in A
158 size_t N_A = A.size();
159
160 std::vector<float> result(N_A);
161
162 for (size_t row = 0; row < N_A; row++) {
163 result[row] = A[row] * static_cast<float>(scalar);
164 }
165
166 return result;
167}
168
/**
 * @brief Vector-scalar division, implemented as multiplication by the
 * reciprocal (delegates to the scalar-vector product overload).
 * @tparam T element type of the vector
 * @param A vector to divide
 * @param scalar divisor applied to each element
 * @return a float vector with each element equal to A[i] / scalar
 */
template <typename T>
std::vector<float> operator/(std::vector<T> const &A, float const scalar) {
    const float reciprocal = 1.f / scalar;
    return reciprocal * A;
}
177
/**
 * @brief Element-wise vector subtraction (A - B).
 * @tparam T element type of both vectors
 * @param A left operand
 * @param B right operand, must have the same size as A
 * @return A - B element-wise; on size mismatch, prints to std::cerr and
 * returns A unchanged
 */
template <typename T>
std::vector<T> operator-(std::vector<T> const &A, std::vector<T> const &B) {
    // Number of rows in A
    size_t N = A.size();

    std::vector<T> result(N);

    if (B.size() != N) {
        // fixed message typo: "shouldbe" -> "should be"
        std::cerr << "Vector dimensions should be identical!" << std::endl;
        return A;
    }

    for (size_t row = 0; row < N; row++) result[row] = A[row] - B[row];

    return result;
}
198
/**
 * @brief Element-wise vector addition (A + B).
 * @tparam T element type of both vectors
 * @param A left operand
 * @param B right operand, must have the same size as A
 * @return A + B element-wise; on size mismatch, prints to std::cerr and
 * returns A unchanged
 */
template <typename T>
std::vector<T> operator+(std::vector<T> const &A, std::vector<T> const &B) {
    // Number of rows in A
    size_t N = A.size();

    std::vector<T> result(N);

    if (B.size() != N) {
        // fixed message typo: "shouldbe" -> "should be"
        std::cerr << "Vector dimensions should be identical!" << std::endl;
        return A;
    }

    for (size_t row = 0; row < N; row++) result[row] = A[row] + B[row];

    return result;
}
219
/**
 * @brief Computes the inverse of a square matrix via Gauss-Jordan
 * elimination: row operations reduce a working copy of A to the identity
 * while the same operations, applied to an identity matrix, accumulate
 * A^-1.
 * @tparam T element type of the input matrix
 * @param A square matrix to invert
 * @return the inverse as a float matrix; on error (non-square or singular
 * input) a message is printed to std::cerr and the partially-transformed
 * accumulator is returned as-is
 */
template <typename T>
std::vector<std::vector<float>> get_inverse(
    std::vector<std::vector<T>> const &A) {
    // Assuming A is square matrix
    size_t N = A.size();

    // preallocate the accumulator as an N x N identity matrix
    std::vector<std::vector<float>> inverse(N);
    for (size_t row = 0; row < N; row++) {
        inverse[row] = std::vector<float>(N);
        for (size_t col = 0; col < N; col++) {
            inverse[row][col] = (row == col) ? 1.f : 0.f;
        }
    }

    if (!is_square(A)) {
        std::cerr << "A must be a square matrix!" << std::endl;
        return inverse;
    }

    // preallocate a temporary matrix identical to A; working on a copy
    // keeps the caller's matrix untouched
    std::vector<std::vector<float>> temp(N);
    for (size_t row = 0; row < N; row++) {
        std::vector<float> v(N);
        for (size_t col = 0; col < N; col++) {
            v[col] = static_cast<float>(A[row][col]);
        }
        temp[row] = v;
    }

    // start transformations; every row/column operation on `temp` is
    // mirrored on `inverse`
    for (size_t row = 0; row < N; row++) {
        // if the pivot is zero, add later rows into this one until the
        // diagonal element becomes non-zero
        for (size_t row2 = row; row2 < N && temp[row][row] == 0; row2++) {
            temp[row] = temp[row] + temp[row2];
            inverse[row] = inverse[row] + inverse[row2];
        }

        // if still zero, additionally fold later columns into the pivot
        // column (same operation applied to both matrices)
        for (size_t col2 = row; col2 < N && temp[row][row] == 0; col2++) {
            for (size_t row2 = 0; row2 < N; row2++) {
                temp[row2][row] = temp[row2][row] + temp[row2][col2];
                inverse[row2][row] = inverse[row2][row] + inverse[row2][col2];
            }
        }

        if (temp[row][row] == 0) {
            // Probably a low-rank matrix and hence singular
            std::cerr << "Low-rank matrix, no inverse!" << std::endl;
            return inverse;
        }

        // set diagonal to 1 by dividing the pivot row by the pivot value
        auto divisor = static_cast<float>(temp[row][row]);
        temp[row] = temp[row] / divisor;
        inverse[row] = inverse[row] / divisor;
        // Row transformations: eliminate the pivot column from every
        // other row
        for (size_t row2 = 0; row2 < N; row2++) {
            if (row2 == row) {
                continue;
            }
            float factor = temp[row2][row];
            temp[row2] = temp[row2] - factor * temp[row];
            inverse[row2] = inverse[row2] - factor * inverse[row];
        }
    }

    return inverse;
}
294
299template <typename T>
300std::vector<std::vector<T>> get_transpose(
301 std::vector<std::vector<T>> const &A) {
302 std::vector<std::vector<T>> result(A[0].size());
303
304 for (size_t row = 0; row < A[0].size(); row++) {
305 std::vector<T> v(A.size());
306 for (size_t col = 0; col < A.size(); col++) v[col] = A[col][row];
307
308 result[row] = v;
309 }
310 return result;
311}
312
/**
 * @brief Fits an ordinary-least-squares regressor via the normal
 * equations: beta = (X'X)^-1 * X' * Y, after appending a constant-1
 * column to X so the last coefficient is the Y-intercept.
 * @tparam T element type of the data. NOTE(review): get_inverse returns a
 * float matrix which is assigned to vector<vector<T>> below, so this
 * presumably only instantiates for T = float — confirm before using other
 * types.
 * @param X NxF feature matrix (N samples, F features each)
 * @param Y vector of N observed outputs
 * @return F+1 regression coefficients; the (F+1)-th is the intercept
 */
template <typename T>
std::vector<float> fit_OLS_regressor(std::vector<std::vector<T>> const &X,
                                     std::vector<T> const &Y) {
    // NxF
    std::vector<std::vector<T>> X2 = X;
    for (size_t i = 0; i < X2.size(); i++) {
        // add Y-intercept -> Nx(F+1)
        X2[i].push_back(1);
    }
    // (F+1)xN
    std::vector<std::vector<T>> Xt = get_transpose(X2);
    // (F+1)x(F+1)
    std::vector<std::vector<T>> tmp = get_inverse(Xt * X2);
    // (F+1)xN
    std::vector<std::vector<float>> out = tmp * Xt;
    // cout << endl
    // << "Projection matrix: " << X2 * out << endl;

    // Fx1,1 -> (F+1)^th element is the independent constant
    return out * Y;
}
341
/**
 * @brief Predicts outputs for a batch of samples from fitted OLS
 * coefficients: result[i] = beta[F] + sum_j beta[j] * X[i][j].
 * @tparam T element type of the feature matrix
 * @param X NxF matrix of test samples
 * @param beta F+1 regression coefficients; the last one is the intercept
 * @return N predicted outputs (empty when X is empty)
 */
template <typename T>
std::vector<float> predict_OLS_regressor(std::vector<std::vector<T>> const &X,
                                         std::vector<float> const &beta
) {
    std::vector<float> result(X.size());

    // Guard: X[0].size() below would be undefined behavior on an empty
    // batch.
    if (X.empty()) {
        return result;
    }

    const size_t num_features = X[0].size();

    for (size_t rows = 0; rows < X.size(); rows++) {
        // -> start with constant (intercept) term
        result[rows] = beta[num_features];
        for (size_t cols = 0; cols < num_features; cols++) {
            result[rows] += beta[cols] * X[rows][cols];
        }
    }
    // Nx1
    return result;
}
367
369void ols_test() {
370 int F = 3, N = 5;
371
372 /* test function = x^2 -5 */
373 std::cout << "Test 1 (quadratic function)....";
374 // create training data set with features = x, x^2, x^3
375 std::vector<std::vector<float>> data1(
376 {{-5, 25, -125}, {-1, 1, -1}, {0, 0, 0}, {1, 1, 1}, {6, 36, 216}});
377 // create corresponding outputs
378 std::vector<float> Y1({20, -4, -5, -4, 31});
379 // perform regression modelling
380 std::vector<float> beta1 = fit_OLS_regressor(data1, Y1);
381 // create test data set with same features = x, x^2, x^3
382 std::vector<std::vector<float>> test_data1(
383 {{-2, 4, -8}, {2, 4, 8}, {-10, 100, -1000}, {10, 100, 1000}});
384 // expected regression outputs
385 std::vector<float> expected1({-1, -1, 95, 95});
386 // predicted regression outputs
387 std::vector<float> out1 = predict_OLS_regressor(test_data1, beta1);
388 // compare predicted results are within +-0.01 limit of expected
389 for (size_t rows = 0; rows < out1.size(); rows++) {
390 assert(std::abs(out1[rows] - expected1[rows]) < 0.01);
391 }
392 std::cout << "passed\n";
393
394 /* test function = x^3 + x^2 - 100 */
395 std::cout << "Test 2 (cubic function)....";
396 // create training data set with features = x, x^2, x^3
397 std::vector<std::vector<float>> data2(
398 {{-5, 25, -125}, {-1, 1, -1}, {0, 0, 0}, {1, 1, 1}, {6, 36, 216}});
399 // create corresponding outputs
400 std::vector<float> Y2({-200, -100, -100, 98, 152});
401 // perform regression modelling
402 std::vector<float> beta2 = fit_OLS_regressor(data2, Y2);
403 // create test data set with same features = x, x^2, x^3
404 std::vector<std::vector<float>> test_data2(
405 {{-2, 4, -8}, {2, 4, 8}, {-10, 100, -1000}, {10, 100, 1000}});
406 // expected regression outputs
407 std::vector<float> expected2({-104, -88, -1000, 1000});
408 // predicted regression outputs
409 std::vector<float> out2 = predict_OLS_regressor(test_data2, beta2);
410 // compare predicted results are within +-0.01 limit of expected
411 for (size_t rows = 0; rows < out2.size(); rows++) {
412 assert(std::abs(out2[rows] - expected2[rows]) < 0.01);
413 }
414 std::cout << "passed\n";
415
416 std::cout << std::endl; // ensure test results are displayed on screen
417 // (flush stdout)
418}
419
/**
 * @brief Main driver: runs the self-test first, then interactively fits
 * an OLS model on user-supplied training samples and prints predictions
 * for user-supplied test samples.
 * @returns 0 on exit
 */
int main() {
    ols_test();  // sanity-check the regressor before taking user input

    size_t N = 0, F = 0;

    std::cout << "Enter number of features: ";
    // number of features = columns
    std::cin >> F;
    std::cout << "Enter number of samples: ";
    // number of samples = rows
    std::cin >> N;

    // NxF design matrix and the N observed outputs
    std::vector<std::vector<float>> data(N);
    std::vector<float> Y(N);

    std::cout
        << "Enter training data. Per sample, provide features and one output."
        << std::endl;

    for (size_t rows = 0; rows < N; rows++) {
        std::vector<float> v(F);
        std::cout << "Sample# " << rows + 1 << ": ";
        for (size_t cols = 0; cols < F; cols++) {
            // get the F features
            std::cin >> v[cols];
        }
        data[rows] = v;
        // get the corresponding output
        std::cin >> Y[rows];
    }

    // fit: beta has F+1 entries, the last being the intercept
    std::vector<float> beta = fit_OLS_regressor(data, Y);
    std::cout << std::endl << std::endl << "beta:" << beta << std::endl;

    size_t T = 0;
    std::cout << "Enter number of test samples: ";
    // number of test sample inputs
    std::cin >> T;
    std::vector<std::vector<float>> data2(T);
    // vector<float> Y2(T);

    // read T test samples, features only (no outputs)
    for (size_t rows = 0; rows < T; rows++) {
        std::cout << "Sample# " << rows + 1 << ": ";
        std::vector<float> v(F);
        for (size_t cols = 0; cols < F; cols++) std::cin >> v[cols];
        data2[rows] = v;
    }

    // predict and print one output per line
    std::vector<float> out = predict_OLS_regressor(data2, beta);
    for (size_t rows = 0; rows < T; rows++) std::cout << out[rows] << std::endl;

    return 0;
}
int data[MAX]
test data
std::vector< T > operator+(std::vector< T > const &A, std::vector< T > const &B)
std::vector< std::vector< T > > get_transpose(std::vector< std::vector< T > > const &A)
std::vector< T > operator-(std::vector< T > const &A, std::vector< T > const &B)
std::ostream & operator<<(std::ostream &out, std::vector< std::vector< T > > const &v)
std::vector< float > operator/(std::vector< T > const &A, float const scalar)
bool is_square(std::vector< std::vector< T > > const &A)
std::vector< float > fit_OLS_regressor(std::vector< std::vector< T > > const &X, std::vector< T > const &Y)
std::vector< float > predict_OLS_regressor(std::vector< std::vector< T > > const &X, std::vector< float > const &beta)
std::vector< std::vector< T > > operator*(std::vector< std::vector< T > > const &A, std::vector< std::vector< T > > const &B)
std::vector< std::vector< float > > get_inverse(std::vector< std::vector< T > > const &A)