embedded-dev-research · NeiroYT · May 26, 2024 · May 26, 2024 · May 26, 2024 · Jun 7, 2024
@@ -1,23 +1,26 @@
 #pragma once
 #include <algorithm>
 #include <stdexcept>
+#include <utility>
 #include <vector>
 
 #include "layers/Layer.hpp"
 
 namespace itlab_2023 {
 
+// Tiles whose side is smaller than this are multiplied directly instead of
+// being split into quadrants (see m_mult / m_mult_tbb).
+constexpr size_t kDepth1 = 128;
+// Recursion depth after which m_mult / m_mult_tbb stop splitting tiles.
+constexpr size_t kDepth2 = 5;
+
 class FCLayer : public Layer {
  private:
   Tensor weights_;
   Tensor bias_;
+  ImplType implType_;
 
  public:
   FCLayer() = default;
-  FCLayer(const Tensor& weights, const Tensor& bias) {
-    weights_ = weights;
-    bias_ = bias;
-  }
+  FCLayer(Tensor weights, const Tensor& bias, ImplType implType = kDefault)
+      : weights_(std::move(weights)), bias_(bias), implType_(implType) {}
   static std::string get_name() { return "Fully-connected layer"; }
   void run(const Tensor& input, Tensor& output) override;
 #ifdef ENABLE_STATISTIC_WEIGHTS
@@ -32,7 +35,7 @@ std::vector<ValueType> mat_vec_mul(const std::vector<ValueType>& mat,
   if (mat_shape.dims() != 2) {
     throw std::invalid_argument("Not a matrix in argument");
   }
-  if (vec.size() != mat_shape[1]) {
+  if (vec.size() < mat_shape[1]) {
     throw std::invalid_argument("Invalid vector size");
   }
   Shape res_shape(1);
@@ -50,6 +53,119 @@ std::vector<ValueType> mat_vec_mul(const std::vector<ValueType>& mat,
   return res;
 }
 
+/// Reads element (i, j) of a row-major matrix stored in `mat`.
+///
+/// @param i          Row index.
+/// @param j          Column index.
+/// @param mat        Flat matrix storage, indexed as i * mat_shape[1] + j.
+/// @param mat_shape  Matrix shape; [0] is the row count, [1] the column count.
+/// @return The stored element, or ValueType(0) when (i, j) lies outside the
+///         shape, which lets callers treat the matrix as zero-padded.
+template <typename ValueType>
+inline ValueType get_from(size_t i, size_t j, const std::vector<ValueType>& mat,
+                          const Shape& mat_shape) {
+  const bool outside = i >= mat_shape[0] || j >= mat_shape[1];
+  return outside ? ValueType(0) : mat[i * mat_shape[1] + j];
+}
+
+/// Accumulates the product of one square tile of `mat` with `vec` into `res`.
+///
+/// The tile starts at row `ind_y`, column `ind_x` and has side `size`. It is
+/// split into four quadrants until the recursion depth exceeds kDepth2 or the
+/// side drops below kDepth1; the remaining tile is multiplied with a plain
+/// double loop. The tile may reach past the real matrix (callers pad the side
+/// to a power of two): everything outside `mat_shape`, or past the end of
+/// `vec`, is treated as zero and is skipped instead of being iterated.
+///
+/// @param mat        Row-major matrix storage.
+/// @param vec        Right-hand side vector.
+/// @param mat_shape  Matrix shape; [0] is the row count, [1] the column count.
+/// @param res        Accumulator; res[row] receives the partial dot products.
+///                   Must hold at least mat_shape[0] elements.
+/// @param ind_x      First column of the tile.
+/// @param ind_y      First row of the tile.
+/// @param size       Tile side. NOTE(review): halving truncates, so an odd
+///                   side drops its last row/column; callers in this file
+///                   pass a power of two.
+/// @param depth      Current recursion depth.
+template <typename ValueType>
+void m_mult(const std::vector<ValueType>& mat,
+            const std::vector<ValueType>& vec, const Shape& mat_shape,
+            std::vector<ValueType>& res, size_t ind_x, size_t ind_y,
+            size_t size, size_t depth) {
+  const size_t rows = mat_shape[0];
+  const size_t stride = mat_shape[1];
+  const size_t cols = std::min<size_t>(stride, vec.size());
+  // A tile that lies entirely in the zero padding contributes nothing.
+  if (ind_y >= rows || ind_x >= cols) {
+    return;
+  }
+  if (depth > kDepth2 || size < kDepth1) {
+    // Clamp to the real matrix so padded rows/columns are never visited.
+    const size_t row_end = std::min(ind_y + size, rows);
+    const size_t col_end = std::min(ind_x + size, cols);
+    for (size_t row = ind_y; row < row_end; row++) {
+      for (size_t col = ind_x; col < col_end; col++) {
+        res[row] += mat[row * stride + col] * vec[col];
+      }
+    }
+    return;
+  }
+  const size_t half = size / 2;
+  // Same quadrant order as before: left/right of the upper half, then
+  // left/right of the lower half, so each row accumulates left to right.
+  m_mult<ValueType>(mat, vec, mat_shape, res, ind_x, ind_y, half, depth + 1);
+  m_mult<ValueType>(mat, vec, mat_shape, res, ind_x + half, ind_y, half,
+                    depth + 1);
+  m_mult<ValueType>(mat, vec, mat_shape, res, ind_x, ind_y + half, half,
+                    depth + 1);
+  m_mult<ValueType>(mat, vec, mat_shape, res, ind_x + half, ind_y + half,
+                    half, depth + 1);
+}
+
+/// Task-parallel variant of m_mult: accumulates the product of one square
+/// tile of `mat` with `vec` into `res`, splitting the tile into quadrants that
+/// run as oneTBB tasks.
+///
+/// Quadrants are processed in two phases. Each phase runs the upper and the
+/// lower quadrant of one column half concurrently; those two write disjoint
+/// row ranges of `res`. The phases are separated by a wait because the left
+/// and right halves accumulate into the same rows.
+///
+/// Tiles (or parts of tiles) outside `mat_shape`, or past the end of `vec`,
+/// are treated as zero and skipped, so no task is spawned for pure padding.
+///
+/// @param mat        Row-major matrix storage.
+/// @param vec        Right-hand side vector.
+/// @param mat_shape  Matrix shape; [0] is the row count, [1] the column count.
+/// @param res        Accumulator; res[row] receives the partial dot products.
+///                   Must hold at least mat_shape[0] elements.
+/// @param ind_x      First column of the tile.
+/// @param ind_y      First row of the tile.
+/// @param size       Tile side. NOTE(review): halving truncates, so an odd
+///                   side drops its last row/column; callers in this file
+///                   pass a power of two.
+/// @param depth      Current recursion depth.
+template <typename ValueType>
+void m_mult_tbb(const std::vector<ValueType>& mat,
+                const std::vector<ValueType>& vec, const Shape& mat_shape,
+                std::vector<ValueType>& res, size_t ind_x, size_t ind_y,
+                size_t size, size_t depth) {
+  const size_t rows = mat_shape[0];
+  const size_t stride = mat_shape[1];
+  const size_t cols = std::min<size_t>(stride, vec.size());
+  // A tile that lies entirely in the zero padding contributes nothing.
+  if (ind_y >= rows || ind_x >= cols) {
+    return;
+  }
+  if (depth > kDepth2 || size < kDepth1) {
+    // Clamp to the real matrix so padded rows/columns are never visited.
+    const size_t row_end = std::min(ind_y + size, rows);
+    const size_t col_end = std::min(ind_x + size, cols);
+    for (size_t row = ind_y; row < row_end; row++) {
+      for (size_t col = ind_x; col < col_end; col++) {
+        res[row] += mat[row * stride + col] * vec[col];
+      }
+    }
+    return;
+  }
+  const size_t half = size / 2;
+  const auto recurse = [&](size_t x, size_t y) {
+    m_mult_tbb<ValueType>(mat, vec, mat_shape, res, x, y, half, depth + 1);
+  };
+  oneapi::tbb::task_group g;
+  // Phase 1: left column half; upper and lower quadrants touch different rows.
+  g.run([&]() { recurse(ind_x, ind_y); });
+  g.run([&]() { recurse(ind_x, ind_y + half); });
+  g.wait();
+  // Phase 2: right column half, accumulating into the rows from phase 1.
+  g.run([&]() { recurse(ind_x + half, ind_y); });
+  g.run([&]() { recurse(ind_x + half, ind_y + half); });
+  g.wait();
+}
+
+/// Multiplies a row-major matrix by a vector using the recursive tiled
+/// routine m_mult.
+///
+/// The work area is padded to the smallest power-of-two side that covers both
+/// matrix dimensions; the padding is cut off before returning.
+///
+/// @param mat        Row-major matrix storage.
+/// @param mat_shape  Matrix shape; must have exactly two dimensions.
+/// @param vec        Vector with at least mat_shape[1] elements.
+/// @return Vector of mat_shape[0] elements holding the product.
+/// @throws std::invalid_argument if `mat_shape` is not two-dimensional or
+///         `vec` is shorter than the column count.
+template <typename ValueType>
+std::vector<ValueType> mat_vec_mul_upd(const std::vector<ValueType>& mat,
+                                       const Shape& mat_shape,
+                                       const std::vector<ValueType>& vec) {
+  if (mat_shape.dims() != 2) {
+    throw std::invalid_argument("Not a matrix in argument");
+  }
+  const size_t rows = mat_shape[0];
+  const size_t cols = mat_shape[1];
+  if (vec.size() < cols) {
+    throw std::invalid_argument("Invalid vector size");
+  }
+  // Smallest power of two that is not below the longer matrix side.
+  const size_t longest = std::max(rows, cols);
+  size_t padded = 1;
+  while (padded < longest) {
+    padded <<= 1;
+  }
+  std::vector<ValueType> product(padded);
+  m_mult<ValueType>(mat, vec, mat_shape, product, 0, 0, padded, 1);
+  product.resize(rows);
+  return product;
+}
+
+/// Multiplies a row-major matrix by a vector using the task-parallel tiled
+/// routine m_mult_tbb.
+///
+/// The work area is padded to the smallest power-of-two side that covers both
+/// matrix dimensions; the padding is cut off before returning.
+///
+/// @param mat        Row-major matrix storage.
+/// @param mat_shape  Matrix shape; must have exactly two dimensions.
+/// @param vec        Vector with at least mat_shape[1] elements.
+/// @return Vector of mat_shape[0] elements holding the product.
+/// @throws std::invalid_argument if `mat_shape` is not two-dimensional or
+///         `vec` is shorter than the column count.
+template <typename ValueType>
+std::vector<ValueType> mat_vec_mul_upd_tbb(const std::vector<ValueType>& mat,
+                                           const Shape& mat_shape,
+                                           const std::vector<ValueType>& vec) {
+  if (mat_shape.dims() != 2) {
+    throw std::invalid_argument("Not a matrix in argument");
+  }
+  const size_t rows = mat_shape[0];
+  const size_t cols = mat_shape[1];
+  if (vec.size() < cols) {
+    throw std::invalid_argument("Invalid vector size");
+  }
+  // Smallest power of two that is not below the longer matrix side.
+  const size_t longest = std::max(rows, cols);
+  size_t padded = 1;
+  while (padded < longest) {
+    padded <<= 1;
+  }
+  std::vector<ValueType> product(padded);
+  m_mult_tbb<ValueType>(mat, vec, mat_shape, product, 0, 0, padded, 1);
+  product.resize(rows);
+  return product;
+}
+
 template <typename ValueType>
 class FCLayerImpl : public LayerImpl<ValueType> {
  public:
@@ -86,7 +202,7 @@ class FCLayerImpl : public LayerImpl<ValueType> {
   std::vector<ValueType> run(
       const std::vector<ValueType>& input) const override;
 
- private:
+ protected:
   std::vector<ValueType> weights_;
   std::vector<ValueType> bias_;
 };
@@ -129,4 +245,32 @@ std::vector<ValueType> FCLayerImpl<ValueType>::run(
                  output_values.begin(), std::plus<ValueType>());
   return output_values;
 }
+
+/// Fully-connected layer implementation whose run() uses the oneTBB-based
+/// matrix-vector product. Construction is forwarded unchanged to
+/// FCLayerImpl.
+template <typename ValueType>
+class FCLayerImplTBB : public FCLayerImpl<ValueType> {
+  using Base = FCLayerImpl<ValueType>;
+
+ public:
+  /// Forwards weights, their shape and the bias to the base implementation.
+  FCLayerImplTBB(const std::vector<ValueType>& input_weights,
+                 const Shape& input_weights_shape,
+                 const std::vector<ValueType>& input_bias)
+      : Base(input_weights, input_weights_shape, input_bias) {}
+
+  /// Computes weights * input + bias; see the out-of-class definition.
+  std::vector<ValueType> run(
+      const std::vector<ValueType>& input) const override;
+};
+
+/// Applies the layer to `input`: multiplies the stored weights by the input
+/// with mat_vec_mul_upd_tbb and adds the bias element-wise.
+///
+/// @param input  Input vector; its size must equal inputShape_[0].
+/// @return Output vector with the bias already added.
+/// @throws std::invalid_argument if the input size does not match.
+template <typename ValueType>
+std::vector<ValueType> FCLayerImplTBB<ValueType>::run(
+    const std::vector<ValueType>& input) const {
+  const size_t in_size = this->inputShape_[0];
+  if (input.size() != in_size) {
+    throw std::invalid_argument("Input size doesn't fit FCLayer");
+  }
+  // Weights are laid out as (outputs x inputs).
+  const Shape weights_shape({this->outputShape_[0], in_size});
+  std::vector<ValueType> result =
+      mat_vec_mul_upd_tbb(this->weights_, weights_shape, input);
+  auto bias_it = this->bias_.begin();
+  for (ValueType& value : result) {
+    value = value + *bias_it;
+    ++bias_it;
+  }
+  return result;
+}
+
 }  // namespace itlab_2023
@@ -7,6 +7,7 @@
 
 #include "layers/Shape.hpp"
 #include "layers/Tensor.hpp"
+#include "oneapi/tbb.h"
 
 namespace itlab_2023 {
 
@@ -21,6 +22,8 @@ enum LayerType {
   kOutput,
 };
 
+// Implementation selector accepted by layer constructors (e.g. FCLayer,
+// PoolingLayer); kDefault is their default argument.
+enum ImplType {
+  kDefault,  // presumably the plain sequential implementation - confirm
+  kTBB       // presumably the oneTBB-based implementation - confirm
+};
+
 class Layer {
  public:
   Layer() = default;

@@ -13,8 +13,11 @@ enum PoolingType { kAverage, kMax };
 class PoolingLayer : public Layer {
  public:
   PoolingLayer() = default;
-  PoolingLayer(const Shape& pooling_shape, std::string pooling_type = "average")
-      : poolingShape_(pooling_shape), poolingType_(std::move(pooling_type)) {}
+  PoolingLayer(const Shape& pooling_shape, std::string pooling_type = "average",
+               ImplType implType = kDefault)
+      : poolingShape_(pooling_shape),
+        poolingType_(std::move(pooling_type)),
+        implType_(implType) {}
   static std::string get_name() { return "Pooling layer"; }
   void run(const Tensor& input, Tensor& output) override;
 #ifdef ENABLE_STATISTIC_WEIGHTS
@@ -28,13 +31,14 @@ class PoolingLayer : public Layer {
  private:
   Shape poolingShape_;
   std::string poolingType_;
+  ImplType implType_;
 };
 
-inline bool isOutOfBounds(size_t index, int coord, const Shape& shape) {
+inline size_t coord_size(int coord, const Shape& shape) {
   if (coord >= 0 && static_cast<size_t>(coord) < shape.dims()) {
-    return (index >= shape[coord]);
+    return shape[coord];
   }
-  return (index > 0);
+  return 1;
 }
 
 template <typename ValueType>
@@ -65,7 +69,7 @@ class PoolingLayerImpl : public LayerImpl<ValueType> {
   std::vector<ValueType> run(
       const std::vector<ValueType>& input) const override;
 
- private:
+ protected:
   Shape poolingShape_;
   PoolingType poolingType_;
 };
@@ -120,24 +124,23 @@ std::vector<ValueType> PoolingLayerImpl<ValueType>::run(
   int input_h_index = this->inputShape_.dims() > 2
                           ? (static_cast<int>(this->inputShape_.dims()) - 2)
                           : 0;
-  // O(N^2)
-  for (size_t n = 0; !isOutOfBounds(n, input_h_index - 2, this->outputShape_);
+  for (size_t n = 0; n < coord_size(input_h_index - 2, this->outputShape_);
        n++) {
-    for (size_t c = 0; !isOutOfBounds(c, input_h_index - 1, this->outputShape_);
+    for (size_t c = 0; c < coord_size(input_h_index - 1, this->outputShape_);
          c++) {
-      for (size_t i = 0; !isOutOfBounds(i, input_h_index, this->outputShape_);
+      for (size_t i = 0; i < coord_size(input_h_index, this->outputShape_);
            i++) {
         for (size_t j = 0;
-             !isOutOfBounds(j, input_h_index + 1, this->outputShape_); j++) {
+             j < coord_size(input_h_index + 1, this->outputShape_); j++) {
           tmpheight = poolingShape_[0] * i;
           if (poolingShape_.dims() == 1) {
             tmpwidth = j;
           } else {
             tmpwidth = poolingShape_[1] * j;
           }
           // to get matrix block for pooling
-          for (size_t k = 0; !isOutOfBounds(k, 0, poolingShape_); k++) {
-            for (size_t l = 0; !isOutOfBounds(l, 1, poolingShape_); l++) {
+          for (size_t k = 0; k < coord_size(0, poolingShape_); k++) {
+            for (size_t l = 0; l < coord_size(1, poolingShape_); l++) {
               if (this->inputShape_.dims() == 1) {
                 pooling_buf.push_back(input[tmpheight + k]);
               } else {
@@ -166,4 +169,101 @@ std::vector<ValueType> PoolingLayerImpl<ValueType>::run(
   }
   return res;
 }
+
+/// Pooling layer implementation whose run() distributes the output cells
+/// over oneTBB parallel loops. Construction is forwarded unchanged to
+/// PoolingLayerImpl.
+template <typename ValueType>
+class PoolingLayerImplTBB : public PoolingLayerImpl<ValueType> {
+  using Base = PoolingLayerImpl<ValueType>;
+
+ public:
+  /// Forwards the input shape, pooling window shape and pooling type name
+  /// ("average" by default) to the base implementation.
+  PoolingLayerImplTBB(const Shape& input_shape, const Shape& pooling_shape,
+                      const std::string& pooling_type = "average")
+      : Base(input_shape, pooling_shape, pooling_type) {}
+
+  /// Pools `input`; see the out-of-class definition.
+  std::vector<ValueType> run(
+      const std::vector<ValueType>& input) const override;
+};
+
+/// Pools `input` according to the layer's pooling shape and type, computing
+/// the output cells in parallel with oneTBB.
+///
+/// The outer parallel loop covers the two dimensions that precede height and
+/// width in the output shape (each counts as 1 when absent); the nested
+/// parallel loop covers output height and width. Every output cell gathers
+/// its window into a private buffer and reduces it with avg_pooling or
+/// max_pooling, so tasks share no mutable state except distinct cells of the
+/// result.
+///
+/// @param input  Flat input; its size must equal inputShape_.count().
+/// @return Flat output of outputShape_.count() elements.
+/// @throws std::invalid_argument if the input size does not match.
+/// @throws std::runtime_error if the pooling type is neither kAverage nor
+///         kMax.
+template <typename ValueType>
+std::vector<ValueType> PoolingLayerImplTBB<ValueType>::run(
+    const std::vector<ValueType>& input) const {
+  if (input.size() != this->inputShape_.count()) {
+    throw std::invalid_argument("Input size doesn't fit pooling layer");
+  }
+  std::vector<ValueType> res(this->outputShape_.count());
+  const size_t in_dims = this->inputShape_.dims();
+  const int input_h_index = in_dims > 2 ? (static_cast<int>(in_dims) - 2) : 0;
+  // Loop extents never change while running, so evaluate them once.
+  const size_t out_rows = coord_size(input_h_index, this->outputShape_);
+  const size_t out_cols = coord_size(input_h_index + 1, this->outputShape_);
+  const size_t window_h = coord_size(0, this->poolingShape_);
+  const size_t window_w = coord_size(1, this->poolingShape_);
+
+  // Flat index of (n, c, h, w) in `shape`, keeping only the trailing in_dims
+  // coordinates. NOTE(review): like the previous code this assumes the input
+  // has at most four dimensions - confirm the constructor guarantees it.
+  const auto flat_index = [in_dims](const Shape& shape, size_t n, size_t c,
+                                    size_t h, size_t w) {
+    const size_t full[4] = {n, c, h, w};
+    return shape.get_index(
+        std::vector<size_t>(full + (4 - in_dims), full + 4));
+  };
+
+  // Computes one output cell (n, c, i, j).
+  const auto pool_cell = [&](size_t n, size_t c, size_t i, size_t j) {
+    const size_t first_row = this->poolingShape_[0] * i;
+    // A one-dimensional pooling shape does not scale the column index.
+    const size_t first_col = this->poolingShape_.dims() == 1
+                                 ? j
+                                 : this->poolingShape_[1] * j;
+    std::vector<ValueType> pooling_buf;
+    pooling_buf.reserve(window_h * window_w);
+    for (size_t k = 0; k < window_h; k++) {
+      for (size_t l = 0; l < window_w; l++) {
+        if (in_dims == 1) {
+          pooling_buf.push_back(input[first_row + k]);
+        } else {
+          pooling_buf.push_back(input[flat_index(
+              this->inputShape_, n, c, first_row + k, first_col + l)]);
+        }
+      }
+    }
+    ValueType pooled;
+    switch (this->poolingType_) {
+      case kAverage:
+        pooled = avg_pooling(pooling_buf);
+        break;
+      case kMax:
+        pooled = max_pooling(pooling_buf);
+        break;
+      default:
+        throw std::runtime_error("Unknown pooling type");
+    }
+    if (in_dims == 1) {
+      res[i] = pooled;
+    } else {
+      res[flat_index(this->outputShape_, n, c, i, j)] = pooled;
+    }
+  };
+
+  oneapi::tbb::parallel_for(
+      oneapi::tbb::blocked_range2d<size_t>(
+          0, coord_size(input_h_index - 2, this->outputShape_), 0,
+          coord_size(input_h_index - 1, this->outputShape_)),
+      [&](const oneapi::tbb::blocked_range2d<size_t>& outer) {
+        for (size_t n = outer.rows().begin(); n < outer.rows().end(); n++) {
+          for (size_t c = outer.cols().begin(); c < outer.cols().end(); c++) {
+            oneapi::tbb::parallel_for(
+                oneapi::tbb::blocked_range2d<size_t>(0, out_rows, 0, out_cols),
+                [&](const oneapi::tbb::blocked_range2d<size_t>& inner) {
+                  for (size_t i = inner.rows().begin(); i < inner.rows().end();
+                       i++) {
+                    for (size_t j = inner.cols().begin();
+                         j < inner.cols().end(); j++) {
+                      pool_cell(n, c, i, j);
+                    }
+                  }
+                });
+          }
+        }
+      });
+  return res;
+}
+
 }  // namespace itlab_2023
@@ -1,2 +1,3 @@
 file(GLOB_RECURSE graph_src *.cpp)
 add_library(graph_lib STATIC "${GRAPH_HEADERS}" "${graph_src}")
+target_link_libraries(graph_lib PUBLIC TBB::tbb)