Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Optimize FC layer using TBB, update layer API to choose perf/reference implementation #143

Open
wants to merge 12 commits into
base: main
Choose a base branch
from
156 changes: 150 additions & 6 deletions include/layers/FCLayer.hpp
Original file line number Diff line number Diff line change
@@ -1,23 +1,26 @@
#pragma once
#include <algorithm>
#include <stdexcept>
#include <utility>
#include <vector>

#include "layers/Layer.hpp"

namespace itlab_2023 {

// Block side length below which m_mult / m_mult_tbb stop subdividing and
// multiply the current block directly (see their `size < kDepth1` check).
const size_t kDepth1 = 128;
// Recursion depth beyond which m_mult / m_mult_tbb stop subdividing
// (see their `depth > kDepth2` check).
const size_t kDepth2 = 5;

class FCLayer : public Layer {
private:
Tensor weights_;
Tensor bias_;
ImplType implType_;

public:
FCLayer() = default;
FCLayer(const Tensor& weights, const Tensor& bias) {
weights_ = weights;
bias_ = bias;
}
FCLayer(Tensor weights, const Tensor& bias, ImplType implType = kDefault)
: weights_(std::move(weights)), bias_(bias), implType_(implType) {}
static std::string get_name() { return "Fully-connected layer"; }
void run(const Tensor& input, Tensor& output) override;
#ifdef ENABLE_STATISTIC_WEIGHTS
Expand All @@ -32,7 +35,7 @@ std::vector<ValueType> mat_vec_mul(const std::vector<ValueType>& mat,
if (mat_shape.dims() != 2) {
throw std::invalid_argument("Not a matrix in argument");
}
if (vec.size() != mat_shape[1]) {
if (vec.size() < mat_shape[1]) {
throw std::invalid_argument("Invalid vector size");
}
Shape res_shape(1);
Expand All @@ -50,6 +53,119 @@ std::vector<ValueType> mat_vec_mul(const std::vector<ValueType>& mat,
return res;
}

// Reads element (i, j) of a row-major matrix stored flat in `mat`, where
// mat_shape[0] is the row count and mat_shape[1] the column count.
// Coordinates outside the matrix yield ValueType(0), which lets callers treat
// the matrix as if it were zero-padded to a larger size.
template <typename ValueType>
inline ValueType get_from(size_t i, size_t j, const std::vector<ValueType>& mat,
                          const Shape& mat_shape) {
  const bool outside = i >= mat_shape[0] || j >= mat_shape[1];
  if (outside) {
    return ValueType(0);
  }
  const size_t flat_index = i * mat_shape[1] + j;
  return mat[flat_index];
}

// Accumulates into `res` the product of one square block of `mat` with the
// matching slice of `vec`, recursively splitting the block into quadrants.
//
//   mat, mat_shape  row-major matrix and its (rows, cols) shape; reads outside
//                   the shape are treated as zero by get_from.
//   vec             input vector; columns at or beyond vec.size() are skipped.
//   res             output accumulator. Results are added with +=, so the
//                   caller is expected to pass it zero-initialised and at
//                   least ind_y + size elements long.
//   ind_x, ind_y    column and row offset of the block's top-left corner.
//   size            side length of the block.
//   depth           current recursion depth.
//
// Recursion stops (and the block is multiplied directly) once depth exceeds
// kDepth2 or the block side drops below kDepth1.
template <typename ValueType>
void m_mult(const std::vector<ValueType>& mat,
            const std::vector<ValueType>& vec, const Shape& mat_shape,
            std::vector<ValueType>& res, size_t ind_x, size_t ind_y,
            size_t size, size_t depth) {
  if (depth > kDepth2 || size < kDepth1) {
    for (size_t i = 0; i < size; i++) {
      for (size_t j = 0; j < size; j++) {
        if (ind_x + j < vec.size()) {
          res[ind_y + i] +=
              get_from(ind_y + i, ind_x + j, mat, mat_shape) * vec[ind_x + j];
        }
      }
    }
    return;
  }
  const size_t half = size / 2;
  // Visit the quadrants in the order (x, y) = (0,0), (half,0), (0,half),
  // (half,half): bit 0 of q selects the column half, bit 1 the row half.
  // For every row the left block is therefore accumulated before the right
  // one. The offsets are computed inline, so no container is allocated per
  // recursive call.
  for (size_t q = 0; q < 4; q++) {
    const size_t off_x = (q & 1) != 0 ? half : 0;
    const size_t off_y = (q & 2) != 0 ? half : 0;
    m_mult<ValueType>(mat, vec, mat_shape, res, ind_x + off_x, ind_y + off_y,
                      half, depth + 1);
  }
}

// Parallel counterpart of m_mult: accumulates into `res` the product of one
// square block of `mat` with the matching slice of `vec`, splitting the block
// into quadrants and processing them as oneTBB tasks.
//
//   mat, mat_shape  row-major matrix and its (rows, cols) shape; reads outside
//                   the shape are treated as zero by get_from.
//   vec             input vector; columns at or beyond vec.size() are skipped.
//   res             output accumulator (updated with +=); the caller is
//                   expected to pass it zero-initialised and at least
//                   ind_y + size elements long.
//   ind_x, ind_y    column and row offset of the block's top-left corner.
//   size            side length of the block.
//   depth           current recursion depth.
//
// Recursion stops (and the block is multiplied sequentially) once depth
// exceeds kDepth2 or the block side drops below kDepth1.
template <typename ValueType>
void m_mult_tbb(const std::vector<ValueType>& mat,
                const std::vector<ValueType>& vec, const Shape& mat_shape,
                std::vector<ValueType>& res, size_t ind_x, size_t ind_y,
                size_t size, size_t depth) {
  if (depth > kDepth2 || size < kDepth1) {
    for (size_t i = 0; i < size; i++) {
      for (size_t j = 0; j < size; j++) {
        if (ind_x + j < vec.size()) {
          res[ind_y + i] +=
              get_from(ind_y + i, ind_x + j, mat, mat_shape) * vec[ind_x + j];
        }
      }
    }
    return;
  }
  const size_t half = size / 2;
  oneapi::tbb::task_group g;
  // Phase 1: the two left-hand quadrants. They cover disjoint row ranges
  // [ind_y, ind_y + half) and [ind_y + half, ind_y + size), so the two tasks
  // never write the same element of `res`.
  g.run([&]() {
    m_mult_tbb<ValueType>(mat, vec, mat_shape, res, ind_x, ind_y, half,
                          depth + 1);
  });
  g.run([&]() {
    m_mult_tbb<ValueType>(mat, vec, mat_shape, res, ind_x, ind_y + half, half,
                          depth + 1);
  });
  // The right-hand quadrants update the same rows as the left-hand ones, so
  // they must not start before phase 1 has finished.
  g.wait();
  // Phase 2: the two right-hand quadrants, again on disjoint row ranges.
  g.run([&]() {
    m_mult_tbb<ValueType>(mat, vec, mat_shape, res, ind_x + half, ind_y, half,
                          depth + 1);
  });
  g.run([&]() {
    m_mult_tbb<ValueType>(mat, vec, mat_shape, res, ind_x + half, ind_y + half,
                          half, depth + 1);
  });
  // The lambdas capture locals by reference, so wait before returning.
  g.wait();
}

// Multiplies the row-major matrix `mat` (shape `mat_shape`) by `vec` using the
// recursive block routine m_mult and returns a vector with mat_shape[0]
// elements.
//
// Throws std::invalid_argument if `mat_shape` is not two-dimensional or if
// `vec` has fewer elements than the matrix has columns.
template <typename ValueType>
std::vector<ValueType> mat_vec_mul_upd(const std::vector<ValueType>& mat,
                                       const Shape& mat_shape,
                                       const std::vector<ValueType>& vec) {
  if (mat_shape.dims() != 2) {
    throw std::invalid_argument("Not a matrix in argument");
  }
  const size_t rows = mat_shape[0];
  const size_t cols = mat_shape[1];
  if (vec.size() < cols) {
    throw std::invalid_argument("Invalid vector size");
  }
  // m_mult halves a square block repeatedly, so work on the smallest
  // power-of-two side that covers both matrix dimensions.
  const size_t longest_side = std::max(rows, cols);
  size_t padded_side = 1;
  while (padded_side < longest_side) {
    padded_side <<= 1;
  }
  std::vector<ValueType> product(padded_side);
  m_mult<ValueType>(mat, vec, mat_shape, product, 0, 0, padded_side, 1);
  // Drop the padding rows so only the real output rows are returned.
  product.resize(rows);
  return product;
}

// Multiplies the row-major matrix `mat` (shape `mat_shape`) by `vec` using the
// task-parallel block routine m_mult_tbb and returns a vector with
// mat_shape[0] elements.
//
// Throws std::invalid_argument if `mat_shape` is not two-dimensional or if
// `vec` has fewer elements than the matrix has columns.
template <typename ValueType>
std::vector<ValueType> mat_vec_mul_upd_tbb(const std::vector<ValueType>& mat,
                                           const Shape& mat_shape,
                                           const std::vector<ValueType>& vec) {
  if (mat_shape.dims() != 2) {
    throw std::invalid_argument("Not a matrix in argument");
  }
  const size_t rows = mat_shape[0];
  const size_t cols = mat_shape[1];
  if (vec.size() < cols) {
    throw std::invalid_argument("Invalid vector size");
  }
  // m_mult_tbb halves a square block repeatedly, so work on the smallest
  // power-of-two side that covers both matrix dimensions.
  const size_t longest_side = std::max(rows, cols);
  size_t padded_side = 1;
  while (padded_side < longest_side) {
    padded_side <<= 1;
  }
  std::vector<ValueType> product(padded_side);
  m_mult_tbb<ValueType>(mat, vec, mat_shape, product, 0, 0, padded_side, 1);
  // Drop the padding rows so only the real output rows are returned.
  product.resize(rows);
  return product;
}

template <typename ValueType>
class FCLayerImpl : public LayerImpl<ValueType> {
public:
Expand Down Expand Up @@ -86,7 +202,7 @@ class FCLayerImpl : public LayerImpl<ValueType> {
std::vector<ValueType> run(
const std::vector<ValueType>& input) const override;

private:
protected:
std::vector<ValueType> weights_;
std::vector<ValueType> bias_;
};
Expand Down Expand Up @@ -129,4 +245,32 @@ std::vector<ValueType> FCLayerImpl<ValueType>::run(
output_values.begin(), std::plus<ValueType>());
return output_values;
}

// Fully-connected layer implementation whose run() performs the
// matrix-vector product with the oneTBB-based routine mat_vec_mul_upd_tbb.
// Construction is forwarded unchanged to FCLayerImpl, whose weights_ and
// bias_ members this class reads.
template <typename ValueType>
class FCLayerImplTBB : public FCLayerImpl<ValueType> {
 public:
  // Forwards the weights, their shape and the bias to the FCLayerImpl
  // constructor.
  FCLayerImplTBB(const std::vector<ValueType>& input_weights,
                 const Shape& input_weights_shape,
                 const std::vector<ValueType>& input_bias)
      : FCLayerImpl<ValueType>(input_weights, input_weights_shape, input_bias) {
  }
  // Computes weights * input + bias; see the out-of-class definition.
  std::vector<ValueType> run(
      const std::vector<ValueType>& input) const override;
};

// Applies the fully-connected layer to `input`: multiplies the stored weights
// by `input` with mat_vec_mul_upd_tbb and adds the bias element-wise.
//
// Throws std::invalid_argument if input.size() differs from inputShape_[0].
template <typename ValueType>
std::vector<ValueType> FCLayerImplTBB<ValueType>::run(
    const std::vector<ValueType>& input) const {
  if (input.size() != this->inputShape_[0]) {
    throw std::invalid_argument("Input size doesn't fit FCLayer");
  }
  // The weights are interpreted as an (outputs x inputs) row-major matrix.
  Shape cur_w_shape({this->outputShape_[0], this->inputShape_[0]});
  std::vector<ValueType> output_values =
      mat_vec_mul_upd_tbb(this->weights_, cur_w_shape, input);
  // output_values[k] += bias_[k] for every output element.
  std::transform(output_values.begin(), output_values.end(),
                 this->bias_.begin(), output_values.begin(),
                 std::plus<ValueType>());
  return output_values;
}

} // namespace itlab_2023
3 changes: 3 additions & 0 deletions include/layers/Layer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

#include "layers/Shape.hpp"
#include "layers/Tensor.hpp"
#include "oneapi/tbb.h"

namespace itlab_2023 {

Expand All @@ -21,6 +22,8 @@ enum LayerType {
kOutput,
};

// Implementation selector accepted by layer constructors (e.g. FCLayer,
// PoolingLayer); kDefault is their default argument.
// NOTE(review): kTBB presumably routes run() to the *ImplTBB classes - the
// dispatch code is not visible here, confirm.
enum ImplType { kDefault, kTBB };

class Layer {
public:
Layer() = default;
Expand Down
126 changes: 113 additions & 13 deletions include/layers/PoolingLayer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,11 @@ enum PoolingType { kAverage, kMax };
class PoolingLayer : public Layer {
public:
PoolingLayer() = default;
PoolingLayer(const Shape& pooling_shape, std::string pooling_type = "average")
: poolingShape_(pooling_shape), poolingType_(std::move(pooling_type)) {}
PoolingLayer(const Shape& pooling_shape, std::string pooling_type = "average",
ImplType implType = kDefault)
: poolingShape_(pooling_shape),
poolingType_(std::move(pooling_type)),
implType_(implType) {}
static std::string get_name() { return "Pooling layer"; }
void run(const Tensor& input, Tensor& output) override;
#ifdef ENABLE_STATISTIC_WEIGHTS
Expand All @@ -28,13 +31,14 @@ class PoolingLayer : public Layer {
private:
Shape poolingShape_;
std::string poolingType_;
ImplType implType_;
};

inline bool isOutOfBounds(size_t index, int coord, const Shape& shape) {
inline size_t coord_size(int coord, const Shape& shape) {
if (coord >= 0 && static_cast<size_t>(coord) < shape.dims()) {
return (index >= shape[coord]);
return shape[coord];
}
return (index > 0);
return 1;
}

template <typename ValueType>
Expand Down Expand Up @@ -65,7 +69,7 @@ class PoolingLayerImpl : public LayerImpl<ValueType> {
std::vector<ValueType> run(
const std::vector<ValueType>& input) const override;

private:
protected:
Shape poolingShape_;
PoolingType poolingType_;
};
Expand Down Expand Up @@ -120,24 +124,23 @@ std::vector<ValueType> PoolingLayerImpl<ValueType>::run(
int input_h_index = this->inputShape_.dims() > 2
? (static_cast<int>(this->inputShape_.dims()) - 2)
: 0;
// O(N^2)
for (size_t n = 0; !isOutOfBounds(n, input_h_index - 2, this->outputShape_);
for (size_t n = 0; n < coord_size(input_h_index - 2, this->outputShape_);
n++) {
for (size_t c = 0; !isOutOfBounds(c, input_h_index - 1, this->outputShape_);
for (size_t c = 0; c < coord_size(input_h_index - 1, this->outputShape_);
c++) {
for (size_t i = 0; !isOutOfBounds(i, input_h_index, this->outputShape_);
for (size_t i = 0; i < coord_size(input_h_index, this->outputShape_);
i++) {
for (size_t j = 0;
!isOutOfBounds(j, input_h_index + 1, this->outputShape_); j++) {
j < coord_size(input_h_index + 1, this->outputShape_); j++) {
tmpheight = poolingShape_[0] * i;
if (poolingShape_.dims() == 1) {
tmpwidth = j;
} else {
tmpwidth = poolingShape_[1] * j;
}
// to get matrix block for pooling
for (size_t k = 0; !isOutOfBounds(k, 0, poolingShape_); k++) {
for (size_t l = 0; !isOutOfBounds(l, 1, poolingShape_); l++) {
for (size_t k = 0; k < coord_size(0, poolingShape_); k++) {
for (size_t l = 0; l < coord_size(1, poolingShape_); l++) {
if (this->inputShape_.dims() == 1) {
pooling_buf.push_back(input[tmpheight + k]);
} else {
Expand Down Expand Up @@ -166,4 +169,101 @@ std::vector<ValueType> PoolingLayerImpl<ValueType>::run(
}
return res;
}

// Pooling layer implementation whose run() distributes the output positions
// over oneTBB parallel_for loops. Construction is forwarded unchanged to
// PoolingLayerImpl, whose poolingShape_ and poolingType_ members this class
// reads.
template <typename ValueType>
class PoolingLayerImplTBB : public PoolingLayerImpl<ValueType> {
 public:
  // Forwards the input shape, pooling window shape and pooling type name to
  // the PoolingLayerImpl constructor.
  PoolingLayerImplTBB(const Shape& input_shape, const Shape& pooling_shape,
                      const std::string& pooling_type = "average")
      : PoolingLayerImpl<ValueType>(input_shape, pooling_shape, pooling_type) {}
  // Computes the pooled output; see the out-of-class definition.
  std::vector<ValueType> run(
      const std::vector<ValueType>& input) const override;
};

// Applies the pooling layer to `input` and returns outputShape_.count()
// values. Output positions are processed with nested oneTBB parallel_for
// loops: the outer loop covers the two dimensions preceding height (treated
// as extent 1 when the shape lacks them), the inner loop covers output height
// and width. Every iteration writes a distinct element of the result.
//
// Throws std::invalid_argument if input.size() differs from
// inputShape_.count(), and std::runtime_error if poolingType_ is neither
// kAverage nor kMax.
template <typename ValueType>
std::vector<ValueType> PoolingLayerImplTBB<ValueType>::run(
    const std::vector<ValueType>& input) const {
  if (input.size() != this->inputShape_.count()) {
    throw std::invalid_argument("Input size doesn't fit pooling layer");
  }
  std::vector<ValueType> res(this->outputShape_.count());
  const size_t in_dims = this->inputShape_.dims();
  // Index of the height dimension: the second-to-last one for shapes with
  // more than two dimensions, otherwise 0.
  const int input_h_index = in_dims > 2 ? (static_cast<int>(in_dims) - 2) : 0;
  // Window extents; a missing window dimension counts as 1 (see coord_size).
  const size_t pool_rows = coord_size(0, this->poolingShape_);
  const size_t pool_cols = coord_size(1, this->poolingShape_);

  // Builds the coordinate vector for get_index by keeping the last `in_dims`
  // entries of (n, c, h, w), matching the rank of the input shape.
  auto tail_coords = [in_dims](size_t n, size_t c, size_t h, size_t w) {
    const size_t full[4] = {n, c, h, w};
    return std::vector<size_t>(full + (4 - in_dims), full + 4);
  };

  using Range2d = oneapi::tbb::blocked_range2d<size_t>;
  oneapi::tbb::parallel_for(
      Range2d(0, coord_size(input_h_index - 2, this->outputShape_), 0,
              coord_size(input_h_index - 1, this->outputShape_)),
      [&](const Range2d& outer) {
        for (size_t n = outer.rows().begin(); n < outer.rows().end(); n++) {
          for (size_t c = outer.cols().begin(); c < outer.cols().end(); c++) {
            oneapi::tbb::parallel_for(
                Range2d(0, coord_size(input_h_index, this->outputShape_), 0,
                        coord_size(input_h_index + 1, this->outputShape_)),
                [&](const Range2d& inner) {
                  for (size_t i = inner.rows().begin(); i < inner.rows().end();
                       i++) {
                    for (size_t j = inner.cols().begin();
                         j < inner.cols().end(); j++) {
                      // Top-left corner of the pooling window in the input.
                      const size_t tmpheight = this->poolingShape_[0] * i;
                      const size_t tmpwidth =
                          this->poolingShape_.dims() == 1
                              ? j
                              : this->poolingShape_[1] * j;
                      // Gather the window's values.
                      std::vector<ValueType> pooling_buf;
                      pooling_buf.reserve(pool_rows * pool_cols);
                      for (size_t k = 0; k < pool_rows; k++) {
                        for (size_t l = 0; l < pool_cols; l++) {
                          if (in_dims == 1) {
                            pooling_buf.push_back(input[tmpheight + k]);
                          } else {
                            pooling_buf.push_back(
                                input[this->inputShape_.get_index(tail_coords(
                                    n, c, tmpheight + k, tmpwidth + l))]);
                          }
                        }
                      }
                      // Flat position of this output element.
                      const size_t out_index =
                          in_dims == 1 ? i
                                       : this->outputShape_.get_index(
                                             tail_coords(n, c, i, j));
                      switch (this->poolingType_) {
                        case kAverage:
                          res[out_index] = avg_pooling(pooling_buf);
                          break;
                        case kMax:
                          res[out_index] = max_pooling(pooling_buf);
                          break;
                        default:
                          throw std::runtime_error("Unknown pooling type");
                      }
                    }
                  }
                });
          }
        }
      });
  return res;
}

} // namespace itlab_2023
1 change: 1 addition & 0 deletions src/graph/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
# Collect every .cpp file under this directory (recursively) into graph_src.
file(GLOB_RECURSE graph_src *.cpp)
# Build the static library graph_lib from the headers in GRAPH_HEADERS
# (set outside this file) and the globbed sources.
add_library(graph_lib STATIC "${GRAPH_HEADERS}" "${graph_src}")
# Link oneTBB; PUBLIC so that targets linking graph_lib also receive the
# TBB::tbb usage requirements.
target_link_libraries(graph_lib PUBLIC TBB::tbb)
Loading
Loading