Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize FC layer using TBB, update layer API to choose perf/reference implementation #143

Open
wants to merge 12 commits into
base: main
Choose a base branch
from
273 changes: 268 additions & 5 deletions include/layers/FCLayer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,19 @@

namespace itlab_2023 {

// Size threshold for the recursive block multiplication: when both matrix
// dimensions are <= DEPTH, mat_vec_mul_upd / mat_vec_mul_upd_tbb stop
// splitting and call the plain mat_vec_mul instead.
const size_t DEPTH = 64;
NeiroYT marked this conversation as resolved.
Show resolved Hide resolved

class FCLayer : public Layer {
private:
Tensor weights_;
Tensor bias_;
ImplType implType_;

public:
FCLayer() = default;
FCLayer(const Tensor& weights, const Tensor& bias) {
weights_ = weights;
bias_ = bias;
}
FCLayer(const Tensor& weights, const Tensor& bias,
NeiroYT marked this conversation as resolved.
Show resolved Hide resolved
NeiroYT marked this conversation as resolved.
Show resolved Hide resolved
NeiroYT marked this conversation as resolved.
Show resolved Hide resolved
NeiroYT marked this conversation as resolved.
Show resolved Hide resolved
NeiroYT marked this conversation as resolved.
Show resolved Hide resolved
NeiroYT marked this conversation as resolved.
Show resolved Hide resolved
ImplType implType = kDefault)
: weights_(weights), bias_(bias), implType_(implType) {}
static std::string get_name() { return "Fully-connected layer"; }
void run(const Tensor& input, Tensor& output) override;
};
Expand Down Expand Up @@ -47,6 +49,240 @@
return res;
}

// Bounds-checked read of element (i, j) from a row-major matrix stored in a
// flat vector. Positions outside mat_shape read as zero, which lets callers
// treat the matrix as if it were zero-padded to a larger size.
template <typename ValueType>
inline ValueType get_from(size_t i, size_t j, const std::vector<ValueType>& mat,
                          const Shape& mat_shape) {
  const bool inside = i < mat_shape[0] && j < mat_shape[1];
  if (!inside) {
    return ValueType(0);
  }
  const size_t flat_index = i * mat_shape[1] + j;
  return mat[flat_index];
}

// Element-wise sum of two flat buffers.
// The result has mat1.size() elements: res[k] = mat1[k] + mat2[k].
// Throws std::invalid_argument if mat2 has fewer elements than mat1, because
// std::transform would otherwise read past the end of mat2. Extra trailing
// elements of mat2 are ignored.
template <typename ValueType>
inline std::vector<ValueType> m_plus(const std::vector<ValueType>& mat1,
                                     const std::vector<ValueType>& mat2) {
  if (mat2.size() < mat1.size()) {
    throw std::invalid_argument("Second operand is shorter than the first");
  }
  std::vector<ValueType> res(mat1.size());
  std::transform(mat1.begin(), mat1.end(), mat2.begin(), res.begin(),
                 std::plus<ValueType>());
  return res;
}

// Element-wise difference of two flat buffers.
// The result has mat1.size() elements: res[k] = mat1[k] - mat2[k].
// Throws std::invalid_argument if mat2 has fewer elements than mat1, because
// std::transform would otherwise read past the end of mat2. Extra trailing
// elements of mat2 are ignored.
template <typename ValueType>
inline std::vector<ValueType> m_minus(const std::vector<ValueType>& mat1,
                                      const std::vector<ValueType>& mat2) {
  if (mat2.size() < mat1.size()) {
    throw std::invalid_argument("Second operand is shorter than the first");
  }
  std::vector<ValueType> res(mat1.size());
  std::transform(mat1.begin(), mat1.end(), mat2.begin(), res.begin(),
                 std::minus<ValueType>());
  return res;
}

// Splits a matrix (viewed as zero-padded to near_pow2 x near_pow2) into four
// quadrants and the vector into two halves, appending the values to tmp:
//   tmp[0] - top-left,    tmp[1] - top-right,
//   tmp[2] - bottom-left, tmp[3] - bottom-right,
//   tmp[4] - first half of vec, tmp[5] - second half of vec.
// Each quadrant is filled row by row. tmp must already hold at least six
// vectors; values are appended with push_back, so they are expected to be
// empty on entry.
template <typename ValueType>
void split_into_blocks(const std::vector<ValueType>& mat,
                       const Shape& mat_shape,
                       const std::vector<ValueType>& vec,
                       std::vector<std::vector<ValueType> >& tmp,
                       size_t near_pow2) {
  const size_t half = near_pow2 / 2;
  for (size_t row = 0; row < near_pow2; ++row) {
    // Rows of the upper half feed blocks 0/1, rows of the lower half 2/3.
    const size_t first_block = (row < half) ? 0 : 2;
    for (size_t col = 0; col < near_pow2; ++col) {
      const size_t block = first_block + ((col < half) ? 0 : 1);
      tmp[block].push_back(get_from<ValueType>(row, col, mat, mat_shape));
    }
  }
  // The vector is read through get_from as row 0, so entries past
  // mat_shape[1] come back as zero padding.
  for (size_t pos = 0; pos < near_pow2; ++pos) {
    const size_t part = (pos < half) ? 4 : 5;
    tmp[part].push_back(get_from<ValueType>(0, pos, vec, mat_shape));
  }
}

// Recursive block matrix-vector product.
//
// mat is a row-major matrix of shape mat_shape, vec has mat_shape[1]
// elements; the result has mat_shape[0] elements.
// Throws std::invalid_argument if mat_shape is not two-dimensional or if the
// vector length does not match the number of columns.
//
// Matrices with both dimensions <= DEPTH go straight to mat_vec_mul. Larger
// ones are treated as zero-padded to the next power-of-two square and split
// into quadrants [[A, B], [C, D]] with the vector split into [x; y]:
//   top    = (B - D) * y + D * (y - x) + (A + D) * x  ==  A * x + B * y
//   bottom = D * (y - x) + (C + D) * x                ==  C * x + D * y
template <typename ValueType>
std::vector<ValueType> mat_vec_mul_upd(const std::vector<ValueType>& mat,
                                       const Shape& mat_shape,
                                       const std::vector<ValueType>& vec) {
  if (mat_shape.dims() != 2) {
    throw std::invalid_argument("Not a matrix in argument");
  }
  if (vec.size() != mat_shape[1]) {
    throw std::invalid_argument("Invalid vector size");
  }
  if (mat_shape[0] <= DEPTH && mat_shape[1] <= DEPTH) {
    return mat_vec_mul(mat, mat_shape, vec);
  }
  // Smallest power of two that covers both dimensions.
  size_t near_pow2 = 1;
  while (near_pow2 < mat_shape[0] || near_pow2 < mat_shape[1]) {
    near_pow2 = near_pow2 << 1;
  }
  std::vector<std::vector<ValueType> > tmp(6);
  split_into_blocks(mat, mat_shape, vec, tmp, near_pow2);
  Shape cur_shape({near_pow2 / 2, near_pow2 / 2});
  std::vector<ValueType> d =
      mat_vec_mul_upd<ValueType>(m_plus(tmp[0], tmp[3]), cur_shape, tmp[4]);
  std::vector<ValueType> d1 =
      mat_vec_mul_upd<ValueType>(m_minus(tmp[1], tmp[3]), cur_shape, tmp[5]);
  std::vector<ValueType> h2 =
      mat_vec_mul_upd<ValueType>(m_plus(tmp[2], tmp[3]), cur_shape, tmp[4]);
  std::vector<ValueType> v1 =
      mat_vec_mul_upd<ValueType>(tmp[3], cur_shape, m_minus(tmp[5], tmp[4]));
  // Concatenate the top and bottom halves of the padded result, then drop
  // the padding rows. Truncating (instead of computing
  // mat_shape[0] - top.size()) stays correct when the matrix has no more rows
  // than half the padded size, where that subtraction would wrap around.
  std::vector<ValueType> res = m_plus(m_plus(d1, v1), d);
  const std::vector<ValueType> bottom = m_plus(v1, h2);
  res.insert(res.end(), bottom.begin(), bottom.end());
  res.resize(mat_shape[0]);
  return res;
}

template <typename ValueType>
void split_into_blocks_tbb(const std::vector<ValueType>& mat,
const Shape& mat_shape,
const std::vector<ValueType>& vec,
std::vector<std::vector<ValueType> >& tmp,
size_t near_pow2) {
oneapi::tbb::parallel_for(
oneapi::tbb::blocked_range<size_t>(0, near_pow2 / 2),
[&](oneapi::tbb::blocked_range<size_t> r) {
for (size_t i = r.begin(); i < r.end(); i++) {
for (size_t j = 0; j < near_pow2 / 2; j++) {
tmp[0][i * (near_pow2 / 2) + j] =
get_from<ValueType>(i, j, mat, mat_shape);
}
for (size_t j = near_pow2 / 2; j < near_pow2; j++) {
tmp[1][i * (near_pow2 / 2) + j - near_pow2 / 2] =
get_from<ValueType>(i, j, mat, mat_shape);
}
}
});
oneapi::tbb::parallel_for(
oneapi::tbb::blocked_range<size_t>(near_pow2 / 2, near_pow2),
[&](oneapi::tbb::blocked_range<size_t> r) {
for (size_t i = r.begin(); i < r.end(); i++) {
for (size_t j = 0; j < near_pow2 / 2; j++) {
tmp[2][(i - near_pow2 / 2) * (near_pow2 / 2) + j] =
get_from<ValueType>(i, j, mat, mat_shape);
}
for (size_t j = near_pow2 / 2; j < near_pow2; j++) {
tmp[3][(i - near_pow2 / 2) * (near_pow2 / 2) + j - near_pow2 / 2] =
get_from<ValueType>(i, j, mat, mat_shape);
}
}
});
for (size_t i = 0; i < near_pow2 / 2; i++) {
tmp[4].push_back(get_from<ValueType>(0, i, vec, mat_shape));
}
for (size_t i = near_pow2 / 2; i < near_pow2; i++) {
tmp[5].push_back(get_from<ValueType>(0, i, vec, mat_shape));
}
}

// Recursive block matrix-vector product with the four sub-products run as
// tasks of a oneTBB task_group.
//
// mat is a row-major matrix of shape mat_shape, vec has mat_shape[1]
// elements; the result has mat_shape[0] elements.
// Throws std::invalid_argument if mat_shape is not two-dimensional or if the
// vector length does not match the number of columns.
//
// Matrices with both dimensions <= DEPTH go straight to mat_vec_mul. Larger
// ones are treated as zero-padded to the next power-of-two square and split
// into quadrants [[A, B], [C, D]] with the vector split into [x; y]:
//   top    = (B - D) * y + D * (y - x) + (A + D) * x  ==  A * x + B * y
//   bottom = D * (y - x) + (C + D) * x                ==  C * x + D * y
template <typename ValueType>
std::vector<ValueType> mat_vec_mul_upd_tbb(const std::vector<ValueType>& mat,
                                           const Shape& mat_shape,
                                           const std::vector<ValueType>& vec) {
  if (mat_shape.dims() != 2) {
    throw std::invalid_argument("Not a matrix in argument");
  }
  if (vec.size() != mat_shape[1]) {
    throw std::invalid_argument("Invalid vector size");
  }
  if (mat_shape[0] <= DEPTH && mat_shape[1] <= DEPTH) {
    return mat_vec_mul(mat, mat_shape, vec);
  }
  // Smallest power of two that covers both dimensions.
  size_t near_pow2 = 1;
  while (near_pow2 < mat_shape[0] || near_pow2 < mat_shape[1]) {
    near_pow2 = near_pow2 << 1;
  }
  const size_t half = near_pow2 / 2;
  // The quadrant buffers are pre-sized because split_into_blocks_tbb writes
  // them by index; the two vector halves are appended, so they start empty.
  std::vector<std::vector<ValueType> > tmp(
      4, std::vector<ValueType>(half * half));
  tmp.push_back(std::vector<ValueType>());
  tmp.push_back(std::vector<ValueType>());
  split_into_blocks_tbb(mat, mat_shape, vec, tmp, near_pow2);
  Shape cur_shape({half, half});
  std::vector<ValueType> d;
  std::vector<ValueType> d1;
  std::vector<ValueType> h2;
  std::vector<ValueType> v1;
  // Each task writes only its own result vector and only reads tmp, so the
  // by-reference captures need no extra synchronization before wait().
  oneapi::tbb::task_group g;
  g.run([&]() {
    d = mat_vec_mul_upd_tbb<ValueType>(m_plus(tmp[0], tmp[3]), cur_shape,
                                       tmp[4]);
  });
  g.run([&]() {
    d1 = mat_vec_mul_upd_tbb<ValueType>(m_minus(tmp[1], tmp[3]), cur_shape,
                                        tmp[5]);
  });
  g.run([&]() {
    h2 = mat_vec_mul_upd_tbb<ValueType>(m_plus(tmp[2], tmp[3]), cur_shape,
                                        tmp[4]);
  });
  g.run([&]() {
    v1 = mat_vec_mul_upd_tbb<ValueType>(tmp[3], cur_shape,
                                        m_minus(tmp[5], tmp[4]));
  });
  g.wait();
  // Concatenate the top and bottom halves of the padded result, then drop
  // the padding rows. Truncating (instead of computing
  // mat_shape[0] - top.size()) stays correct when the matrix has no more rows
  // than half the padded size, where that subtraction would wrap around.
  std::vector<ValueType> res = m_plus(m_plus(d1, v1), d);
  const std::vector<ValueType> bottom = m_plus(v1, h2);
  res.insert(res.end(), bottom.begin(), bottom.end());
  res.resize(mat_shape[0]);
  return res;
}

template <typename ValueType>
std::vector<ValueType> mat_vec_mul_tbb(const std::vector<ValueType>& mat,
const Shape& mat_shape,
const std::vector<ValueType>& vec) {
if (mat_shape.dims() != 2) {
throw std::invalid_argument("Not a matrix in argument");
}
if (vec.size() != mat_shape[1]) {
throw std::invalid_argument("Invalid vector size");
}
Shape res_shape(1);
res_shape[0] = mat_shape[0];
std::vector<ValueType> res(res_shape[0]);
ValueType elem;
oneapi::tbb::parallel_for(
oneapi::tbb::blocked_range2d<size_t>(0, mat_shape[0], 0, mat_shape[1]),
[&](oneapi::tbb::blocked_range2d<size_t> r) {

Check warning on line 273 in include/layers/FCLayer.hpp

View check run for this annotation

Codecov / codecov/patch

include/layers/FCLayer.hpp#L273

Added line #L273 was not covered by tests
for (size_t i = r.rows().begin(); i < r.rows().end(); i++) {
elem = ValueType(0);

Check warning on line 275 in include/layers/FCLayer.hpp

View check run for this annotation

Codecov / codecov/patch

include/layers/FCLayer.hpp#L275

Added line #L275 was not covered by tests
for (size_t j = r.cols().begin(); j < r.cols().end(); j++) {
// due to 1d indexing
elem += mat[i * mat_shape[1] + j] * vec[j];

Check warning on line 278 in include/layers/FCLayer.hpp

View check run for this annotation

Codecov / codecov/patch

include/layers/FCLayer.hpp#L278

Added line #L278 was not covered by tests
}
res[i] = elem;

Check warning on line 280 in include/layers/FCLayer.hpp

View check run for this annotation

Codecov / codecov/patch

include/layers/FCLayer.hpp#L280

Added line #L280 was not covered by tests
}
});
return res;

Check warning on line 283 in include/layers/FCLayer.hpp

View check run for this annotation

Codecov / codecov/patch

include/layers/FCLayer.hpp#L283

Added line #L283 was not covered by tests
}

template <typename ValueType>
class FCLayerImpl : public LayerImpl<ValueType> {
public:
Expand Down Expand Up @@ -82,7 +318,7 @@
}
std::vector<ValueType> run(const std::vector<ValueType>& input) const;

private:
protected:
std::vector<ValueType> weights_;
std::vector<ValueType> bias_;
};
Expand Down Expand Up @@ -125,4 +361,31 @@
output_values.begin(), std::plus<ValueType>());
return output_values;
}

// Fully-connected layer implementation whose run() uses the TBB-based
// matrix-vector product. Construction is forwarded unchanged to FCLayerImpl,
// which owns the weights, bias and shapes.
// NOTE(review): whether run() overrides a virtual function of the base class
// is not visible from this section - confirm against FCLayerImpl/LayerImpl.
template <typename ValueType>
class FCLayerImplTBB : public FCLayerImpl<ValueType> {
 public:
  FCLayerImplTBB(const std::vector<ValueType>& input_weights,
                 const Shape& input_weights_shape,
                 const std::vector<ValueType>& input_bias)
      : FCLayerImpl<ValueType>(input_weights, input_weights_shape,
                               input_bias) {}

  // Computes the layer output for one input vector; defined out of class.
  std::vector<ValueType> run(const std::vector<ValueType>& input) const;
};

// Applies the layer to one input vector: multiplies the stored weights by
// the input with mat_vec_mul_tbb and adds the bias element-wise.
// Throws std::invalid_argument when the input length differs from
// inputShape_[0].
template <typename ValueType>
std::vector<ValueType> FCLayerImplTBB<ValueType>::run(
    const std::vector<ValueType>& input) const {
  const bool size_matches = input.size() == this->inputShape_[0];
  if (!size_matches) {
    throw std::invalid_argument("Input size doesn't fit FCLayer");
  }
  // The weights are used as an (outputs x inputs) row-major matrix.
  Shape weights_shape({this->outputShape_[0], this->inputShape_[0]});
  std::vector<ValueType> result =
      mat_vec_mul_tbb(this->weights_, weights_shape, input);
  // Add the bias in place, walking both sequences in step.
  auto bias_it = this->bias_.begin();
  for (auto out_it = result.begin(); out_it != result.end();
       ++out_it, ++bias_it) {
    *out_it = std::plus<ValueType>()(*out_it, *bias_it);
  }
  return result;
}

} // namespace itlab_2023
5 changes: 4 additions & 1 deletion include/layers/Layer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

#include "layers/Shape.hpp"
#include "layers/Tensor.hpp"
#include "oneapi/tbb.h"

namespace itlab_2023 {

Expand All @@ -18,9 +19,11 @@ enum LayerType {
kElementWise,
kConvolution,
kFullyConnected,
kOutput,
kOutput
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
kOutput
kOutput,

};

// Selector of the implementation a layer should use; FCLayer takes it as an
// optional constructor argument that defaults to kDefault.
// NOTE(review): presumably kDefault is the reference path and kTBB the
// TBB-parallel one - confirm against the layers' run() implementations.
enum ImplType { kDefault, kTBB };

class Layer {
public:
Layer() = default;
Expand Down
Loading
Loading