TemplNet: Add possibility to feed derived inputs with derivatives instead of original inputs
Ithanil committed Aug 20, 2019
1 parent 5152e54 commit 3cdea47
Showing 7 changed files with 170 additions and 88 deletions.
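In short, TemplNet now distinguishes the number of original inputs (ORIG_N_IN) from the number of direct network inputs (NET_N_IN). When the two coincide, the original inputs are fed through the new Propagate() entry point, which replaces the former setInput()/FFPropagate() pair; when the network is fed derived inputs, the new PropagateDerived() entry point additionally takes the first and second derivatives of those inputs with respect to the original inputs, so that getD1()/getD2() still report derivatives with respect to the original inputs. A minimal sketch of the two call patterns, assuming an already constructed net tnet (a fuller usage sketch follows the TemplNet.hpp diff below):

// Old call pattern (removed in this commit):
//   tnet.setInput(x, x + ninput);
//   tnet.FFPropagate();
// New call patterns (x: original inputs; in/in_d1/in_d2: derived inputs and their derivatives wrt x):
tnet.Propagate(x);
tnet.PropagateDerived(in, in_d1, in_d2);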
6 changes: 3 additions & 3 deletions benchmark/bench_templ_ffprop/main.cpp
@@ -75,23 +75,23 @@ int main()
using L1Type_s = LayerConfig<nhu1[0], actf::Sigmoid>;
using L2Type_s = LayerConfig<nhu2[0], actf::Sigmoid>;
using L3Type_s = LayerConfig<yndim, actf::Sigmoid>;
using NetType_s = TemplNet<RealT, dconf, xndim[0], L1Type_s, L2Type_s, L3Type_s>;
using NetType_s = TemplNet<RealT, dconf, xndim[0], xndim[0], L1Type_s, L2Type_s, L3Type_s>;
auto tnet_s_ptr = std::make_unique<NetType_s>();
auto &tnet_s = *tnet_s_ptr;

// Medium Net
using L1Type_m = LayerConfig<nhu1[1], actf::Sigmoid>;
using L2Type_m = LayerConfig<nhu2[1], actf::Sigmoid>;
using L3Type_m = LayerConfig<yndim, actf::Sigmoid>;
using NetType_m = TemplNet<RealT, dconf, xndim[1], L1Type_m, L2Type_m, L3Type_m>;
using NetType_m = TemplNet<RealT, dconf, xndim[1], xndim[1], L1Type_m, L2Type_m, L3Type_m>;
auto tnet_m_ptr = std::make_unique<NetType_m>();
auto &tnet_m = *tnet_m_ptr;

// Large Net
using L1Type_l = LayerConfig<nhu1[2], actf::Sigmoid>;
using L2Type_l = LayerConfig<nhu2[2], actf::Sigmoid>;
using L3Type_l = LayerConfig<yndim, actf::Sigmoid>;
using NetType_l = TemplNet<RealT, dconf, xndim[2], L1Type_l, L2Type_l, L3Type_l>;
using NetType_l = TemplNet<RealT, dconf, xndim[2], xndim[2], L1Type_l, L2Type_l, L3Type_l>;
auto tnet_l_ptr = std::make_unique<NetType_l>();
auto &tnet_l = *tnet_l_ptr;

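The benchmark nets above now pass xndim twice because the TemplNet parameter list starts with the original input count followed by the network input count (see TemplNet.hpp below); in the benchmark the two are identical. A sketch of an instantiation where they differ, with hypothetical sizes and the same layer-alias pattern:

// Hypothetical: 2 original coordinates expanded into 3 derived features before the net
using NetType = TemplNet<RealT, dconf, 2 /*ORIG_N_IN*/, 3 /*NET_N_IN*/, L1Type, L2Type, L3Type>;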
3 changes: 1 addition & 2 deletions benchmark/common/FFNNBenchmarks.hpp
@@ -27,8 +27,7 @@ inline double benchmark_TemplProp(TemplNet &tnet, const double xdata[], const in

timer.reset();
for (int i = 0; i < neval; ++i) {
tnet.setInput(xdata + i*ninput, xdata + (i+1)*ninput);
tnet.FFPropagate();
tnet.Propagate(xdata + i*ninput);
}

return timer.elapsed();
16 changes: 8 additions & 8 deletions include/qnets/templ/LayerPackTools.hpp
@@ -25,18 +25,18 @@ constexpr int nbeta_next() { return 0; }
template <class LConf1, class LConf2, class ... Rest> // LConf2 is "next"
constexpr int nbeta_next() { return (1 + LConf1::noutput)*LConf2::noutput; }

template <typename ValueT, DerivConfig DCONF, int NET_NINPUT, int NET_NOUTPUT, int N_IN, class>
template <typename ValueT, DerivConfig DCONF, int ORIG_NINPUT, int NET_NINPUT, int NET_NOUTPUT, int N_IN, class>
struct LayerPackTuple_rec
{
using type = std::tuple<>;
};

template <typename ValueT, DerivConfig DCONF, int NET_NINPUT, int NET_NOUTPUT, int N_IN, class LConf, class ... LCONFS>
struct LayerPackTuple_rec<ValueT, DCONF, NET_NINPUT, NET_NOUTPUT, N_IN, std::tuple<LConf, LCONFS...>>
template <typename ValueT, DerivConfig DCONF, int ORIG_NINPUT, int NET_NINPUT, int NET_NOUTPUT, int N_IN, class LConf, class ... LCONFS>
struct LayerPackTuple_rec<ValueT, DCONF, ORIG_NINPUT, NET_NINPUT, NET_NOUTPUT, N_IN, std::tuple<LConf, LCONFS...>>
{
private:
using layer = TemplLayer<ValueT, NET_NINPUT, NET_NOUTPUT, nbeta_next<LConf, LCONFS...>(), N_IN, LConf::noutput, typename LConf::ACTF_Type, DCONF>;
using rest = typename LayerPackTuple_rec<ValueT, DCONF, NET_NINPUT, NET_NOUTPUT, layer::noutput, std::tuple<LCONFS...>>::type;
using layer = TemplLayer<ValueT, ORIG_NINPUT, NET_NINPUT, NET_NOUTPUT, nbeta_next<LConf, LCONFS...>(), N_IN, LConf::noutput, typename LConf::ACTF_Type, DCONF>;
using rest = typename LayerPackTuple_rec<ValueT, DCONF, ORIG_NINPUT, NET_NINPUT, NET_NOUTPUT, layer::noutput, std::tuple<LCONFS...>>::type;
public:
using type = decltype(std::tuple_cat(
std::declval<std::tuple<layer>>(),
@@ -48,13 +48,13 @@ struct LayerPackTuple_rec<ValueT, DCONF, NET_NINPUT, NET_NOUTPUT, N_IN, std::tup
//
// Helps to determine the full layer tuple type according to LayerConfig pack
//
template <typename ValueT, DerivConfig DCONF, int NET_NINPUT, class LConf, class ... LCONFS>
template <typename ValueT, DerivConfig DCONF, int ORIG_NINPUT, int NET_NINPUT, class LConf, class ... LCONFS>
struct LayerPackTuple
{
private:
static constexpr int net_noutput = detail::net_nout<LConf, LCONFS...>();
using layer = TemplLayer<ValueT, NET_NINPUT, net_noutput, detail::nbeta_next<LConf, LCONFS...>(), NET_NINPUT, LConf::noutput, typename LConf::ACTF_Type, DCONF>;
using rest = typename detail::LayerPackTuple_rec<ValueT, DCONF, NET_NINPUT, net_noutput, layer::noutput, std::tuple<LCONFS...>>::type;
using layer = TemplLayer<ValueT, ORIG_NINPUT, NET_NINPUT, net_noutput, detail::nbeta_next<LConf, LCONFS...>(), NET_NINPUT, LConf::noutput, typename LConf::ACTF_Type, DCONF>;
using rest = typename detail::LayerPackTuple_rec<ValueT, DCONF, ORIG_NINPUT, NET_NINPUT, net_noutput, layer::noutput, std::tuple<LCONFS...>>::type;
public:
using type = decltype(std::tuple_cat(
std::declval<std::tuple<layer>>(),
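With the extra ORIG_NINPUT parameter threaded through the recursion, every TemplLayer in the resulting tuple carries the original input count in addition to the network input/output counts. As a worked example (types written schematically, not verbatim compiler output), a two-layer net declared as

using Net = TemplNet<double, DCONF, 2 /*ORIG_N_IN*/, 2 /*NET_N_IN*/, LayerConfig<4, actf::Sigmoid>, LayerConfig<1, actf::Sigmoid>>;

now resolves its LayerTuple to

std::tuple<
    TemplLayer<double, /*ORIG_NINPUT*/2, /*NET_NINPUT*/2, /*NET_NOUTPUT*/1, /*NBETA_NEXT*/(1 + 4)*1, /*N_IN*/2, /*N_OUT*/4, actf::Sigmoid, DCONF>,
    TemplLayer<double, 2, 2, 1, /*NBETA_NEXT*/0, /*N_IN*/4, /*N_OUT*/1, actf::Sigmoid, DCONF>>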
25 changes: 13 additions & 12 deletions include/qnets/templ/TemplLayer.hpp
@@ -29,23 +29,24 @@ struct LayerConfig

// The actual Layer class
//
template <typename ValueT, int NET_NINPUT, int NET_NOUTPUT, int NBETA_NEXT, int N_IN, int N_OUT, class ACTFType, DerivConfig DCONF>
template <typename ValueT, int ORIG_NINPUT,int NET_NINPUT, int NET_NOUTPUT, int NBETA_NEXT, int N_IN, int N_OUT, class ACTFType, DerivConfig DCONF>
class TemplLayer: public LayerConfig<N_OUT, ACTFType>
{
public:
// N_IN dependent sizes
static constexpr int ninput = N_IN;
static constexpr int nbeta = (N_IN + 1)*N_OUT;
static constexpr int orig_nin = ORIG_NINPUT;
static constexpr int net_nin = NET_NINPUT;
static constexpr int net_nout = NET_NOUTPUT;

// Sizes which also depend on DCONF
static constexpr StaticDFlags<DCONF> dconf{};

static constexpr int nd2 = dconf.d2
? NET_NINPUT*N_OUT
? ORIG_NINPUT*N_OUT
: 0; // number of forward-accumulated first/second order input derivative values
static constexpr int nd2_prev = dconf.d2 ? NET_NINPUT*N_IN : 0; // the same number of previous layer
static constexpr int nd2_prev = dconf.d2 ? ORIG_NINPUT*N_IN : 0; // the same number of previous layer

static_assert(NBETA_NEXT%(1 + N_OUT) == 0, ""); // -> BUG!
static constexpr int nout_next = NBETA_NEXT/(1 + N_OUT);
@@ -121,12 +122,12 @@ class TemplLayer: public LayerConfig<N_OUT, ACTFType>
for (int i = 0; i < N_OUT; ++i) {
for (int j = 0; j < N_IN; ++j) {
const ValueT bij = beta[1 + i*(N_IN + 1) + j];
for (int k = 0; k < NET_NINPUT; ++k) {
D1[i*NET_NINPUT + k] += bij*in_d1[j*NET_NINPUT + k];
D2[i*NET_NINPUT + k] += bij*in_d2[j*NET_NINPUT + k];
for (int k = 0; k < ORIG_NINPUT; ++k) {
D1[i*ORIG_NINPUT + k] += bij*in_d1[j*ORIG_NINPUT + k];
D2[i*ORIG_NINPUT + k] += bij*in_d2[j*ORIG_NINPUT + k];
}
}
for (int l = i*NET_NINPUT; l < (i + 1)*NET_NINPUT; ++l) {
for (int l = i*ORIG_NINPUT; l < (i + 1)*ORIG_NINPUT; ++l) {
D2[l] = _ad1[i]*D2[l] + _ad2[i]*D1[l]*D1[l];
D1[l] *= _ad1[i];
}
@@ -136,7 +137,6 @@ class TemplLayer: public LayerConfig<N_OUT, ACTFType>
// forward-accumulate second order deriv when the inputs correspond (besides shift/scale) to the true network inputs
constexpr void _computeD2_Input()
{
static_assert(N_IN == NET_NINPUT, "");
auto &D1 = *_d1_ptr;
auto &D2 = *_d2_ptr;
for (int i = 0; i < N_OUT; ++i) {
@@ -153,6 +153,7 @@ class TemplLayer: public LayerConfig<N_OUT, ACTFType>
{
// statically secure this call (i.e. using it on non-input layer will not compile)
static_assert(N_IN == NET_NINPUT, "[TemplLayer::ForwardInput] N_IN != NET_NINPUT");
static_assert(N_IN == ORIG_NINPUT, "[TemplLayer::ForwardInput] N_IN != ORIG_NINPUT");

dflags = dflags.AND(dconf); // AND static and dynamic conf
this->_computeOutput(input, dflags);
@@ -163,7 +164,7 @@ class TemplLayer: public LayerConfig<N_OUT, ACTFType>
}
}

// continue forward pass from previous layer
// continue/start forward pass from previous layer / external source
constexpr void _forwardLayer(const ValueT input[], const ValueT in_d1[], const ValueT in_d2[], DynamicDFlags dflags)
{
dflags = dflags.AND(dconf); // AND static and dynamic conf
@@ -303,9 +304,9 @@ class TemplLayer: public LayerConfig<N_OUT, ACTFType>
// Pointer: No bounds checking


// --- Propagation of input data (not layer)
// --- Propagation of original input data (not layer)

constexpr void ForwardInput(const std::array<ValueT, NET_NINPUT> &input, DynamicDFlags dflags)
constexpr void ForwardInput(const std::array<ValueT, ORIG_NINPUT> &input, DynamicDFlags dflags)
{
_forwardInput(input.begin(), dflags);
}
@@ -316,7 +317,7 @@ class TemplLayer: public LayerConfig<N_OUT, ACTFType>
}


// --- Forward Propagation of layer data
// --- Forward Propagation of layer data or external source

constexpr void ForwardLayer(const std::array<ValueT, N_IN> &input, const std::array<ValueT, nd2_prev> &in_d1, const std::array<ValueT, nd2_prev> &in_d2, DynamicDFlags dflags)
{
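Because the derivative buffers are now sized with ORIG_NINPUT instead of NET_NINPUT, each layer forward-accumulates d1/d2 with respect to the original inputs, regardless of whether the first layer was fed those inputs directly or derived quantities. The following standalone sketch (an assumed helper, not part of the commit) restates what the loop in the first TemplLayer.hpp hunk above does, with the same beta layout (row-major, one bias plus N_IN weights per unit) and ad1/ad2 denoting the activation's first and second derivatives:

template <int ORIG_NINPUT, int N_IN, int N_OUT, typename ValueT>
void accumulate_d12(const ValueT beta[], const ValueT ad1[], const ValueT ad2[],
                    const ValueT in_d1[], const ValueT in_d2[], ValueT D1[], ValueT D2[])
{
    for (int i = 0; i < N_OUT; ++i) {            // output unit
        for (int k = 0; k < ORIG_NINPUT; ++k) {  // original input
            ValueT s1 = 0., s2 = 0.;
            for (int j = 0; j < N_IN; ++j) {     // layer input
                const ValueT bij = beta[1 + i*(N_IN + 1) + j]; // skip the bias weight
                s1 += bij*in_d1[j*ORIG_NINPUT + k];
                s2 += bij*in_d2[j*ORIG_NINPUT + k];
            }
            D1[i*ORIG_NINPUT + k] = ad1[i]*s1;                // d(out_i)/d(orig_k)
            D2[i*ORIG_NINPUT + k] = ad1[i]*s2 + ad2[i]*s1*s1; // d2(out_i)/d(orig_k)^2 (diagonal)
        }
    }
}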
154 changes: 111 additions & 43 deletions include/qnets/templ/TemplNet.hpp
@@ -102,36 +102,38 @@ constexpr void grad_layers_impl(const TupleT &layers, const ArrayT1 &input, Arra

// --- The fully templated TemplNet FFNN

template <typename ValueT, DerivConfig DCONF, int N_IN, class ... LayerConfs>
template <typename ValueT, DerivConfig DCONF, int ORIG_N_IN, int NET_N_IN, class ... LayerConfs>
class TemplNet
{
public:
// --- Static Setup

// LayerTuple type / Shape
using LayerTuple = typename lpack::LayerPackTuple<ValueT, DCONF, N_IN, LayerConfs...>::type;
using LayerTuple = typename lpack::LayerPackTuple<ValueT, DCONF, ORIG_N_IN, NET_N_IN, LayerConfs...>::type;
using Shape = detail::TemplNetShape<LayerTuple, std::make_index_sequence<sizeof...(LayerConfs)>>;

// some basic static sizes
static constexpr int nlayer = tupl::count<int, LayerTuple>();
static constexpr int ninput = std::tuple_element<0, LayerTuple>::type::ninput;
static constexpr int orig_ninput = ORIG_N_IN; // if network inputs are a function of orig_ninput original inputs
static constexpr int ninput = std::tuple_element<0, LayerTuple>::type::ninput; // the number of direct network inputs
static constexpr int noutput = std::tuple_element<nlayer - 1, LayerTuple>::type::noutput;
static constexpr int nbeta = lpack::countBetas<N_IN, LayerConfs...>();
static constexpr int nbeta = lpack::countBetas<NET_N_IN, LayerConfs...>();
static constexpr int nunit = lpack::countUnits<LayerConfs...>();

// static derivative config
static constexpr StaticDFlags<DCONF> dconf{};

// Static Output Deriv Array Sizes (depend on DCONF)
static constexpr int nd1 = dconf.d1 ? noutput*ninput : 0;
static constexpr int nd2 = dconf.d2 ? noutput*ninput : 0;
static constexpr int nd1_net = noutput*ninput; // helper array to calc d1 from backprop
static constexpr int nd1 = dconf.d1 ? noutput*orig_ninput : 0;
static constexpr int nd2 = dconf.d2 ? noutput*orig_ninput : 0;
static constexpr int nvd1 = dconf.vd1 ? noutput*nbeta : 0;
static constexpr int nvd2 = dconf.vd2 ? noutput*nbeta : 0;


// Basic assertions
static_assert(nlayer == static_cast<int>(sizeof...(LayerConfs)), ""); // -> BUG!
static_assert(ninput == N_IN, ""); // -> BUG!
static_assert(ninput == NET_N_IN, ""); // -> BUG!
static_assert(noutput == Shape::nunits[nlayer - 1], ""); // -> BUG!
static_assert(nlayer > 1, "[TemplNet] nlayer <= 1");
static_assert(lpack::hasNoEmptyLayer<(ninput > 0), LayerConfs...>(), "[TemplNet] LayerConf pack contains empty Layer.");
@@ -147,7 +149,11 @@ class TemplNet
const std::array<const ValueT *, nlayer> _out_begins;
const std::array<ValueT *, nlayer> _beta_begins;

// input array
std::array<ValueT, ninput> _input{};

// deriv arrays
std::array<ValueT, nd1_net> _d1_net{};
std::array<ValueT, nd1> _d1{};
std::array<ValueT, nd2> _d2{};
std::array<ValueT, nvd1> _vd1{};
@@ -157,10 +163,88 @@ class TemplNet
// dynamic (opt-out) derivative config (default to DCONF or explicit set in ctor)
DynamicDFlags dflags{DCONF};

// input array
std::array<ValueT, ninput> input{};
private:
// some helper methods

void _propagateLayers() // continue the initialized fwd prop
{
using namespace detail;

// continue fwd prop
fwdprop_layers_impl(_layers, dflags, std::make_index_sequence<nlayer - 1>{});

// backprop
std::get<nlayer - 1>(_layers).BackwardOutput(dflags);
backprop_layers_impl(_layers, dflags, std::make_index_sequence<nlayer - 1>{});

// store backprop grads into vd1/vd2
grad_layers_impl<0, nbeta>(_layers, _input, _vd1, _vd2, dflags, std::make_index_sequence<nlayer>{});
}

template <int ONIN = ORIG_N_IN>
typename std::enable_if<ONIN == NET_N_IN, void>::type _computeInputGradients() // use only if network input is the original input
{
// store input grads into d1/d2
if (dflags.d2()) { // we used forward accumulation
_d1 = std::get<nlayer - 1>(_layers).d1();
_d2 = std::get<nlayer - 1>(_layers).d2();
}
else { // compute original input derivative from backprop derivatives
std::get<0>(_layers).storeInputD1(_d1, dflags);
}
}

template <int ONIN = ORIG_N_IN>
typename std::enable_if<ONIN != NET_N_IN, void>::type _computeInputGradients()
{
throw std::runtime_error("[TemplNet::_processOrigInput] Original input derivatives require provided input-to-orig derivatives.");
}

void _computeInputGradients(const ValueT orig_d1[]) // used if network input is not the original input
{
// store input grads into d1/d2
if (dflags.d2()) { // we used forward accumulation
_d1 = std::get<nlayer - 1>(_layers).d1();
_d2 = std::get<nlayer - 1>(_layers).d2();
}
else { // compute original input derivative from backprop derivatives
_d1.fill(0.);
std::get<0>(_layers).storeInputD1(_d1_net, dflags);
for (int i = 0; i < noutput; ++i) {
for (int j = 0; j < ninput; ++j) {
for (int k = 0; k < orig_ninput; ++k) {
_d1[i*orig_ninput + k] += _d1_net[i*ninput + j]*orig_d1[j*orig_ninput + k];
}
}
}
}
}

template <int ONIN = ORIG_N_IN>
typename std::enable_if<ONIN == NET_N_IN, void>::type _processOrigInput()
{
// feed original input
std::get<0>(_layers).ForwardInput(_input, dflags);
this->_propagateLayers();
if (dflags.d1()) { this->_computeInputGradients(); }
}

template <int ONIN = ORIG_N_IN>
typename std::enable_if<ONIN != NET_N_IN, void>::type _processOrigInput()
{
throw std::runtime_error("[TemplNet::_processOrigInput] Original input can't be fed directly, because it differs in size from network input.");
}

void _processDerivInput(const ValueT orig_d1[], const ValueT orig_d2[])
{
// feed derived network input
std::get<0>(_layers).ForwardLayer(_input.data(), orig_d1, orig_d2, dflags);
this->_propagateLayers();
if (dflags.d1()) { this->_computeInputGradients(orig_d1); }
}

public:

explicit constexpr TemplNet(DynamicDFlags init_dflags = DynamicDFlags{DCONF}):
_out_begins(tupl::make_fcont<std::array<const ValueT *, nlayer>>(_layers, [](const auto &layer) { return &layer.out().front(); })),
_beta_begins(tupl::make_fcont<std::array<ValueT *, nlayer>>(_layers, [](auto &layer) { return &layer.beta.front(); })),
@@ -187,7 +271,6 @@ class TemplNet
constexpr const auto &getLayer() const { return std::get<I>(_layers); }

// --- const get Value Arrays/Elements
constexpr const auto &getInput() const { return input; } // alternative const read of public input array
constexpr const auto &getOutput() const { return std::get<nlayer - 1>(_layers).out(); } // get values of output layer
constexpr ValueT getOutput(int i) const { return this->getOutput()[i]; }
constexpr const auto &getD1() const { return _d1; } // get derivative of output with respect to input
@@ -265,46 +348,31 @@ class TemplNet
/*
void randomizeBetas(); // has to be changed maybe if we add beta that are not "normal" weights*/

// --- Manage the variational parameters (which may contain a subset of beta and/or non-beta parameters),
// which exist only after that they are assigned to actual parameters in the network (e.g. betas)
//int getNVariationalParameters() { return _nvp; }
/*ValueT getVariationalParameter(const int &ivp);
void getVariationalParameter(ValueT * vp);
void setVariationalParameter(const int &ivp, const ValueT &vp);
void setVariationalParameter(const ValueT * vp);
*/

// shortcut for (connecting and) adding substrates
//void enableDerivatives(bool flag_d1, bool flag_d2, bool flag_vd1);


// Set initial parameters
constexpr void setInput(int i, ValueT val) { input[i] = val; }
template <class IterT>
constexpr void setInput(IterT begin, const IterT end) { std::copy(begin, end, input.begin()); }
constexpr void setInput(const std::array<ValueT, ninput> &in_arr) { input = in_arr; }


// --- Propagation

constexpr void FFPropagate()
constexpr void Propagate(const ValueT input[])
{
using namespace detail;

// fdwprop
std::get<0>(_layers).ForwardInput(input, dflags);
fwdprop_layers_impl(_layers, dflags, std::make_index_sequence<nlayer - 1>{});
std::copy(input, input + ninput, _input.begin());
this->_processOrigInput();
}

// backprop
std::get<nlayer - 1>(_layers).BackwardOutput(dflags);
backprop_layers_impl(_layers, dflags, std::make_index_sequence<nlayer - 1>{});
constexpr void Propagate(const std::array<ValueT, ninput> &in_arr)
{
_input = in_arr;
this->_processOrigInput();
}

// store backprop grads into vd1/vd2
grad_layers_impl<0, nbeta>(_layers, input, _vd1, _vd2, dflags, std::make_index_sequence<nlayer>{});
constexpr void PropagateDerived(const ValueT input[], const ValueT orig_d1[], const ValueT orig_d2[])
{
std::copy(input, input + ninput, _input.begin());
this->_processDerivInput(orig_d1, orig_d2);
}

// store input grads into d1/d2
std::get<0>(_layers).storeInputD1(_d1, dflags);
_d2 = std::get<nlayer - 1>(_layers).d2();
constexpr void PropagateDerived(const std::array<ValueT, ninput> &in_arr, const std::array<ValueT, ninput*orig_ninput> &orig_d1, const std::array<ValueT, ninput*orig_ninput> &orig_d2)
{
_input = in_arr;
this->_processDerivInput(orig_d1.data(), orig_d2.data());
}


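A usage sketch of the new propagation API (the DerivConfig enumerator, activation types, and the derived-feature mapping are assumptions for illustration; the derivative layouts follow the code above: orig_d1/orig_d2 are row-major over the network inputs with stride orig_ninput):

// Network fed with 2 derived features of 2 original inputs x0, x1:
//   in0 = x0*x0 + x1*x1,  in1 = x0*x1
using Net = TemplNet<double, DerivConfig::D12 /*assumed enumerator*/, /*ORIG_N_IN*/2, /*NET_N_IN*/2,
                     LayerConfig<4, actf::Sigmoid>, LayerConfig<1, actf::Sigmoid>>;
Net net{};

const double x[2] = {0.3, -0.7};                         // original inputs
const double in[2] = {x[0]*x[0] + x[1]*x[1], x[0]*x[1]}; // derived network inputs
const double d1[4] = {2.*x[0], 2.*x[1],                  // d(in0)/dx0, d(in0)/dx1
                      x[1], x[0]};                       // d(in1)/dx0, d(in1)/dx1
const double d2[4] = {2., 2.,                            // d2(in0)/dx0^2, d2(in0)/dx1^2
                      0., 0.};                           // d2(in1)/dx0^2, d2(in1)/dx1^2

net.PropagateDerived(in, d1, d2); // getD1()/getD2() then refer to derivatives wrt x0, x1
// If the network inputs are the original inputs (ORIG_N_IN == NET_N_IN), simply:
net.Propagate(x);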
(The diffs of the remaining 2 changed files were not loaded on this page.)
