File: PartialReduxEvaluator.h
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2011-2018 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_PARTIALREDUX_H
#define EIGEN_PARTIALREDUX_H

namespace Eigen {

namespace internal {

/***************************************************************************
*
* This file provides evaluators for partial reductions.
* There are two modes:
*
*  - scalar path: simply calls the respective function on the column or row.
*    -> nothing special here, all the tricky parts are handled by the return
*       types of VectorwiseOp's members. They embed the functor calling the
*       respective DenseBase's member function.
*
*  - vectorized path: implements a packet-wise reduction followed by
*    some (optional) processing of the outcome, e.g., division by n for mean.
*
* For the vectorized path, observe that the packet size and outer unrolling
* are both decided by the assignment logic. So all we have to do is to decide
* on the inner unrolling.
*
* For the unrolling, we can reuse "internal::redux_vec_unroller" from Redux.h,
* but we need to be careful to specify the correct increment.
*
***************************************************************************/

/* logic deciding a strategy for unrolling of vectorized paths */
template<typename Func, typename Evaluator>
struct packetwise_redux_traits
{
  enum {
    OuterSize = int(Evaluator::IsRowMajor) ? Evaluator::RowsAtCompileTime : Evaluator::ColsAtCompileTime,
    Cost = OuterSize == Dynamic ? HugeCost
         : OuterSize * Evaluator::CoeffReadCost + (OuterSize-1) * functor_traits<Func>::Cost,
    Unrolling = Cost <= EIGEN_UNROLLING_LIMIT ? CompleteUnrolling : NoUnrolling
  };
};

/* Value to be returned when size==0; by default, return 0 */
template<typename PacketType,typename Func>
EIGEN_DEVICE_FUNC
PacketType packetwise_redux_empty_value(const Func& ) { return pset1<PacketType>(0); }

/* For products the default is 1 */
template<typename PacketType,typename Scalar>
EIGEN_DEVICE_FUNC
PacketType packetwise_redux_empty_value(const scalar_product_op<Scalar,Scalar>& ) { return pset1<PacketType>(1); }

/* Perform the actual reduction */
template<typename Func, typename Evaluator,
         int Unrolling = packetwise_redux_traits<Func, Evaluator>::Unrolling
>
struct packetwise_redux_impl;

/* Perform the actual reduction with unrolling */
template<typename Func, typename Evaluator>
struct packetwise_redux_impl<Func, Evaluator, CompleteUnrolling>
{
  typedef redux_novec_unroller<Func,Evaluator, 0, Evaluator::SizeAtCompileTime> Base;
  typedef typename Evaluator::Scalar Scalar;

  template<typename PacketType>
  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE
  PacketType run(const Evaluator &eval, const Func& func, Index /*size*/)
  {
    return redux_vec_unroller<Func, Evaluator, 0, packetwise_redux_traits<Func, Evaluator>::OuterSize>::template run<PacketType>(eval,func);
  }
};
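/* Worked example of the unrolling decision above (illustrative only, with
 * assumed sizes; not from the original source): for a column-wise reduction
 * of a row-major Matrix<float,8,Dynamic>, the panel evaluator reduced by
 * packetwise_redux_impl has OuterSize==8, hence
 *   Cost = 8*CoeffReadCost + 7*functor_traits<Func>::Cost,
 * which is typically below EIGEN_UNROLLING_LIMIT and thus selects
 * CompleteUnrolling. A Dynamic outer size maps to HugeCost and therefore
 * falls back to NoUnrolling. */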
/* Add a specialization of redux_vec_unroller for size==0 at compile time.
 * This specialization is not required for general reductions, which is
 * why it is defined here.
 */
template<typename Func, typename Evaluator, int Start>
struct redux_vec_unroller<Func, Evaluator, Start, 0>
{
  template<typename PacketType>
  EIGEN_DEVICE_FUNC
  static EIGEN_STRONG_INLINE PacketType run(const Evaluator &, const Func& f)
  {
    return packetwise_redux_empty_value<PacketType>(f);
  }
};

/* Perform the actual reduction for dynamic sizes */
template<typename Func, typename Evaluator>
struct packetwise_redux_impl<Func, Evaluator, NoUnrolling>
{
  typedef typename Evaluator::Scalar Scalar;
  typedef typename redux_traits<Func, Evaluator>::PacketType PacketScalar;

  template<typename PacketType>
  EIGEN_DEVICE_FUNC
  static PacketType run(const Evaluator &eval, const Func& func, Index size)
  {
    if(size==0)
      return packetwise_redux_empty_value<PacketType>(func);

    const Index size4 = (size-1)&(~3);
    PacketType p = eval.template packetByOuterInner<Unaligned,PacketType>(0,0);
    Index i = 1;
    // This loop is optimized for instruction pipelining:
    // - each iteration generates two independent instructions
    // - thanks to branch prediction and out-of-order execution we have independent instructions across loops
    for(; i<size4; i+=4)
      p = func.packetOp(p,
            func.packetOp(
              func.packetOp(eval.template packetByOuterInner<Unaligned,PacketType>(i+0,0),
                            eval.template packetByOuterInner<Unaligned,PacketType>(i+1,0)),
              func.packetOp(eval.template packetByOuterInner<Unaligned,PacketType>(i+2,0),
                            eval.template packetByOuterInner<Unaligned,PacketType>(i+3,0))));
    for(; i<size; ++i)
      p = func.packetOp(p, eval.template packetByOuterInner<Unaligned,PacketType>(i,0));
    return p;
  }
};
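/* Usage sketch (illustrative only, not taken from the original file): the
 * evaluator below is what gets instantiated when a partial reduction is
 * assigned, e.g.
 *
 *   Eigen::MatrixXf A(100,100);
 *   Eigen::RowVectorXf s = A.colwise().sum();  // PartialReduxExpr<..., Vertical>
 *
 * The scalar path applies m_functor to one column (or row) per coefficient,
 * while the vectorized packet() path reduces a PacketSize-wide panel through
 * packetwise_redux_impl. */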
template< typename ArgType, typename MemberOp, int Direction>
struct evaluator<PartialReduxExpr<ArgType, MemberOp, Direction> >
  : evaluator_base<PartialReduxExpr<ArgType, MemberOp, Direction> >
{
  typedef PartialReduxExpr<ArgType, MemberOp, Direction> XprType;
  typedef typename internal::nested_eval<ArgType,1>::type ArgTypeNested;
  typedef typename internal::add_const_on_value_type<ArgTypeNested>::type ConstArgTypeNested;
  typedef typename internal::remove_all<ArgTypeNested>::type ArgTypeNestedCleaned;

  typedef typename ArgType::Scalar InputScalar;
  typedef typename XprType::Scalar Scalar;

  enum {
    TraversalSize = Direction==int(Vertical) ? int(ArgType::RowsAtCompileTime) : int(ArgType::ColsAtCompileTime)
  };
  typedef typename MemberOp::template Cost<int(TraversalSize)> CostOpType;
  enum {
    CoeffReadCost = TraversalSize==Dynamic ? HugeCost
                  : TraversalSize==0 ? 1
                  : TraversalSize * evaluator<ArgType>::CoeffReadCost + int(CostOpType::value),

    _ArgFlags = evaluator<ArgType>::Flags,

    _Vectorizable =  bool(int(_ArgFlags)&PacketAccessBit)
                  && bool(MemberOp::Vectorizable)
                  && (Direction==int(Vertical) ? bool(_ArgFlags&RowMajorBit) : (_ArgFlags&RowMajorBit)==0)
                  && (TraversalSize!=0),

    Flags = (traits<XprType>::Flags&RowMajorBit)
          | (evaluator<ArgType>::Flags&(HereditaryBits&(~RowMajorBit)))
          | (_Vectorizable ? PacketAccessBit : 0)
          | LinearAccessBit,

    Alignment = 0 // FIXME this will need to be improved once PartialReduxExpr is vectorized
  };

  EIGEN_DEVICE_FUNC explicit evaluator(const XprType xpr)
    : m_arg(xpr.nestedExpression()), m_functor(xpr.functor())
  {
    EIGEN_INTERNAL_CHECK_COST_VALUE(TraversalSize==Dynamic ? HugeCost : (TraversalSize==0 ? 1 : int(CostOpType::value)));
    EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
  }

  typedef typename XprType::CoeffReturnType CoeffReturnType;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  const Scalar coeff(Index i, Index j) const
  {
    return coeff(Direction==Vertical ? j : i);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  const Scalar coeff(Index index) const
  {
    return m_functor(m_arg.template subVector<DirectionType(Direction)>(index));
  }

  template<int LoadMode,typename PacketType>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  PacketType packet(Index i, Index j) const
  {
    return packet<LoadMode,PacketType>(Direction==Vertical ? j : i);
  }

  template<int LoadMode,typename PacketType>
  EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
  PacketType packet(Index idx) const
  {
    enum { PacketSize = internal::unpacket_traits<PacketType>::size };
    typedef Block<const ArgTypeNestedCleaned,
                  Direction==Vertical ? int(ArgType::RowsAtCompileTime) : int(PacketSize),
                  Direction==Vertical ? int(PacketSize) : int(ArgType::ColsAtCompileTime),
                  true /* InnerPanel */> PanelType;

    PanelType panel(m_arg,
                    Direction==Vertical ? 0 : idx,
                    Direction==Vertical ? idx : 0,
                    Direction==Vertical ? m_arg.rows() : Index(PacketSize),
                    Direction==Vertical ? Index(PacketSize) : m_arg.cols());

    // FIXME
    // See bug 1612: currently, if PacketSize==1 (i.e., complex<double> with 128-bit registers)
    // then the storage order of the panel gets reversed, and methods like packetByOuterInner
    // do not make sense anymore in this context. So let's just bypass "vectorization" in this case:
    if(PacketSize==1)
      return internal::pset1<PacketType>(coeff(idx));

    typedef typename internal::redux_evaluator<PanelType> PanelEvaluator;
    PanelEvaluator panel_eval(panel);
    typedef typename MemberOp::BinaryOp BinaryOp;
    PacketType p = internal::packetwise_redux_impl<BinaryOp,PanelEvaluator>::template run<PacketType>(panel_eval,m_functor.binaryFunc(),m_arg.outerSize());
    return p;
  }

protected:
  ConstArgTypeNested m_arg;
  const MemberOp m_functor;
};

} // end namespace internal

} // end namespace Eigen

#endif // EIGEN_PARTIALREDUX_H