/*========================================================================= * * Copyright NumFOCUS * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * https://www.apache.org/licenses/LICENSE-2.0.txt * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * *=========================================================================*/ #ifndef itkGPUReduction_h #define itkGPUReduction_h #include "itkObject.h" #include "itkGPUDataManager.h" #include "itkGPUKernelManager.h" #include "itkOpenCLUtil.h" namespace itk { /** Create a helper GPU Kernel class for GPUReduction */ itkGPUKernelClassMacro(GPUReductionKernel); /** * \class GPUReduction * * This class encapsulate the parallel reduction algorithm. An example * of this algorithm is to compute the sum of a long array in parallel. * * \ingroup ITKGPUCommon */ template class ITK_TEMPLATE_EXPORT GPUReduction : public Object { public: ITK_DISALLOW_COPY_AND_MOVE(GPUReduction); /** Standard class type aliases. */ using Self = GPUReduction; using Superclass = Object; using Pointer = SmartPointer; using ConstPointer = SmartPointer; /** Method for creation through the object factory. */ itkNewMacro(Self); /** \see LightObject::GetNameOfClass() */ itkOverrideGetNameOfClassMacro(GPUReduction); using GPUDataPointer = GPUDataManager::Pointer; itkGetMacro(GPUDataManager, GPUDataPointer); itkGetMacro(GPUResult, TElement); itkGetMacro(CPUResult, TElement); /** Get OpenCL Kernel source as a string, creates a GetOpenCLSource method */ itkGetOpenCLSourceFromKernelMacro(GPUReductionKernel); unsigned int NextPow2(unsigned int x); bool isPow2(unsigned int x); void GetNumBlocksAndThreads(int whichKernel, int n, int maxBlocks, int maxThreads, int & blocks, int & threads); unsigned int GetReductionKernel(int whichKernel, int blockSize, int isPowOf2); void AllocateGPUInputBuffer(TElement * h_idata = nullptr); void ReleaseGPUInputBuffer(); void InitializeKernel(unsigned int size); TElement RandomTest(); TElement GPUGenerateData(); TElement CPUGenerateData(TElement * data, int size); TElement GPUReduce(cl_int n, int numThreads, int numBlocks, int maxThreads, int maxBlocks, int whichKernel, bool cpuFinalReduction, int cpuFinalThreshold, double * dTotalTime, GPUDataPointer idata, GPUDataPointer odata); protected: GPUReduction(); ~GPUReduction() override; void PrintSelf(std::ostream & os, Indent indent) const override; /** GPU kernel manager for GPUFiniteDifferenceFunction class */ GPUKernelManager::Pointer m_GPUKernelManager{}; GPUDataPointer m_GPUDataManager{}; /* GPU kernel handle for GPUComputeUpdate */ int m_ReduceGPUKernelHandle{}; int m_TestGPUKernelHandle{}; unsigned int m_Size{}; bool m_SmallBlock{}; TElement m_GPUResult, m_CPUResult{}; }; } // end namespace itk #ifndef ITK_MANUAL_INSTANTIATION # include "itkGPUReduction.hxx" #endif #endif