/*========================================================================= * * Copyright NumFOCUS * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * https://www.apache.org/licenses/LICENSE-2.0.txt * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * *=========================================================================*/ #ifndef itkGPUGradientNDAnisotropicDiffusionFunction_hxx #define itkGPUGradientNDAnisotropicDiffusionFunction_hxx #include "itkNumericTraits.h" #include "itkOpenCLUtil.h" namespace itk { template double GPUGradientNDAnisotropicDiffusionFunction::m_MIN_NORM = 1.0e-10; template GPUGradientNDAnisotropicDiffusionFunction::GPUGradientNDAnisotropicDiffusionFunction() { unsigned int i, j; RadiusType r; for (i = 0; i < ImageDimension; ++i) { r[i] = 1; } this->SetRadius(r); // Dummy neighborhood used to set up the slices. Neighborhood it; it.SetRadius(r); // Slice the neighborhood m_Center = it.Size() / 2; for (i = 0; i < ImageDimension; ++i) { m_Stride[i] = it.GetStride(i); } for (i = 0; i < ImageDimension; ++i) { x_slice[i] = std::slice(m_Center - m_Stride[i], 3, m_Stride[i]); } for (i = 0; i < ImageDimension; ++i) { for (j = 0; j < ImageDimension; ++j) { // For taking derivatives in the i direction that are offset one // pixel in the j direction. xa_slice[i][j] = std::slice((m_Center + m_Stride[j]) - m_Stride[i], 3, m_Stride[i]); xd_slice[i][j] = std::slice((m_Center - m_Stride[j]) - m_Stride[i], 3, m_Stride[i]); } } // Allocate the derivative operator. m_DerivativeOperator.SetDirection(0); // Not relevant, will be applied in a slice-based // fashion. m_DerivativeOperator.SetOrder(1); m_DerivativeOperator.CreateDirectional(); // // Create GPU Kernel // std::ostringstream defines; if (TImage::ImageDimension > 3) { itkExceptionMacro("GPUGradientNDAnisotropicDiffusionFunction supports 1/2/3D image."); } defines << "#define DIM_" << TImage::ImageDimension << '\n' << "#define BLOCK_SIZE " << OpenCLGetLocalBlockSize(TImage::ImageDimension) << '\n'; std::string pixeltypename = GetTypename(typeid(typename TImage::PixelType)); defines << "#define PIXELTYPE " << pixeltypename << '\n'; #ifdef __APPLE__ // This is to work around a bug in the OpenCL compiler on Mac OS 10.6 and 10.7 with NVidia drivers // where the compiler was not handling unsigned char arguments correctly. // be sure to define the kernel arguments as ArgType in the kernel source // Using unsigned short instead of unsigned char in the kernel definition // is a known workaround to this problem. if (pixeltypename == "unsigned char") { defines << "#define ARGTYPE unsigned short\n"; } else { defines << "#define ARGTYPE " << pixeltypename << '\n'; } #else defines << "#define ARGTYPE " << pixeltypename << '\n'; #endif const char * GPUSource = GPUGradientNDAnisotropicDiffusionFunction::GetOpenCLSource(); // load and build program this->m_GPUKernelManager->LoadProgramFromString(GPUSource, defines.str().c_str()); // create kernel this->m_ComputeUpdateGPUKernelHandle = this->m_GPUKernelManager->CreateKernel("ComputeUpdate"); } template void GPUGradientNDAnisotropicDiffusionFunction::GPUComputeUpdate(const typename TImage::Pointer output, typename TImage::Pointer buffer, void * itkNotUsed(globalData)) { /** Launch GPU kernel to update buffer with output * GPU version of ComputeUpdate() - compute entire update buffer */ using GPUImageType = typename itk::GPUTraits::Type; typename GPUImageType::Pointer inPtr = dynamic_cast(output.GetPointer()); typename GPUImageType::Pointer bfPtr = dynamic_cast(buffer.GetPointer()); typename GPUImageType::SizeType outSize = bfPtr->GetLargestPossibleRegion().GetSize(); int imgSize[3]; imgSize[0] = imgSize[1] = imgSize[2] = 1; float imgScale[3]; imgScale[0] = imgScale[1] = imgScale[2] = 1.0f; int ImageDim = static_cast(TImage::ImageDimension); for (int i = 0; i < ImageDim; ++i) { imgSize[i] = outSize[i]; imgScale[i] = this->m_ScaleCoefficients[i]; } size_t localSize[3], globalSize[3]; localSize[0] = localSize[1] = localSize[2] = OpenCLGetLocalBlockSize(ImageDim); for (int i = 0; i < ImageDim; ++i) { // total # of threads globalSize[i] = localSize[i] * static_cast(ceil(static_cast(outSize[i]) / static_cast(localSize[i]))); } // arguments set up int argidx = 0; this->m_GPUKernelManager->SetKernelArgWithImage( this->m_ComputeUpdateGPUKernelHandle, argidx++, inPtr->GetGPUDataManager()); this->m_GPUKernelManager->SetKernelArgWithImage( this->m_ComputeUpdateGPUKernelHandle, argidx++, bfPtr->GetGPUDataManager()); this->m_GPUKernelManager->SetKernelArg( this->m_ComputeUpdateGPUKernelHandle, argidx++, sizeof(typename TImage::PixelType), &(m_K)); // filter scale parameter for (int i = 0; i < ImageDim; ++i) { this->m_GPUKernelManager->SetKernelArg( this->m_ComputeUpdateGPUKernelHandle, argidx++, sizeof(float), &(imgScale[i])); } // image size for (int i = 0; i < ImageDim; ++i) { this->m_GPUKernelManager->SetKernelArg(this->m_ComputeUpdateGPUKernelHandle, argidx++, sizeof(int), &(imgSize[i])); } // launch kernel this->m_GPUKernelManager->LaunchKernel(this->m_ComputeUpdateGPUKernelHandle, ImageDim, globalSize, localSize); } } // end namespace itk #endif