/*========================================================================= * * Copyright NumFOCUS * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * https://www.apache.org/licenses/LICENSE-2.0.txt * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * *=========================================================================*/ #ifndef itkGPUScalarAnisotropicDiffusionFunction_hxx #define itkGPUScalarAnisotropicDiffusionFunction_hxx #include "itkConstNeighborhoodIterator.h" #include "itkNeighborhoodInnerProduct.h" #include "itkNeighborhoodAlgorithm.h" #include "itkDerivativeOperator.h" #include "itkMakeUniqueForOverwrite.h" namespace itk { template GPUScalarAnisotropicDiffusionFunction::GPUScalarAnisotropicDiffusionFunction() { this->m_AnisotropicDiffusionFunctionGPUBuffer = GPUDataManager::New(); this->m_AnisotropicDiffusionFunctionGPUKernelManager = GPUKernelManager::New(); // load GPU kernel std::ostringstream defines; if (ImageDimension > 3 || ImageDimension < 1) { itkExceptionMacro("GPUScalarAnisotropicDiffusionFunction supports 1/2/3D image."); } defines << "#define DIM_" << ImageDimension << '\n'; defines << "#define PIXELTYPE "; GetTypenameInString(typeid(typename TImage::PixelType), defines); const char * GPUSource = GPUScalarAnisotropicDiffusionFunction::GetOpenCLSource(); // load and build program this->m_AnisotropicDiffusionFunctionGPUKernelManager->LoadProgramFromString(GPUSource, defines.str().c_str()); // create kernel this->m_AverageGradientMagnitudeSquaredGPUKernelHandle = this->m_AnisotropicDiffusionFunctionGPUKernelManager->CreateKernel("AverageGradientMagnitudeSquared"); } template void GPUScalarAnisotropicDiffusionFunction::GPUCalculateAverageGradientMagnitudeSquared(TImage * ip) { // GPU kernel to compute Average Squared Gradient Magnitude using GPUImageType = typename itk::GPUTraits::Type; typename GPUImageType::Pointer inPtr = dynamic_cast(ip); typename GPUImageType::SizeType outSize = inPtr->GetLargestPossibleRegion().GetSize(); int imgSize[3]; imgSize[0] = imgSize[1] = imgSize[2] = 1; float imgScale[3]; imgScale[0] = imgScale[1] = imgScale[2] = 1.0f; int ImageDim = static_cast(TImage::ImageDimension); size_t localSize[3], globalSize[3]; localSize[0] = localSize[1] = localSize[2] = 1; globalSize[0] = globalSize[1] = globalSize[2] = 1; unsigned int blockSize = OpenCLGetLocalBlockSize(ImageDim); unsigned int numPixel = 1; unsigned int bufferSize = 1; for (int i = 0; i < ImageDim; ++i) { imgSize[i] = outSize[i]; imgScale[i] = this->m_ScaleCoefficients[i]; localSize[i] = (blockSize <= outSize[i]) ? blockSize : 1; // total # of threads globalSize[i] = localSize[i] * static_cast(ceil(static_cast(outSize[i]) / static_cast(localSize[i]))); bufferSize *= globalSize[i] / localSize[i]; numPixel *= imgSize[i]; } // Initialize & Allocate GPU Buffer if (bufferSize != this->m_AnisotropicDiffusionFunctionGPUBuffer->GetBufferSize()) { this->m_AnisotropicDiffusionFunctionGPUBuffer->Initialize(); this->m_AnisotropicDiffusionFunctionGPUBuffer->SetBufferSize(sizeof(float) * bufferSize); this->m_AnisotropicDiffusionFunctionGPUBuffer->Allocate(); } typename GPUKernelManager::Pointer kernelManager = this->m_AnisotropicDiffusionFunctionGPUKernelManager; int kernelHandle = this->m_AverageGradientMagnitudeSquaredGPUKernelHandle; // Set arguments int argidx = 0; kernelManager->SetKernelArgWithImage(kernelHandle, argidx++, inPtr->GetGPUDataManager()); kernelManager->SetKernelArgWithImage(kernelHandle, argidx++, this->m_AnisotropicDiffusionFunctionGPUBuffer); // Set shared memory args if (ImageDim == 2) { kernelManager->SetKernelArg(kernelHandle, argidx++, sizeof(float) * localSize[0] * localSize[1], nullptr); kernelManager->SetKernelArg(kernelHandle, argidx++, sizeof(float) * localSize[0], nullptr); kernelManager->SetKernelArg(kernelHandle, argidx++, sizeof(float) * localSize[0], nullptr); kernelManager->SetKernelArg(kernelHandle, argidx++, sizeof(float) * localSize[1], nullptr); kernelManager->SetKernelArg(kernelHandle, argidx++, sizeof(float) * localSize[1], nullptr); } else if (ImageDim == 3) { kernelManager->SetKernelArg( kernelHandle, argidx++, sizeof(float) * localSize[0] * localSize[1] * localSize[2], nullptr); kernelManager->SetKernelArg(kernelHandle, argidx++, sizeof(float) * localSize[0] * localSize[1], nullptr); kernelManager->SetKernelArg(kernelHandle, argidx++, sizeof(float) * localSize[0] * localSize[1], nullptr); kernelManager->SetKernelArg(kernelHandle, argidx++, sizeof(float) * localSize[0] * localSize[2], nullptr); kernelManager->SetKernelArg(kernelHandle, argidx++, sizeof(float) * localSize[0] * localSize[2], nullptr); kernelManager->SetKernelArg(kernelHandle, argidx++, sizeof(float) * localSize[1] * localSize[2], nullptr); kernelManager->SetKernelArg(kernelHandle, argidx++, sizeof(float) * localSize[1] * localSize[2], nullptr); } else { // no need to set shared memory args when dimension is not 2 or 3 } // Set filter scale parameter for (int i = 0; i < ImageDim; ++i) { kernelManager->SetKernelArg(kernelHandle, argidx++, sizeof(float), &(imgScale[i])); } // Set image size for (int i = 0; i < ImageDim; ++i) { kernelManager->SetKernelArg(kernelHandle, argidx++, sizeof(int), &(imgSize[i])); } // launch kernel kernelManager->LaunchKernel(kernelHandle, ImageDim, globalSize, localSize); // Read back intermediate sums from GPU and compute final value double sum = 0; const auto intermSum = make_unique_for_overwrite(bufferSize); this->m_AnisotropicDiffusionFunctionGPUBuffer->SetCPUBufferPointer(intermSum.get()); this->m_AnisotropicDiffusionFunctionGPUBuffer->SetCPUDirtyFlag(true); // // CPU // is // dirty this->m_AnisotropicDiffusionFunctionGPUBuffer->SetGPUDirtyFlag(false); this->m_AnisotropicDiffusionFunctionGPUBuffer->UpdateCPUBuffer(); // // Copy // GPU->CPU for (int i = 0; i < static_cast(bufferSize); ++i) { sum += static_cast(intermSum[i]); } this->SetAverageGradientMagnitudeSquared(static_cast(sum / static_cast(numPixel))); } } // end namespace itk #endif