/*========================================================================= * * Copyright NumFOCUS * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * https://www.apache.org/licenses/LICENSE-2.0.txt * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * *=========================================================================*/ #ifndef itkGPUDemonsRegistrationFunction_hxx #define itkGPUDemonsRegistrationFunction_hxx #include "itkMacro.h" #include "itkMath.h" namespace itk { template GPUDemonsRegistrationFunction::GPUDemonsRegistrationFunction() { RadiusType r; unsigned int j; for (j = 0; j < ImageDimension; ++j) { r[j] = 0; } this->SetRadius(r); m_TimeStep = 1.0; m_DenominatorThreshold = 1e-9; m_IntensityDifferenceThreshold = 0.001; this->SetMovingImage(nullptr); this->SetFixedImage(nullptr); // m_FixedImageSpacing.Fill( 1.0 ); // m_FixedImageOrigin.Fill( 0.0 ); m_Normalizer = 1.0; m_FixedImageGradientCalculator = GradientCalculatorType::New(); auto interp = DefaultInterpolatorType::New(); m_MovingImageInterpolator = static_cast(interp.GetPointer()); m_Metric = NumericTraits::max(); m_SumOfSquaredDifference = 0.0; m_NumberOfPixelsProcessed = 0L; m_RMSChange = NumericTraits::max(); m_SumOfSquaredChange = 0.0; m_MovingImageGradientCalculator = MovingImageGradientCalculatorType::New(); m_UseMovingImageGradient = false; /*** Prepare GPU opencl program ***/ m_GPUPixelCounter = nullptr; m_GPUSquaredChange = nullptr; m_GPUSquaredDifference = nullptr; std::ostringstream defines; if (TDisplacementField::ImageDimension > 3 || TDisplacementField::ImageDimension < 1) { itkExceptionMacro("GPUDenseFiniteDifferenceImageFilter supports 1/2/3D image."); } defines << "#define DIM_" << TDisplacementField::ImageDimension << '\n'; defines << "#define IMGPIXELTYPE "; GetTypenameInString(typeid(typename TFixedImage::PixelType), defines); defines << "#define BUFPIXELTYPE "; GetTypenameInString(typeid(typename TDisplacementField::PixelType::ValueType), defines); defines << "#define OUTPIXELTYPE "; GetTypenameInString(typeid(typename TDisplacementField::PixelType::ValueType), defines); const char * GPUSource = GPUDemonsRegistrationFunction::GetOpenCLSource(); // load and build program this->m_GPUKernelManager->LoadProgramFromString(GPUSource, defines.str().c_str()); // create kernel m_ComputeUpdateGPUKernelHandle = this->m_GPUKernelManager->CreateKernel("ComputeUpdate"); } template void GPUDemonsRegistrationFunction::PrintSelf(std::ostream & os, Indent indent) const { Superclass::PrintSelf(os, indent); os << indent << "ZeroUpdateReturn: " << static_cast::PrintType>(m_ZeroUpdateReturn) << std::endl; os << indent << "Normalizer: " << m_Normalizer << std::endl; itkPrintSelfObjectMacro(FixedImageGradientCalculator); itkPrintSelfObjectMacro(MovingImageGradientCalculator); os << indent << "UseMovingImageGradient: " << (m_UseMovingImageGradient ? "On" : "Off") << std::endl; itkPrintSelfObjectMacro(MovingImageInterpolator); os << indent << "TimeStep: " << static_cast::PrintType>(m_TimeStep) << std::endl; os << indent << "DenominatorThreshold: " << m_DenominatorThreshold << std::endl; os << indent << "IntensityDifferenceThreshold: " << m_IntensityDifferenceThreshold << std::endl; os << indent << "Metric: " << m_Metric << std::endl; os << indent << "SumOfSquaredDifference: " << m_SumOfSquaredDifference << std::endl; os << indent << "NumberOfPixelsProcessed: " << static_cast::PrintType>(m_NumberOfPixelsProcessed) << std::endl; os << indent << "RMSChange: " << m_RMSChange << std::endl; os << indent << "SumOfSquaredChange: " << m_SumOfSquaredChange << std::endl; itkPrintSelfObjectMacro(GPUPixelCounter); itkPrintSelfObjectMacro(GPUSquaredChange); itkPrintSelfObjectMacro(GPUSquaredDifference); } template void GPUDemonsRegistrationFunction::SetIntensityDifferenceThreshold( double threshold) { m_IntensityDifferenceThreshold = threshold; } template double GPUDemonsRegistrationFunction::GetIntensityDifferenceThreshold() const { return m_IntensityDifferenceThreshold; } template void GPUDemonsRegistrationFunction::InitializeIteration() { if (!this->GetMovingImage() || !this->GetFixedImage() || !m_MovingImageInterpolator) { itkExceptionMacro("MovingImage, FixedImage and/or Interpolator not set"); } // cache fixed image information SpacingType fixedImageSpacing = this->GetFixedImage()->GetSpacing(); m_ZeroUpdateReturn.Fill(0.0); // compute the normalizer m_Normalizer = 0.0; for (unsigned int k = 0; k < ImageDimension; ++k) { m_Normalizer += fixedImageSpacing[k] * fixedImageSpacing[k]; } m_Normalizer /= static_cast(ImageDimension); // setup gradient calculator m_FixedImageGradientCalculator->SetInputImage(this->GetFixedImage()); m_MovingImageGradientCalculator->SetInputImage(this->GetMovingImage()); // setup moving image interpolator m_MovingImageInterpolator->SetInputImage(this->GetMovingImage()); // initialize metric computation variables m_SumOfSquaredDifference = 0.0; m_NumberOfPixelsProcessed = 0L; m_SumOfSquaredChange = 0.0; } template void GPUDemonsRegistrationFunction::GPUAllocateMetricData( unsigned int numPixels) { // allocate gpu buffers for statistics // if (m_GPUPixelCounter == (GPUReduction::Pointer)nullptr) m_GPUPixelCounter = GPUReduction::New(); m_GPUSquaredChange = GPUReduction::New(); m_GPUSquaredDifference = GPUReduction::New(); m_GPUPixelCounter->InitializeKernel(numPixels); m_GPUSquaredChange->InitializeKernel(numPixels); m_GPUSquaredDifference->InitializeKernel(numPixels); m_GPUPixelCounter->AllocateGPUInputBuffer(); m_GPUSquaredChange->AllocateGPUInputBuffer(); m_GPUSquaredDifference->AllocateGPUInputBuffer(); } template void GPUDemonsRegistrationFunction::GPUReleaseMetricData() { m_GPUPixelCounter->ReleaseGPUInputBuffer(); m_GPUSquaredChange->ReleaseGPUInputBuffer(); m_GPUSquaredDifference->ReleaseGPUInputBuffer(); } template void GPUDemonsRegistrationFunction::GPUComputeUpdate( DisplacementFieldTypePointer output, DisplacementFieldTypePointer update, void * itkNotUsed(gd)) { auto * fixedImage = const_cast(this->GetFixedImage()); auto * movingImage = const_cast(this->GetMovingImage()); typename DisplacementFieldType::SizeType outSize = output->GetLargestPossibleRegion().GetSize(); int imgSize[3]; imgSize[0] = imgSize[1] = imgSize[2] = 1; int ImageDim = static_cast(DisplacementFieldType::ImageDimension); for (int i = 0; i < ImageDim; ++i) { imgSize[i] = outSize[i]; } size_t localSize[3], globalSize[3]; localSize[0] = localSize[1] = localSize[2] = OpenCLGetLocalBlockSize(ImageDim); for (int i = 0; i < ImageDim; ++i) { // total # of threads globalSize[i] = localSize[i] * static_cast(ceil(static_cast(outSize[i]) / static_cast(localSize[i]))); } float normalizer = 1; // arguments set up int argidx = 0; this->m_GPUKernelManager->SetKernelArgWithImage( m_ComputeUpdateGPUKernelHandle, argidx++, fixedImage->GetGPUDataManager()); this->m_GPUKernelManager->SetKernelArgWithImage( m_ComputeUpdateGPUKernelHandle, argidx++, movingImage->GetGPUDataManager()); this->m_GPUKernelManager->SetKernelArgWithImage( m_ComputeUpdateGPUKernelHandle, argidx++, output->GetGPUDataManager()); this->m_GPUKernelManager->SetKernelArgWithImage( m_ComputeUpdateGPUKernelHandle, argidx++, update->GetGPUDataManager()); this->m_GPUKernelManager->SetKernelArgWithImage( m_ComputeUpdateGPUKernelHandle, argidx++, m_GPUPixelCounter->GetGPUDataManager()); this->m_GPUKernelManager->SetKernelArgWithImage( m_ComputeUpdateGPUKernelHandle, argidx++, m_GPUSquaredChange->GetGPUDataManager()); this->m_GPUKernelManager->SetKernelArgWithImage( m_ComputeUpdateGPUKernelHandle, argidx++, m_GPUSquaredDifference->GetGPUDataManager()); this->m_GPUKernelManager->SetKernelArg(m_ComputeUpdateGPUKernelHandle, argidx++, sizeof(float), &(normalizer)); for (int i = 0; i < ImageDim; ++i) { this->m_GPUKernelManager->SetKernelArg(m_ComputeUpdateGPUKernelHandle, argidx++, sizeof(int), &(imgSize[i])); } // launch kernel this->m_GPUKernelManager->LaunchKernel( m_ComputeUpdateGPUKernelHandle, static_cast(DisplacementFieldType::ImageDimension), globalSize, localSize); // compute statistics m_GPUPixelCounter->GPUGenerateData(); m_GPUSquaredChange->GPUGenerateData(); m_GPUSquaredDifference->GPUGenerateData(); m_SumOfSquaredDifference = m_GPUSquaredDifference->GetGPUResult(); m_NumberOfPixelsProcessed = m_GPUPixelCounter->GetGPUResult(); m_SumOfSquaredChange = m_GPUSquaredChange->GetGPUResult(); if (m_NumberOfPixelsProcessed) { m_Metric = m_SumOfSquaredDifference / static_cast(m_NumberOfPixelsProcessed); m_RMSChange = std::sqrt(m_SumOfSquaredChange / static_cast(m_NumberOfPixelsProcessed)); } } template auto GPUDemonsRegistrationFunction::ComputeUpdate( const NeighborhoodType & it, void * gd, const FloatOffsetType & itkNotUsed(offset)) -> PixelType { // Get fixed image related information // Note: no need to check the index is within // fixed image buffer. This is done by the external filter. const IndexType index = it.GetIndex(); const auto fixedValue = static_cast(this->GetFixedImage()->GetPixel(index)); // Get moving image related information PointType mappedPoint; this->GetFixedImage()->TransformIndexToPhysicalPoint(index, mappedPoint); for (unsigned int j = 0; j < ImageDimension; ++j) { mappedPoint[j] += it.GetCenterPixel()[j]; } double movingValue; if (m_MovingImageInterpolator->IsInsideBuffer(mappedPoint)) { movingValue = m_MovingImageInterpolator->Evaluate(mappedPoint); } else { return m_ZeroUpdateReturn; } CovariantVectorType gradient; // Compute the gradient of either fixed or moving image if (!m_UseMovingImageGradient) { gradient = m_FixedImageGradientCalculator->EvaluateAtIndex(index); } else { gradient = m_MovingImageGradientCalculator->Evaluate(mappedPoint); } double gradientSquaredMagnitude = 0; for (unsigned int j = 0; j < ImageDimension; ++j) { gradientSquaredMagnitude += itk::Math::sqr(gradient[j]); } // Compute Update. // In the original equation the denominator is defined as (g-f)^2 + grad_mag^2. // However there is a mismatch in units between the two terms. // The units for the second term is intensity^2/mm^2 while the // units for the first term is intensity^2. This mismatch is particularly // problematic when the fixed image does not have unit spacing. // In this implementation, we normalize the first term by a factor K, // such that denominator = (g-f)^2/K + grad_mag^2 // where K = mean square spacing to compensate for the mismatch in units. const double speedValue = fixedValue - movingValue; const double sqr_speedValue = itk::Math::sqr(speedValue); // update the metric auto * globalData = (GlobalDataStruct *)gd; if (globalData) { globalData->m_SumOfSquaredDifference += sqr_speedValue; globalData->m_NumberOfPixelsProcessed += 1; } const double denominator = sqr_speedValue / m_Normalizer + gradientSquaredMagnitude; if (itk::Math::abs(speedValue) < m_IntensityDifferenceThreshold || denominator < m_DenominatorThreshold) { return m_ZeroUpdateReturn; } PixelType update; for (unsigned int j = 0; j < ImageDimension; ++j) { update[j] = speedValue * gradient[j] / denominator; if (globalData) { globalData->m_SumOfSquaredChange += itk::Math::sqr(update[j]); } } return update; } template void GPUDemonsRegistrationFunction::ReleaseGlobalDataPointer(void * gd) const { const std::unique_ptr globalData(static_cast(gd)); const std::lock_guard lockGuard(m_MetricCalculationMutex); m_SumOfSquaredDifference += globalData->m_SumOfSquaredDifference; m_NumberOfPixelsProcessed += globalData->m_NumberOfPixelsProcessed; m_SumOfSquaredChange += globalData->m_SumOfSquaredChange; if (m_NumberOfPixelsProcessed) { m_Metric = m_SumOfSquaredDifference / static_cast(m_NumberOfPixelsProcessed); m_RMSChange = std::sqrt(m_SumOfSquaredChange / static_cast(m_NumberOfPixelsProcessed)); } } } // end namespace itk #endif