/*=========================================================================
 *
 *  Copyright NumFOCUS
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *         https://www.apache.org/licenses/LICENSE-2.0.txt
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 *
 *=========================================================================*/

//
// Apply Update : out = out + dt*buf
//
#ifdef DIM_1
float linear_interpolate_1D(__global const float *image, float PositionX, int xsize)
{
    /*  Linear interpolation variables */
    int xBas0, xBas1;
    float perc[2]={0, 0};
    float xCom, xComi;
    float color[2]={0, 0};

    /*  Rounded location  */
    float fPositionX;

    /* Determine the coordinates of the pixel(s) which will be come the current pixel */
    /* (using linear interpolation) */
    fPositionX = floor(PositionX);
    xBas0=(int) fPositionX;
    xBas1=xBas0+1;

    /* Linear interpolation constants (percentages) */
    xCom=PositionX-fPositionX;
    xComi=(1-xCom);
    perc[0]=xComi;
    perc[1]=xCom;

    if(xBas0<0) { xBas0=0; if(xBas1<0) { xBas1=0; }}
    if(xBas1>(xsize-1)) { xBas1=xsize-1; if(xBas0>(xsize-1)) { xBas0=xsize-1; }}

    color[0]=image[xBas0];
    color[1]=image[xBas1];
    return color[0]*perc[0]+color[1]*perc[1];
}
#endif

#ifdef DIM_2
float linear_interpolate_2D(__global const float *image, float PositionX, float PositionY, int xsize, int ysize)
{
    /*  Linear interpolation variables */
    int xBas0, xBas1, yBas0, yBas1;
    float perc[4]={0, 0, 0, 0};
    float xCom, yCom, xComi, yComi;
    float color[4]={0, 0, 0, 0};

    /*  Rounded location  */
    float fPositionX, fPositionY;

    /* Determine the coordinates of the pixel(s) which will be come the current pixel */
    /* (using linear interpolation) */
    fPositionX = floor(PositionX); fPositionY = floor(PositionY);
    xBas0=(int) fPositionX; yBas0=(int) fPositionY;
    xBas1=xBas0+1; yBas1=yBas0+1;

    /* Linear interpolation constants (percentages) */
    xCom=PositionX-fPositionX; yCom=PositionY-fPositionY;
    xComi=(1-xCom); yComi=(1-yCom);
    perc[0]=xComi * yComi;
    perc[1]=xComi * yCom;
    perc[2]=xCom * yComi;
    perc[3]=xCom * yCom;

    if(xBas0<0) { xBas0=0; if(xBas1<0) { xBas1=0; }}
    if(yBas0<0) { yBas0=0; if(yBas1<0) { yBas1=0; }}
    if(xBas1>(xsize-1)) { xBas1=xsize-1; if(xBas0>(xsize-1)) { xBas0=xsize-1; }}
    if(yBas1>(ysize-1)) { yBas1=ysize-1; if(yBas0>(ysize-1)) { yBas0=ysize-1; }}

    color[0]=image[xBas0+yBas0*xsize];
    color[1]=image[xBas0+yBas1*xsize];
    color[2]=image[xBas1+yBas0*xsize];
    color[3]=image[xBas1+yBas1*xsize];
    return color[0]*perc[0]+color[1]*perc[1]+color[2]*perc[2]+color[3]*perc[3];
}
#endif

#ifdef DIM_3
float linear_interpolate_3D(__global const float *image, float Tlocalx, float Tlocaly, float Tlocalz, int xsize, int ysize, int zsize)
{
    float Iout;
    /*  Linear interpolation variables */
    int xBas0, xBas1, yBas0, yBas1, zBas0, zBas1;
    float perc[8];
    float xCom, yCom, zCom;
    float xComi, yComi, zComi;
    float color[8]={0, 0, 0, 0, 0, 0, 0, 0};
    float fTlocalx, fTlocaly, fTlocalz;

    fTlocalx=floor(Tlocalx); fTlocaly=floor(Tlocaly); fTlocalz=floor(Tlocalz);

    /* Determine the coordinates of the pixel(s) which will be come the current pixel */
    /* (using linear interpolation) */
    xBas0=(int) fTlocalx; yBas0=(int) fTlocaly; zBas0=(int) fTlocalz;
    xBas1=xBas0+1;      yBas1=yBas0+1;      zBas1=zBas0+1;

    /* Clamp to boundary */
    if(xBas0<0) {xBas0=0; if(xBas1<0) { xBas1=0; }}
    if(yBas0<0) {yBas0=0; if(yBas1<0) { yBas1=0; }}
    if(zBas0<0) {zBas0=0; if(zBas1<0) { zBas1=0; }}
    if(xBas1>(xsize-1)) { xBas1=xsize-1; if(xBas0>(xsize-1)) { xBas0=xsize-1; }}
    if(yBas1>(ysize-1)) { yBas1=ysize-1; if(yBas0>(ysize-1)) { yBas0=ysize-1; }}
    if(zBas1>(zsize-1)) { zBas1=zsize-1; if(zBas0>(zsize-1)) { zBas0=zsize-1; }}

    /*  Get intensities */
#define getcolor_mindex3_float( x, y, z, sizx, sizy, sizz, I) ( I[z*sizx*sizy+y*sizx+x] )
    color[0]=getcolor_mindex3_float(xBas0, yBas0, zBas0, xsize, ysize, zsize, image);
    color[1]=getcolor_mindex3_float(xBas0, yBas0, zBas1, xsize, ysize, zsize, image);
    color[2]=getcolor_mindex3_float(xBas0, yBas1, zBas0, xsize, ysize, zsize, image);
    color[3]=getcolor_mindex3_float(xBas0, yBas1, zBas1, xsize, ysize, zsize, image);
    color[4]=getcolor_mindex3_float(xBas1, yBas0, zBas0, xsize, ysize, zsize, image);
    color[5]=getcolor_mindex3_float(xBas1, yBas0, zBas1, xsize, ysize, zsize, image);
    color[6]=getcolor_mindex3_float(xBas1, yBas1, zBas0, xsize, ysize, zsize, image);
    color[7]=getcolor_mindex3_float(xBas1, yBas1, zBas1, xsize, ysize, zsize, image);

    /* Linear interpolation constants (percentages) */
    xCom=Tlocalx-fTlocalx;  yCom=Tlocaly-fTlocaly;   zCom=Tlocalz-fTlocalz;

    xComi=(1-xCom); yComi=(1-yCom); zComi=(1-zCom);
    perc[0]=xComi * yComi; perc[1]=perc[0] * zCom; perc[0]=perc[0] * zComi;
    perc[2]=xComi * yCom;  perc[3]=perc[2] * zCom; perc[2]=perc[2] * zComi;
    perc[4]=xCom * yComi;  perc[5]=perc[4] * zCom; perc[4]=perc[4] * zComi;
    perc[6]=xCom * yCom;   perc[7]=perc[6] * zCom; perc[6]=perc[6] * zComi;

    /* Set the current pixel value */
    Iout =color[0]*perc[0]+color[1]*perc[1]+color[2]*perc[2]+color[3]*perc[3]+color[4]*perc[4]+color[5]*perc[5]+color[6]*perc[6]+color[7]*perc[7];
    return Iout;
}
#endif

#ifdef DIM_1
float ComputeGradient(__global const IMGPIXELTYPE *img, int xpos, int width)
{
  IMGPIXELTYPE derivative;

  if (xpos <= 0) return 0;
  if (xpos >= width-1) return 0;

  float derivative = (img[xpos+1] - img[xpos-1]) * 0.5;
  return derivative;
}
#endif

#ifdef DIM_2
float2 ComputeGradient(__global const IMGPIXELTYPE *img, int xpos, int ypos, int width, int height)
{
  float2 derivative;
  int gidx = ypos*width + xpos;

  if (xpos <= 0 || xpos >= width-1)
    {
    derivative.x = 0;
    }
  else
    {
    derivative.x = (img[gidx+1] - img[gidx-1]) * 0.5;
    }

  if (ypos <= 0 || ypos >= height-1)
    {
    derivative.y = 0;
    }
  else
    {
    derivative.y = (img[gidx+width] - img[gidx-width]) * 0.5;
    }

  return derivative;
}
#endif

#ifdef DIM_3
float3 ComputeGradient(__global const IMGPIXELTYPE *img, int xpos, int ypos, int zpos, int width, int height, int depth)
{
  float3 derivative;
  int gidx = zpos*width*height + ypos*width + xpos;

  if (xpos <= 0 || xpos >= width-1)
    {
    derivative.x = 0;
    }
  else
    {
    derivative.x = (img[gidx+1] - img[gidx-1]) * 0.5;
    }

  if (ypos <= 0 || ypos >= height-1)
    {
    derivative.y = 0;
    }
  else
    {
    derivative.y = (img[gidx+width] - img[gidx-width]) * 0.5;
    }

  if (zpos <= 0 || zpos >= depth-1)
    {
    derivative.z = 0;
    }
  else
    {
    derivative.z = (img[gidx+width*height] - img[gidx-width*height]) * 0.5;
    }

  return derivative;
}
#endif

#ifdef DIM_1

#define DIM 1
__kernel void ComputeUpdate(__global const IMGPIXELTYPE *fix,
                            __global const IMGPIXELTYPE *mov,
                            __global OUTPIXELTYPE *out,
                            __global BUFPIXELTYPE *buf,
                            __global int          *count, //statistics
                            __global IMGPIXELTYPE *change,
                            __global IMGPIXELTYPE *metric,
                            float normalizer,
                            int width)
{
  int gix = get_global_id(0);
  unsigned int gidx = gix;
  OUTPIXELTYPE xwarp;
  OUTPIXELTYPE diff, diffSquare, valueMov, denominator, gradSquare;

  float grad, update;
  int i;

  if(gix < width)
    {
    xwarp = gix + out[gix];
    if (xwarp < 0 || xwarp > width-1)
      {
      buf[gix] = 0;

      count[gidx]  = 0;
      change[gidx] = 0;
      metric[gidx] = 0;
      return;
      }

    valueMov = linear_interpolate_1D(mov, xwarp, width);
    diff = fix[gix] - valueMov;
    diffSquare = diff * diff;

    count[gidx]  = 1;
    metric[gidx] = diffSquare;

    grad = ComputeGradient(fix, gix, width);
    gradSquare = grad * grad;

    denominator = diffSquare / normalizer + gradSquare;
    if (denominator == 0)
      {
      buf[gix] = 0;

      change[gidx] = 0;
      return;
      }

    update   = grad * diff / denominator;
    buf[gix] = update;

    change[gidx] = update.x * update.x;
    }
}
#endif

#ifdef DIM_2

#define DIM 2
__kernel void ComputeUpdate(__global const IMGPIXELTYPE *fix,
                            __global const IMGPIXELTYPE *mov,
                            __global OUTPIXELTYPE *out,
                            __global BUFPIXELTYPE *buf,
                            __global int          *count, //statistics
                            __global IMGPIXELTYPE *change,
                            __global IMGPIXELTYPE *metric,
                            float normalizer,
                            int width, int height)
{
  int gix = get_global_id(0);
  int giy = get_global_id(1);
  unsigned int gidx = width*giy + gix;
  unsigned int gidx2 = DIM*gidx;

  OUTPIXELTYPE xwarp, ywarp;
  OUTPIXELTYPE diff, diffSquare, valueMov, denominator, gradSquare;

  float2 grad, update;

  if(gix < width && giy < height)
    {
    xwarp = gix + out[gidx2];
    ywarp = giy + out[gidx2+1];
    bool isNotValid = false;
    if (xwarp < 0 || ywarp < 0)
    {
      isNotValid = true;
    }
    if ( xwarp > width-1 || ywarp > height-1)
    {
      isNotValid = true;
    }
    if (isNotValid)
    {
      buf[gidx2]   = 0;
      buf[gidx2+1] = 0;

      count[gidx]  = 0;
      change[gidx] = 0;
      metric[gidx] = 0;
      return;
    }

    valueMov = linear_interpolate_2D(mov, xwarp, ywarp, width, height);
    diff = fix[gidx] - valueMov;
    diffSquare = diff * diff;

    count[gidx]  = 1;
    metric[gidx] = diffSquare;

    grad = ComputeGradient(fix, gix, giy, width, height);
    gradSquare = grad.x * grad.x + grad.y * grad.y;

    denominator = diffSquare / normalizer + gradSquare;
    if (denominator == 0)
      {
      buf[gidx2]   = 0;
      buf[gidx2+1] = 0;

      change[gidx] = 0;
      return;
      }

    update.x   = grad.x * diff / denominator;
    update.y   = grad.y * diff / denominator;
    buf[gidx2]   = update.x;
    buf[gidx2+1] = update.y;

    change[gidx] = update.x * update.x + update.y * update.y;
    }
}
#endif

#ifdef DIM_3

#define DIM 3

__kernel void ComputeUpdate(__global const IMGPIXELTYPE *fix,
                            __global const IMGPIXELTYPE *mov,
                            __global OUTPIXELTYPE *out,
                            __global BUFPIXELTYPE *buf,
                            __global int          *count, //statistics
                            __global IMGPIXELTYPE *change,
                            __global IMGPIXELTYPE *metric,
                            float normalizer,
                            int width, int height, int depth)
{
  int gix = get_global_id(0);
  int giy = get_global_id(1);
  int giz = get_global_id(2);
  unsigned int gidx = width*height*giz + width*giy + gix;
  unsigned int gidx2 = DIM*gidx;

  OUTPIXELTYPE xwarp, ywarp, zwarp;
  OUTPIXELTYPE diff, diffSquare, valueMov, denominator, gradSquare;

  float3 grad, update;

  /* NOTE: More than three-level nested conditional statements (e.g.,
     if A && B && C..) invalidates command queue during kernel
     execution on Apple OpenCL 1.0 (such Macbook Pro with NVIDIA 9600M
     GT). Therefore, we flattened conditional statements. */
  bool isValid = true;
  if(gix < 0 || gix >= width) isValid = false;
  if(giy < 0 || giy >= height) isValid = false;
  if(giz < 0 || giz >= depth) isValid = false;

  if(isValid)
    {

    xwarp = gix + out[gidx2];
    ywarp = giy + out[gidx2+1];
    zwarp = giz + out[gidx2+2];
    bool isOutOfRange = false;
    if (xwarp < 0 || xwarp > width-1) isOutOfRange = true;
    if (ywarp < 0 || ywarp > height-1) isOutOfRange = true;
    if (zwarp < 0 || zwarp > depth-1) isOutOfRange = true;
    if (isOutOfRange)
      {
      buf[gidx2]   = 0;
      buf[gidx2+1] = 0;
      buf[gidx2+2] = 0;

      count[gidx]  = 0;
      change[gidx] = 0;
      metric[gidx] = 0;
      return;
      }

    valueMov = linear_interpolate_3D(mov, xwarp, ywarp, zwarp, width, height, depth);
    diff = fix[gidx] - valueMov;
    diffSquare = diff * diff;

    count[gidx]  = 1;
    metric[gidx] = diffSquare;

    grad = ComputeGradient(fix, gix, giy, giz, width, height, depth);
    gradSquare = grad.x * grad.x + grad.y * grad.y + grad.z * grad.z;

    denominator = diffSquare / normalizer + gradSquare;
    // to match checks in the CPU version of the Demons Registration Function,
    // compare with 0.000000001
    if (denominator < 0.000000001)
      {
      buf[gidx2]   = 0;
      buf[gidx2+1] = 0;
      buf[gidx2+2] = 0;

      change[gidx] = 0;
      return;
      }
    // to match checks in teh CPU version of the Demons Registration Function,
    // compare diff with 0.001
    if (diff > -0.001 && diff < 0.001)
      {
      buf[gidx2]   = 0;
      buf[gidx2+1] = 0;
      buf[gidx2+2] = 0;

      change[gidx] = 0;
      return;
      }

    update.x   = grad.x * diff / denominator;
    update.y   = grad.y * diff / denominator;
    update.z   = grad.z * diff / denominator;
    buf[gidx2]   = update.x;
    buf[gidx2+1] = update.y;
    buf[gidx2+2] = update.z;

    change[gidx] = update.x * update.x + update.y * update.y + update.z * update.z;
    }
}
#endif