3/16/2015

Ordering any 4 points as LeftTop, RightTop, RightBottom, LeftBottom

Given any four points, we may want to know which one is the left-top (LT), right-top (RT), right-bottom (RB) and left-bottom (LB) corner.
This example shows how to do that.


See the source code in operation in the video.



...
//#include < time.h>  
#include < opencv2\opencv.hpp>  
//#include < opencv2\gpu\gpu.hpp>  
//#include < opencv2\stitching\detail\matchers.hpp >
#include < string>  
#include < stdio.h>  
//#include < queue>

#ifdef _DEBUG          
#pragma comment(lib, "opencv_core249d.lib")  
//#pragma comment(lib, "opencv_imgproc249d.lib")   //MAT processing  
//#pragma comment(lib, "opencv_gpu249d.lib")  
#pragma comment(lib, "opencv_highgui249d.lib")
//#pragma comment(lib, "opencv_objdetect249d.lib")
//#pragma comment(lib, "opencv_calib3d249d.lib") 
//#pragma comment(lib, "opencv_nonfree249d.lib") 
//#pragma comment(lib, "opencv_features2d249d.lib") 
#else  
#pragma comment(lib, "opencv_core249.lib")  
//#pragma comment(lib, "opencv_imgproc249.lib")  
//#pragma comment(lib, "opencv_gpu249.lib")  
#pragma comment(lib, "opencv_highgui249.lib")
//#pragma comment(lib, "opencv_objdetect249.lib")
//#pragma comment(lib, "opencv_calib3d249.lib") 
//#pragma comment(lib, "opencv_nonfree249.lib") 
//#pragma comment(lib, "opencv_features2d249.lib") 
#endif     


using namespace std;  
using namespace cv;  

// Mouse callback installed on the point-selection window (defined further down).
static void onMouse( int event, int x, int y, int, void* );
// Storage for the four points picked with the mouse; shared between main and onMouse.
Point2f roi4point[4]={0,};
// Number of points currently stored in roi4point (0..4).
int roiIndex=0;
// Set to true by onMouse on a right click once four points are stored; ends the selection loop in main.
bool oksign = false;

// NOTE(review): declared only -- no definition or call of MinDistFind is visible in this listing.
Point2f MinDistFind(float x, float y, Point2f* inPoints);
// Reorders four points in place as LT, RT, RB, LB (defined further down).
void PointOrderbyConner(Point2f* inPoints, int w, int h );

// Interactive demo: the user clicks four points on an image, confirms with a
// right click, and the points are then reordered as LT, RT, RB, LB, printed,
// and drawn as a labelled quadrilateral in a second window.
//
// Returns 0 on success, -1 if the input image cannot be loaded.
int main()  
{  
 //image loading
 char fileName[100] = "./road-ahead.jpg";

 //origin
 Mat GetImg = imread( fileName );
 //imread returns an empty Mat when the file is missing or unreadable;
 //displaying an empty image would fail inside imshow, so stop here.
 if( GetImg.empty() )
 {
  printf("Failed to load image: %s\n", fileName);
  return -1;
 }
 //copy for drawing
 Mat RoiImg;
 
 //window
 namedWindow( "set roi by 4 points", 0 );  

 //mouse callback
 setMouseCallback( "set roi by 4 points", onMouse, 0 );  
 
 //point selection until 4 points setting
 while(1)
 {

  if(oksign == true) //right button click
   break;

  //draw point (redraw from a clean copy so reset points disappear)
  RoiImg = GetImg.clone();
  for(int i=0; i< roiIndex; ++i)
   circle(RoiImg, roi4point[i], 5,CV_RGB(255,0,255),5);
  imshow("set roi by 4 points", RoiImg);
    
  //waitKey also pumps the GUI events that deliver the mouse callback
  waitKey(10);
 }



 printf("points ordered by LT, RT, RB, LB \n");
 PointOrderbyConner(roi4point, GetImg.size().width,  GetImg.size().height);
 for(int i=0; i< 4; ++i)
 {
  printf("[%d] (%.2lf, %.2lf) \n",i, roi4point[i].x, roi4point[i].y );
 }


 //drawing
 RoiImg = GetImg.clone();
 string TestStr[4]={"LT","RT","RB","LB"};  
 putText(RoiImg, TestStr[0].c_str(), roi4point[0], CV_FONT_NORMAL, 1, Scalar(255,255,255));
 circle(RoiImg, roi4point[0], 3,CV_RGB(0,0,255));
 //i is declared outside the loop because the closing edge below needs the last index
 int i;
 for(i=1; i< roiIndex; ++i)
 {
  line(RoiImg, roi4point[i-1], roi4point[i], CV_RGB(255,0,0),1 );
  circle(RoiImg, roi4point[i], 1,CV_RGB(0,0,255),3);  
  putText(RoiImg, TestStr[i].c_str(), roi4point[i], CV_FONT_NORMAL, 1, Scalar(255,255,255));
 }

 //close the polygon: last point back to the first
 line(RoiImg, roi4point[0], roi4point[i-1], CV_RGB(255,0,0),1 );
 imshow("set roi by 4 points2", RoiImg);


 waitKey(0);
 return 0;
}  

// Reorders the four points in inPoints, in place, as
// [0]=left-top, [1]=right-top, [2]=right-bottom, [3]=left-bottom.
//
// The points are sorted by ascending y; the two with the smallest y form the
// top pair and the other two the bottom pair. Within each pair the x
// coordinate decides left/right.
//
// inPoints : array of exactly four points (read and overwritten).
// w, h     : not used by this implementation.
void PointOrderbyConner(Point2f* inPoints, int w, int h )
{
 // Copy the coordinates out so the caller's array can be overwritten safely.
 vector< pair< float, float> > byY;
 for(const Point2f* p = inPoints; p != inPoints + 4; ++p)
  byY.push_back( make_pair(p->x, p->y) );

 // Ascending y: indices 0,1 are the top pair, 2,3 the bottom pair.
 sort(byY.begin(), byY.end(),
  [](const pair< float, float>& lhs, const pair< float, float>& rhs){ return lhs.second < rhs.second; } );

 // Top pair: strictly smaller x is the left-top corner.
 const bool topAlreadyLeftRight = byY[0].first < byY[1].first;
 const pair< float, float>& leftTop  = topAlreadyLeftRight ? byY[0] : byY[1];
 const pair< float, float>& rightTop = topAlreadyLeftRight ? byY[1] : byY[0];

 // Bottom pair: strictly larger x is the right-bottom corner.
 const bool bottomAlreadyRightLeft = byY[2].first > byY[3].first;
 const pair< float, float>& rightBottom = bottomAlreadyRightLeft ? byY[2] : byY[3];
 const pair< float, float>& leftBottom  = bottomAlreadyRightLeft ? byY[3] : byY[2];

 inPoints[0].x = leftTop.first;
 inPoints[0].y = leftTop.second;

 inPoints[1].x = rightTop.first;
 inPoints[1].y = rightTop.second;

 inPoints[2].x = rightBottom.first;
 inPoints[2].y = rightBottom.second;

 inPoints[3].x = leftBottom.first;
 inPoints[3].y = leftBottom.second;
}


// Mouse callback for the point-selection window.
//
// Left click  : stores the clicked position as the next point in roi4point.
//               If four points are already stored, the selection is reset
//               and the click becomes the first point of a new selection.
//               Ignored once the selection has been confirmed (oksign).
// Right click : confirms the selection (sets oksign), but only when exactly
//               four points are stored.
//
// event : OpenCV mouse event code.
// x, y  : position of the event as passed by OpenCV.
// The remaining two parameters (flags, user data) are unused.
static void onMouse( int event, int x, int y, int, void* )  
{  
 if( event == CV_EVENT_LBUTTONDOWN && oksign==false)
 {
  //4 point select: a fifth click starts over
  if(roiIndex>=4)
  {
   roiIndex=0;  
   for(int i=0; i< 4; ++i)
    roi4point[i].x = roi4point[i].y =0;
  }

  roi4point[roiIndex].x = (float)x;
  roi4point[roiIndex].y = (float)y;

  //point coordinate print (unset points print as 0.00)
  printf("1:(%.2lf,%.2lf), 2:(%.2lf,%.2lf), 3:(%.2lf,%.2lf), 4:(%.2lf,%.2lf)\n",  
   roi4point[0].x, roi4point[0].y,roi4point[1].x, roi4point[1].y,roi4point[2].x, roi4point[2].y,roi4point[3].x, roi4point[3].y );  
  
  roiIndex++;
 }

 if(event == CV_EVENT_RBUTTONDOWN)
 {
  //set point.
  if(roiIndex == 4)
  {
   oksign = true;
   printf("Warping Start!!!\n");
  }
 }
}  

---

After selecting 4 points, click the right mouse button; the ordering then starts.
The PointOrderbyConner function is the main function; it orders any four points as LT, RT, RB, LB.
The logic is..

1. Sort the points in ascending order of their y-coordinate (in image coordinates a smaller y is nearer the top).
2. Take the first 2 points of the sorted vector (the top pair):
   the one with the smaller x value is LT, the one with the larger x value is RT.
3. Take the last 2 points of the sorted vector (the bottom pair):
   the one with the smaller x value is LB, the one with the larger x value is RB.

Sorry for my poor English.
See the source code for more detail.


Processing every element of an array with a reasonably small number of threads in CUDA (an explanation of tid += blockDim.x * gridDim.x)

See this source code:
...
// Number of array elements processed by the demo (a larger size is left commented out).
#define N 10 //(33*1024)

// Sets every element c[0..N-1] to 1 using a grid-stride loop, so the result
// is correct for any 1D launch configuration, including grids with fewer
// threads than N.
//
// c : device pointer to at least N ints.
//
// Expects a 1D grid of 1D blocks; only the .x components are used.
__global__ void add(int *c){
 // Total number of threads in the grid: each thread advances by this much.
 const int stride = blockDim.x * gridDim.x;

 // The global thread index is threadIdx.x + blockIdx.x * blockDim.x.
 // Using gridDim.x as the block multiplier is only right when it happens
 // to equal blockDim.x; otherwise indices are skipped or duplicated.
 for(int tid = threadIdx.x + blockIdx.x * blockDim.x; tid < N; tid += stride)
 {
  c[tid] = 1;
 }
}




// Reports a failed CUDA runtime call; returns true when status is cudaSuccess.
static bool cudaSucceeded(cudaError_t status, const char* what)
{
 if(status == cudaSuccess)
  return true;

 printf("CUDA error in %s: %s\n", what, cudaGetErrorString(status));
 return false;
}

// Fills a host array with -1, copies it to the device, runs the add kernel
// with 2 blocks of 2 threads (4 threads for N elements), copies the result
// back and prints it.
//
// Returns 0 on success, 1 if any CUDA call fails.
int main(void)
{
 int c[N];
 int *dev_c = 0;
 if( !cudaSucceeded( cudaMalloc( (void**)&dev_c, N*sizeof(int) ), "cudaMalloc" ) )
  return 1;

 // -1 marks elements the kernel has not written.
 for(int i=0; i< N; ++i)
 {
  c[i] = -1;
 }

 bool ok = cudaSucceeded( cudaMemcpy(dev_c, c, N*sizeof(int), cudaMemcpyHostToDevice),
                          "cudaMemcpy (host to device)" );

 if(ok)
 {
  add<<< 2, 2>>>(dev_c);
  // A kernel launch returns no status; launch errors are read back explicitly.
  ok = cudaSucceeded( cudaGetLastError(), "add kernel launch" );
 }

 if(ok)
 {
  // This blocking copy waits for the kernel and surfaces errors raised while it ran.
  ok = cudaSucceeded( cudaMemcpy(c, dev_c, N*sizeof(int), cudaMemcpyDeviceToHost ),
                      "cudaMemcpy (device to host)" );
 }

 if(ok)
 {
  for(int i=0; i< N; ++i)
  {
   printf("c[%d] = %d \n" ,i, c[i] );
  }
 }

 cudaFree( dev_c );

 return ok ? 0 : 1;
}


---

Why don't we simply create 10 threads, e.g. add<<<2,5>>> or add<<<5,2>>>?
Because we want to keep the number of threads reasonably small even when N is much larger than 10, e.g. 33*1024.

This source code is an example of that case.
The array has 10 elements, and there are 4 CUDA threads.
How can all 10 elements be accessed by only 4 threads?


See this page for the detailed meaning of threadIdx, blockIdx, blockDim and gridDim in CUDA.
(1D) -> http://study.marearts.com/2015/03/meaning-of-threadidx-blockidx-blockdim.html


In this source code, 
gridDim.x -> 2     //this means number of block of x
gridDim.y -> 1     //this means number of block of y
blockDim.x -> 2   //this means number of thread of x in a block
blockDim.y -> 1   //this means number of thread of y in a block

The total number of threads is 4, because 2*2 (blocks * threads per block).

In the add kernel function, the 4 threads get the starting indices 0, 1, 2, 3:

int tid = threadIdx.x + blockIdx.x * blockDim.x;
①0+0*2=0
②1+0*2=1
③0+1*2=2
④1+1*2=3 

How to access rest of index 4, 5, 6, 7, 8, 9.
There is a calculation in while loop
while(tid < N)
{
   c[tid] = 1;
   tid += blockDim.x * gridDim.x;
}

** first thread (tid starts at 0) **
#1 loop: 0+2*2=4
#2 loop: 4+2*2=8 
#3 loop: 8+2*2=12 ( 12 is not less than N, so the while loop exits )

** second thread (tid starts at 1) **
#1 loop: 1+2*2=5
#2 loop: 5+2*2=9
#3 loop: 9+2*2=13 ( 13 is not less than N, so the while loop exits )

** third thread (tid starts at 2) **
#1 loop: 2+2*2=6
#2 loop: 6+2*2=10 ( 10 is not less than N, so the while loop exits )

** fourth thread (tid starts at 3) **
#1 loop: 3+2*2=7
#2 loop: 7+2*2=11 ( 11 is not less than N, so the while loop exits )

So every index 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 is reached through the tid values of the 4 threads.