Refer to the following code:
..
size_t frameByteSize = initMat.step[0] * initMat.rows;
#ifndef USE_UNIFIED_MEM
/* Pinned memory. No cache */
std::cout << "Using pinned memory" << std::endl;
void* device_ptr, * host_ptr;
cudaSetDeviceFlags(cudaDeviceMapHost);
cudaHostAlloc((void**)&host_ptr, frameByteSize, cudaHostAllocMapped);
cudaHostGetDevicePointer((void**)&device_ptr, (void*)host_ptr, 0);
cv::Mat frame_out(height, width, CV_8UC3, host_ptr);
cv::cuda::GpuMat d_frame_out(height, width, CV_8UC3, device_ptr);
#else
/* Unified memory */
std::cout << "Using unified memory" << std::endl;
void* unified_ptr;
cudaMallocManaged(&unified_ptr, frameByteSize);
cv::Mat frame_out(height, width, CV_8UC3, unified_ptr);
cv::cuda::GpuMat d_frame_out(height, width, CV_8UC3, unified_ptr);
#endif
..
Another example:
..
void* m_device_ptr = NULL;
void* m_host_ptr = NULL;
//memory share frame -> cuda_frame
cudaSetDeviceFlags(cudaDeviceMapHost);
size_t frameByteSize = initMat.step[0] * initMat.rows;
cudaHostAlloc((void**)&m_host_ptr, frameByteSize, cudaHostAllocMapped);
cudaHostGetDevicePointer((void**)&m_device_ptr, (void*)m_host_ptr, 0);
cv::Mat m_frame = cv::Mat(initMat.rows, initMat.cols, CV_8UC3, m_host_ptr);
cv::cuda::GpuMat m_cuda_frame = cv::cuda::GpuMat(initMat.rows, initMat.cols, CV_8UC3, m_device_ptr);
//initiation
iframes.copyTo(m_frame);
..