Introduction
OpenCL is a cross-platform framework used mostly for GPGPU (General-purpose computing on graphics processing units). There are plenty of tutorials available on image processing with OpenCL using C/C++, however there's not much information that would cover OpenCL image processing with .NET.I won't go into details about OpenCL kernels/queues/etc. (there's plenty of information available on the internet), however I'll provide you with a bare minimum code required to load an image from disk, process it with OpenCL on the GPU and save it back to a file.
Before we get started, make sure that you download the source code of OpenCL.NET from http://openclnet.codeplex.com/ and add it to your project.
We'll use a simple OpenCL kernel that converts an input image into a grayscale image. The kernel should be saved to a separate file.
Kernel source code:
__kernel void imagingTest(__read_only image2d_t srcImg, __write_only image2d_t dstImg) { const sampler_t smp = CLK_NORMALIZED_COORDS_FALSE | //Natural coordinates CLK_ADDRESS_CLAMP_TO_EDGE | //Clamp to zeros CLK_FILTER_LINEAR; int2 coord = (int2)(get_global_id(0), get_global_id(1)); uint4 bgra = read_imageui(srcImg, smp, coord); //The byte order is BGRA float4 bgrafloat = convert_float4(bgra) / 255.0f; //Convert to normalized [0..1] float //Convert RGB to luminance (make the image grayscale). float luminance = sqrt(0.241f * bgrafloat.z * bgrafloat.z + 0.691f * bgrafloat.y * bgrafloat.y + 0.068f * bgrafloat.x * bgrafloat.x); bgra.x = bgra.y = bgra.z = (uint) (luminance * 255.0f); bgra.w = 255; write_imageui(dstImg, coord, bgra); }
Namespaces Used
using System; using System.Collections; using System.Collections.Generic; using System.Drawing; using System.Drawing.Imaging; using System.IO; using System.Runtime.InteropServices; using OpenCL.Net;
Error handling
Since OpenCL.NET is a wrapper for C API, we'll have to do all the error checking on our own. I'm using the following two methods:
private void CheckErr(Cl.ErrorCode err, string name) { if (err != Cl.ErrorCode.Success) { Console.WriteLine("ERROR: " + name + " (" + err.ToString() + ")"); } } private void ContextNotify(string errInfo, byte[] data, IntPtr cb, IntPtr userData) { Console.WriteLine("OpenCL Notification: " + errInfo); }
Setting Up
The following two variables should be declared in the class itself and will be shared across all of the methods:
private Cl.Context _context; private Cl.Device _device;
And this is the method that sets up OpenCL:
private void Setup () { Cl.ErrorCode error; Cl.Platform[] platforms = Cl.GetPlatformIDs (out error); List<Cl.Device> devicesList = new List<Cl.Device> (); CheckErr (error, "Cl.GetPlatformIDs"); foreach (Cl.Platform platform in platforms) { string platformName = Cl.GetPlatformInfo (platform, Cl.PlatformInfo.Name, out error).ToString (); Console.WriteLine ("Platform: " + platformName); CheckErr (error, "Cl.GetPlatformInfo"); //We will be looking only for GPU devices foreach (Cl.Device device in Cl.GetDeviceIDs(platform, Cl.DeviceType.Gpu, out error)) { CheckErr (error, "Cl.GetDeviceIDs"); Console.WriteLine ("Device: " + device.ToString ()); devicesList.Add (device); } } if (devicesList.Count <= 0) { Console.WriteLine ("No devices found."); return; } _device = devicesList[0]; if (Cl.GetDeviceInfo(_device, Cl.DeviceInfo.ImageSupport, out error).CastTo<Cl.Bool>() == Cl.Bool.False) { Console.WriteLine("No image support."); return; } _context = Cl.CreateContext(null, 1, new[] { _device }, ContextNotify, IntPtr.Zero, out error); //Second parameter is amount of devices CheckErr(error, "Cl.CreateContext"); }
The Image Processing Part
The main problem is that OpenCL.NET is a wrapper around C API of OpenCL, so it can only work with unmanaged memory. However, all of the data in .NET is managed, so we'll have to marshal the data between managed/unmanaged memory. Usually it would be much easier to handle the RGBA color components in float [0..1] space. However, the input image should be in byte[] array, because it would really affect the performance to do the byte=>float conversion on the CPU (we would have to divide each component by 255 for every pixel of the image twice - once before the image processing and once after).
public void ImagingTest (string inputImagePath, string outputImagePath) { Cl.ErrorCode error; //Load and compile kernel source code. string programPath = Environment.CurrentDirectory + "/../../ImagingTest.cl"; //The path to the source file may vary if (!System.IO.File.Exists (programPath)) { Console.WriteLine ("Program doesn't exist at path " + programPath); return; } string programSource = System.IO.File.ReadAllText (programPath); using (Cl.Program program = Cl.CreateProgramWithSource(_context, 1, new[] { programSource }, null, out error)) { CheckErr(error, "Cl.CreateProgramWithSource"); //Compile kernel source error = Cl.BuildProgram (program, 1, new[] { _device }, string.Empty, null, IntPtr.Zero); CheckErr(error, "Cl.BuildProgram"); //Check for any compilation errors if (Cl.GetProgramBuildInfo (program, _device, Cl.ProgramBuildInfo.Status, out error).CastTo<Cl.BuildStatus>() != Cl.BuildStatus.Success) { CheckErr(error, "Cl.GetProgramBuildInfo"); Console.WriteLine("Cl.GetProgramBuildInfo != Success"); Console.WriteLine(Cl.GetProgramBuildInfo(program, _device, Cl.ProgramBuildInfo.Log, out error)); return; } //Create the required kernel (entry function) Cl.Kernel kernel = Cl.CreateKernel(program, "imagingTest", out error); CheckErr(error, "Cl.CreateKernel"); int intPtrSize = 0; intPtrSize = Marshal.SizeOf(typeof(IntPtr)); //Image's RGBA data converted to an unmanaged[] array byte[] inputByteArray; //OpenCL memory buffer that will keep our image's byte[] data. Cl.Mem inputImage2DBuffer; Cl.ImageFormat clImageFormat = new Cl.ImageFormat(Cl.ChannelOrder.RGBA, Cl.ChannelType.Unsigned_Int8); int inputImgWidth, inputImgHeight; int inputImgBytesSize; int inputImgStride; //Try loading the input image using (FileStream imageFileStream = new FileStream(inputImagePath, FileMode.Open) ) { System.Drawing.Image inputImage = System.Drawing.Image.FromStream( imageFileStream ); if (inputImage == null) { Console.WriteLine("Unable to load input image"); return; } inputImgWidth = inputImage.Width; inputImgHeight = inputImage.Height; System.Drawing.Bitmap bmpImage = new System.Drawing.Bitmap(inputImage); //Get raw pixel data of the bitmap //The format should match the format of clImageFormat BitmapData bitmapData = bmpImage.LockBits( new Rectangle(0, 0, bmpImage.Width, bmpImage.Height), ImageLockMode.ReadOnly, PixelFormat.Format32bppArgb);//inputImage.PixelFormat); inputImgStride = bitmapData.Stride; inputImgBytesSize = bitmapData.Stride * bitmapData.Height; //Copy the raw bitmap data to an unmanaged byte[] array inputByteArray = new byte[inputImgBytesSize]; Marshal.Copy(bitmapData.Scan0, inputByteArray, 0, inputImgBytesSize); //Allocate OpenCL image memory buffer inputImage2DBuffer = Cl.CreateImage2D(_context, Cl.MemFlags.CopyHostPtr | Cl.MemFlags.ReadOnly, clImageFormat, (IntPtr)bitmapData.Width, (IntPtr)bitmapData.Height, (IntPtr)0, inputByteArray, out error); CheckErr(error, "Cl.CreateImage2D input"); } //Unmanaged output image's raw RGBA byte[] array byte[] outputByteArray = new byte[inputImgBytesSize]; //Allocate OpenCL image memory buffer Cl.Mem outputImage2DBuffer = Cl.CreateImage2D(_context, Cl.MemFlags.CopyHostPtr | Cl.MemFlags.WriteOnly, clImageFormat, (IntPtr)inputImgWidth, (IntPtr)inputImgHeight, (IntPtr)0, outputByteArray, out error); CheckErr(error, "Cl.CreateImage2D output"); //Pass the memory buffers to our kernel function error = Cl.SetKernelArg(kernel, 0, (IntPtr)intPtrSize, inputImage2DBuffer); error |= Cl.SetKernelArg(kernel, 1, (IntPtr)intPtrSize, outputImage2DBuffer); CheckErr(error, "Cl.SetKernelArg"); //Create a command queue, where all of the commands for execution will be added Cl.CommandQueue cmdQueue = Cl.CreateCommandQueue(_context, _device, (Cl.CommandQueueProperties)0, out error); CheckErr(error, "Cl.CreateCommandQueue"); Cl.Event clevent; //Copy input image from the host to the GPU. IntPtr[] originPtr = new IntPtr[] { (IntPtr)0, (IntPtr)0, (IntPtr)0 }; //x, y, z IntPtr[] regionPtr = new IntPtr[] { (IntPtr)inputImgWidth, (IntPtr)inputImgHeight, (IntPtr)1 }; //x, y, z IntPtr[] workGroupSizePtr = new IntPtr[] { (IntPtr)inputImgWidth, (IntPtr)inputImgHeight, (IntPtr)1 }; error = Cl.EnqueueWriteImage(cmdQueue, inputImage2DBuffer, Cl.Bool.True, originPtr, regionPtr, (IntPtr)0, (IntPtr)0, inputByteArray, 0, null, out clevent); CheckErr(error, "Cl.EnqueueWriteImage"); //Execute our kernel (OpenCL code) error = Cl.EnqueueNDRangeKernel(cmdQueue, kernel, 2, null, workGroupSizePtr, null, 0, null, out clevent); CheckErr(error, "Cl.EnqueueNDRangeKernel"); //Wait for completion of all calculations on the GPU. error = Cl.Finish(cmdQueue); CheckErr(error, "Cl.Finish"); //Read the processed image from GPU to raw RGBA data byte[] array error = Cl.EnqueueReadImage(cmdQueue, outputImage2DBuffer, Cl.Bool.True, originPtr, regionPtr, (IntPtr)0, (IntPtr)0, outputByteArray, 0, null, out clevent); CheckErr(error, "Cl.clEnqueueReadImage"); //Clean up memory Cl.ReleaseKernel(kernel); Cl.ReleaseCommandQueue(cmdQueue); Cl.ReleaseMemObject(inputImage2DBuffer); Cl.ReleaseMemObject(outputImage2DBuffer); //Get a pointer to our unmanaged output byte[] array GCHandle pinnedOutputArray = GCHandle.Alloc(outputByteArray, GCHandleType.Pinned); IntPtr outputBmpPointer = pinnedOutputArray.AddrOfPinnedObject(); //Create a new bitmap with processed data and save it to a file. Bitmap outputBitmap = new Bitmap(inputImgWidth, inputImgHeight, inputImgStride, PixelFormat.Format32bppArgb, outputBmpPointer); outputBitmap.Save(outputImagePath, System.Drawing.Imaging.ImageFormat.Png); pinnedOutputArray.Free(); } }
Now you should have a good foundation for more complex image processing effects on the GPU.