PrefixSumsDev.CopyToHost(ref lastPrefixSum, (nearestCount - 1) * sizeof(int)) TrisCountDevice.CopyToHost(ref lastTrisCount, (nearestCount - 1) * sizeof(int)) KernelMarchingCubesCases.Run(voxelsDev.DevicePointer, width, height, depth, offsetsDev.DevicePointer, trisCountDevice.DevicePointer, nearestW, nearestH, nearestD) ĬudaDeviceVariable prefixSumsDev = prefixScan.PrefixSumArray(trisCountDevice, nearestCount) Typeof(CudaKernel).GetField("_gridDim", BindingFlags.Instance | BindingFlags.NonPublic).SetValue(kernelMarchingCubesCases, gridDimensionsDecremented) KernelMarchingCubesCases.BlockDimensions = blockDimensions Int nearestCount = nearestW * nearestH * nearestD ĬudaDeviceVariable trisCountDevice = new CudaDeviceVariable(nearestCount) ĬudaDeviceVariable offsetsDev = new CudaDeviceVariable(countDecremented) Int nearestD = NearestPowerOfTwo(depthD) Int nearestH = NearestPowerOfTwo(heightD) Int nearestW = NearestPowerOfTwo(widthD) KernelNormalAmbient.Run(voxelsDev.DevicePointer, width, height, depth,, ) Typeof(CudaKernel).GetField("_gridDim", BindingFlags.Instance | BindingFlags.NonPublic).SetValue(kernelNormalAmbient, gridDimensions) KernelNormalAmbient.BlockDimensions = blockDimensions KernelPositionWeight.Run(voxelsDev.DevicePointer, width, height, depth) Typeof(CudaKernel).GetField("_gridDim", BindingFlags.Instance | BindingFlags.NonPublic).SetValue(kernelPositionWeight, gridDimensions) KernelPositionWeight.BlockDimensions = blockDimensions Int countDecremented = widthD * heightD * depthD ĭim3 blockDimensions = new dim3(8, 8, 8) ĭim3 gridDimensions = new dim3((int)Math.Ceiling(width / 8.0), (int)Math.Ceiling(height / 8.0), (int)Math.Ceiling(depth / 8.0)) ĭim3 gridDimensionsDecremented = new dim3((int)Math.Ceiling(widthD / 8.0), (int)Math.Ceiling(heightD / 8.0), (int)Math.Ceiling(depthD / 8.0)) ĬUDANoiseCube noiseCube = new CUDANoiseCube() ĬudaArra圓D noiseArray = noiseCube.GenerateUniformArray(16, 16, 16) ĬudaTextureArra圓D noiseTexture = new CudaTextureArra圓D(kernelPositionWeight, "noiseTexture", CUAddressMode.Wrap, CUFilterMode.Linear, CUTexRefSetFlags.NormalizedCoordinates, noiseArray) ĬudaDeviceVariable voxelsDev = new CudaDeviceVariable(count) Private void Generate(CudaKernel kernelPositionWeight, int width, int height, int depth) Allocate host memory for the convolution resultĬuFloatComplex h_convolved_signal_ref = new cuFloatComplex īool bTestResult = sdkCompareL2fe(h_convolved_signal_ref, h_convolved_signal, 1e-5f) Plan.Exec(d_signal.DevicePointer, TransformDirection.Inverse) ĬuFloatComplex h_convolved_signal = d_signal Multiply the coefficients together and normalize the resultĬonsole.WriteLine("Launching ComplexPointwiseMulAndScale>") ĬomplexPointwiseMulAndScale.BlockDimensions = 256 ĬomplexPointwiseMulAndScale.GridDimensions = 32 ĬomplexPointwiseMulAndScale.Run(d_signal.DevicePointer, d_filter_kernel.DevicePointer, new_size, 1.0f / new_size) Ĭonsole.WriteLine("Transforming signal back cufftExecC2C") Plan.Exec(d_filter_kernel.DevicePointer, TransformDirection.Forward) Plan.Exec(d_signal.DevicePointer, TransformDirection.Forward) Allocate device memory for filter kernelĬudaDeviceVariable d_filter_kernel = new CudaDeviceVariable(new_size) ĭ_filter_kernel.CopyToDevice(h_padded_filter_kernel) ĬudaFFTPlan1D plan = new CudaFFTPlan1D(new_size, cufftType.C2C, 1) Ĭonsole.WriteLine("Transforming signal cufftExecC2C") Using (Stream stream = assembly.GetManifestResourceStream(resourceName))ĬomplexPointwiseMulAndScale = ctx.LoadKernelPTX(stream, "ComplexPointwiseMulAndScale") ĬuFloatComplex h_signal = new cuFloatComplex //we use cuFloatComplex for complex multiplaction in reference host code.įor (int i = 0 i d_signal = new CudaDeviceVariable(new_size) String liste = assembly.GetManifestResourceNames() Var assembly = Assembly.GetExecutingAssembly() NPPException.CheckNppStatus(status, this) Ĭonsole.WriteLine(" is starting.") Status = _8u_C1R_Advanced(_devPtrRoi, _sizeOriginal, _pitch, roiIn, dst.DevicePointerRoi, dst.Pitch, roiOut, nXFactor, nYFactor, buffer.DevicePointer, eInterpolationMode) ĭebug.WriteLine(String.Format("", DateTime.Now, "nppiHistogramRange_8u_AC4R", status)) NppiRect roiOut = new NppiRect(dst._pointRoi, dst._sizeRoi) NppiRect roiIn = new NppiRect(_pointRoi, _sizeRoi) Int bufferSize = ResizeAdvancedGetBufferHostSize(dst.SizeRoi, eInterpolationMode) ĬudaDeviceVariable buffer = new CudaDeviceVariable(bufferSize) Public void ResizeSqrPixelAdvanced(NPPImage_8uC1 dst, double nXFactor, double nYFactor, InterpolationMode eInterpolationMode) Currently only supports NPPI_INTER_LANCZOS3_Advanced. / The type of eInterpolation to perform resampling. / Factor by which y dimension is changed. / Factor by which x dimension is changed. This primitive matches the behavior of GraphicsMagick++. / 1 channel 8-bit unsigned image resize.
0 Comments
Leave a Reply. |