1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88
| #include "cuda.h" #include "blas.h"
#include <assert.h> #include <stdlib.h> #include <time.h> #include <stdio.h>
/*
 * Report a fatal error and terminate the process.
 *
 * s: message handed to perror(), which writes it to stderr followed by the
 *    text for the current errno value.
 *
 * Does not return.
 */
void error(const char* s)
{
    /* Emit the caller's message together with the errno description. */
    perror(s);

    /* Trap here in builds where assertions are enabled (NDEBUG unset). */
    assert(0);

    /* Reached only when assertions are compiled out. */
    exit(-1);
}
/*
 * Abort the program if a CUDA call failed.
 *
 * status: return code of the CUDA runtime call that was just made.
 *
 * Two things are checked: the status passed in by the caller, and the
 * runtime's last recorded error as returned by cudaGetLastError() (which
 * also resets that recorded error). On either failure the error string is
 * printed to stdout and the process is terminated through assert()/error().
 */
void check_error(cudaError_t status)
{
    /* Fetch the last recorded runtime error up front so it is inspected
     * even when `status` itself is cudaSuccess. */
    cudaError_t status2 = cudaGetLastError();

    if (status != cudaSuccess) {
        const char *s = cudaGetErrorString(status);
        char buffer[256];
        printf("CUDA Error: %s\n", s);
        assert(0);
        /* Only reached when assertions are compiled out. */
        snprintf(buffer, sizeof(buffer), "CUDA Error: %s", s);
        error(buffer);
    }

    if (status2 != cudaSuccess) {
        /* Describe status2 here: `status` is known to be cudaSuccess at
         * this point, so using it would print the wrong message. */
        const char *s = cudaGetErrorString(status2);
        char buffer[256];
        printf("CUDA Error Prev: %s\n", s);
        assert(0);
        /* Only reached when assertions are compiled out. */
        snprintf(buffer, sizeof(buffer), "CUDA Error Prev: %s", s);
        error(buffer);
    }
}
/*
 * Compute a launch grid with enough BLOCK-sized blocks to cover n elements.
 *
 * n: number of elements to cover.
 *
 * Returns a dim3 {x, y, 1}. When the number of blocks needed fits in 65535
 * the grid is one-dimensional (y == 1). Otherwise the blocks are spread over
 * two dimensions with x = ceil(sqrt(blocks)) and y chosen so that
 * x * y * BLOCK >= n.
 *
 * n == 0 yields a 1x1x1 grid. Without this guard `n - 1` wraps around on the
 * unsigned size_t and produces an enormous grid.
 * NOTE(review): this assumes kernels launched with the result bounds-check
 * their index against n - confirm against the callers.
 */
dim3 cuda_gridsize(size_t n)
{
    /* Ceiling division of n by BLOCK, guarded against size_t underflow. */
    size_t k = (n == 0) ? 1 : (n - 1) / BLOCK + 1;
    size_t x = k;
    size_t y = 1;

    /* 65535 is presumably the per-dimension grid limit being targeted;
     * above it, fold the blocks into a roughly square 2-D grid. */
    if (x > 65535) {
        x = (size_t)ceil(sqrt((double)k));
        y = (n - 1) / (x * BLOCK) + 1;
    }

    /* Explicit casts: dim3 members are unsigned int, x and y are size_t. */
    dim3 d = {(unsigned int)x, (unsigned int)y, 1};
    return d;
}
float* cuda_make_array(float* x,size_t n) { float *x_gpu; size_t size = sizeof(float)*n; cudaError_t status = cudaMalloc((void **)&x_gpu, size); check_error(status); if(x){ status = cudaMemcpy(x_gpu, x, size, cudaMemcpyHostToDevice); check_error(status); } else { fill_gpu(n, 0, x_gpu, 1); } if(!x_gpu) error("Cuda malloc failed\n"); return x_gpu; }
/*
 * Release a device allocation.
 *
 * x_gpu: pointer passed to cudaFree().
 *
 * The result of cudaFree() is routed through check_error(), which
 * terminates the program on failure.
 */
void cuda_free(float* x_gpu)
{
    check_error(cudaFree(x_gpu));
}
/*
 * Copy n floats from host memory to device memory.
 *
 * x_gpu: destination of the cudaMemcpy (device side of a host-to-device copy).
 * x:     source of the cudaMemcpy (host side).
 * n:     number of float elements to transfer.
 *
 * The copy status is passed to check_error(), which terminates the program
 * on failure.
 */
void cuda_push_array(float *x_gpu, float* x, size_t n)
{
    check_error(cudaMemcpy(x_gpu, x, sizeof(float) * n, cudaMemcpyHostToDevice));
}
/*
 * Copy n floats from device memory back to host memory.
 *
 * x_gpu: source of the cudaMemcpy (device side of a device-to-host copy).
 * x:     destination of the cudaMemcpy (host side).
 * n:     number of float elements to transfer.
 *
 * The copy status is passed to check_error(), which terminates the program
 * on failure.
 */
void cuda_pull_array(float *x_gpu, float* x, size_t n)
{
    check_error(cudaMemcpy(x, x_gpu, sizeof(float) * n, cudaMemcpyDeviceToHost));
}
|