1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52
// Runs one synchronous TensorRT inference pass.
//
// Preconditions (asserted): the engine has exactly two bindings, one input
// and one output.
//   input  : host buffer of batchSize * INPUT_H * INPUT_W floats
//   output : host buffer of batchSize * OUTPUT_SIZE floats, filled on return
//
// Device buffers are allocated and freed on every call.
// NOTE(review): if this runs in a loop, hoist the cudaMalloc/cudaFree (and
// stream creation) out of the hot path — per-call allocation is expensive.
void doInference(IExecutionContext& context, float* input, float* output, int batchSize) {
    const ICudaEngine& engine = context.getEngine();
    assert(engine.getNbBindings() == 2);

    void* buffers[2];
    // Initialize to sentinels so a malformed engine (e.g. two inputs, zero
    // outputs) is caught by the assert below instead of indexing buffers[]
    // with an uninitialized value.
    int inputIndex = -1;
    int outputIndex = -1;

    printf("Bindings after deserializing:\n");
    for (int bi = 0; bi < engine.getNbBindings(); bi++) {
        if (engine.bindingIsInput(bi)) {
            inputIndex = bi;
            printf("Binding %d (%s): Input.\n", bi, engine.getBindingName(bi));
        } else {
            outputIndex = bi;
            printf("Binding %d (%s): Output.\n", bi, engine.getBindingName(bi));
        }
    }
    assert(inputIndex >= 0 && outputIndex >= 0);
    std::cout << "inputIndex = " << inputIndex << std::endl;
    std::cout << "outputIndex = " << outputIndex << std::endl;

    // Widen to size_t before multiplying so large batches cannot overflow int.
    const size_t inputBytes  = (size_t)batchSize * INPUT_H * INPUT_W * sizeof(float);
    const size_t outputBytes = (size_t)batchSize * OUTPUT_SIZE * sizeof(float);

    CHECK(cudaMalloc(&buffers[inputIndex], inputBytes));
    CHECK(cudaMalloc(&buffers[outputIndex], outputBytes));

    cudaStream_t stream;
    CHECK(cudaStreamCreate(&stream));

    // H2D copy, inference, and D2H copy are enqueued on the same stream, so
    // they execute in order; the synchronize below makes `output` valid on
    // return.
    CHECK(cudaMemcpyAsync(buffers[inputIndex], input, inputBytes,
                          cudaMemcpyHostToDevice, stream));
    // enqueue() returns false on failure (e.g. batchSize exceeds the engine's
    // max batch) — was silently ignored before.
    if (!context.enqueue(batchSize, buffers, stream, nullptr)) {
        fprintf(stderr, "doInference: TensorRT enqueue failed\n");
    }
    CHECK(cudaMemcpyAsync(output, buffers[outputIndex], outputBytes,
                          cudaMemcpyDeviceToHost, stream));
    // Wrapped in CHECK (was bare): this is where asynchronous copy/kernel
    // errors surface, so dropping the status loses them entirely.
    CHECK(cudaStreamSynchronize(stream));

    CHECK(cudaStreamDestroy(stream));  // was unchecked — keep error style consistent
    CHECK(cudaFree(buffers[inputIndex]));
    CHECK(cudaFree(buffers[outputIndex]));
}
|