-
Notifications
You must be signed in to change notification settings - Fork 24
Expand file tree
/
Copy pathsimple.cpp
More file actions
119 lines (94 loc) · 3.71 KB
/
simple.cpp
File metadata and controls
119 lines (94 loc) · 3.71 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
// SPDX-FileCopyrightText: Copyright (c) 2021-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: BSD-3-Clause
//
#include <OptiXToolkit/DemandLoading/DemandLoader.h>
#include <cuda_runtime.h>
#include <algorithm>
#include <atomic>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <exception>
#include <stdexcept>
#include "Simple.h"
using namespace demandLoading;
// Check status returned by a CUDA call.
inline void check( cudaError_t status )
{
if( status != cudaSuccess )
throw std::runtime_error( cudaGetErrorString( status ) );
}
// Global count of requests processed.
std::atomic<int> g_numRequestsProcessed( 0 );
void* toPageEntry( unsigned int value )
{
void* result{};
std::intptr_t source = value;
std::memcpy( &result, &source, sizeof( result ) );
return result;
}
// This callback is invoked by the demand loading library when a page request is processed.
bool loadResourceCallback( cudaStream_t /*stream*/, unsigned int pageId, void* /*context*/, void** pageTableEntry )
{
++g_numRequestsProcessed;
*pageTableEntry = toPageEntry( pageId );
return true;
}
void validatePageTableEntries( const std::vector<PageTableEntry>& pageTableEntries, unsigned int currentPage, unsigned int batchSize )
{
for( unsigned int i = 0; i < batchSize; ++i )
{
if( pageTableEntries[i] != currentPage )
printf( "Mismatch pageTable[%u]=%llu != %u\n", i, pageTableEntries[i], currentPage );
++currentPage;
}
}
int main()
{
// Initialize CUDA.
check( cudaFree( nullptr ) );
// Create a stream to be used for asynchronous operations
unsigned int deviceIndex = 0;
check( cudaSetDevice( deviceIndex ) );
cudaStream_t stream;
check( cudaStreamCreate( &stream ) );
// Create DemandLoader
DemandLoader* loader = createDemandLoader( Options() );
// Create a resource, using the given callback to handle page requests.
const unsigned int numPages = 128;
unsigned int startPage = loader->createResource( numPages, loadResourceCallback, nullptr );
// Process all the pages of the resource in batches.
const unsigned int batchSize = 32;
unsigned int numLaunches = 0;
void* devPageTableEntries{};
cudaMalloc( &devPageTableEntries, sizeof( PageTableEntry ) * numPages );
std::vector<PageTableEntry> pageTableEntries;
for( unsigned int currentPage = startPage; currentPage < startPage + numPages; )
{
// Prepare for launch, obtaining DeviceContext.
DeviceContext context;
loader->launchPrepare( stream, context );
// Launch the kernel.
launchPageRequester( stream, context, currentPage, currentPage + batchSize,
static_cast<PageTableEntry*>( devPageTableEntries ) );
++numLaunches;
// Initiate request processing, which returns a Ticket.
Ticket ticket = loader->processRequests( stream, context );
// Wait for any page requests to be processed.
ticket.wait();
// Advance the loop counter only when there were no page requests.
if( ticket.numTasksTotal() == 0 )
{
pageTableEntries.resize( batchSize );
cudaMemcpy( pageTableEntries.data(), devPageTableEntries, batchSize * sizeof( PageTableEntry ), cudaMemcpyDeviceToHost );
validatePageTableEntries( pageTableEntries, currentPage, batchSize );
currentPage += batchSize;
}
}
printf( "Processed %i requests in %i launches.\n", g_numRequestsProcessed.load(), numLaunches );
// Clean up
cudaFree( devPageTableEntries );
check( cudaStreamDestroy( stream ) );
destroyDemandLoader( loader );
return 0;
}