3#include <memcore/stringlist.hpp>
4#include <memcore/RefPtr.hpp>
6#include <ocean/plankton/VObjectState.hpp>
7#include <eagle/FixedArray.hpp>
8#include <bone/FishField.hpp>
// Threshold that `stride` is compared against in the sorting code; also
// reported as the "sorting local" value in the setup log output.
12#define LOCAL_SIZE_LIMIT 512U
// Granularity that cellNum and _particleNum are rounded up to a multiple of;
// reported as the "tensor local" value in the setup log output.
// NOTE(review): this is a mutable, non-inline namespace-scope variable. If this
// file is a header included from more than one translation unit it would be
// defined multiple times -- confirm how the file is included.
15size_t tensorlocalWorkSize = 128;
// NOTE(review): no use of DebugNum is visible in this excerpt.
41const int DebugNum = 128;
49static unsigned int closestPow2_less(
unsigned int x){
51 int y =
static_cast<int>(x);
52 while (x >>= 1) ++
log;
58unsigned int factorRadix2(
unsigned int L){
// Allocation flags: both start as false in the constructor's initializer list
// and are tested before finalizeDeviceKernels() / finalizeDeviceBuffers() run
// in the destructor and in setupDeviceContext().
79 bool kernelAlloc, bufferAlloc;
// Set to particleNum by initDeviceBuffers() and to -1 by
// finalizeDeviceBuffers(); because the type is unsigned, that -1 is stored as
// the largest uint value. Printed as "buffer size" by the stream operator.
82 uint allocatedBufferSize;
// mulParticleNum: _particleNum rounded up to a multiple of tensorlocalWorkSize
// when it is not already one. pow2ParticleNum: closestPow2_less(mulParticleNum).
85 uint particleNum, mulParticleNum, pow2ParticleNum;
// cellNum: product of param.gridSize[0..2]. mulCellNum: cellNum rounded up to a
// multiple of tensorlocalWorkSize.
// NOTE(review): initDeviceBuffers() has a parameter that is also named cellNum
// and therefore hides this member inside that function.
88 uint cellNum, mulCellNum;
// Invoked from the constructor body; defined out of class as
// TensorDeviceState::setupDeviceContext().
124 void setupDeviceContext();
// Defined out of class; the definition names the parameter `position`.
132 void writeFragment(
float *);
// Defined out of class; the definition names the parameter `tensor`.
134 void readFragment(
float *);
// No-op: the body is empty.
136 void displayFragment(){}
145 kernelAlloc(
false), bufferAlloc(
false),
147 kernelTime(0.0), overallTime(0.0)
148 { setupDeviceContext(); }
152 cout <<
"*** ~TensorDeviceState ***" <<
endl;
154 if(bufferAlloc) finalizeDeviceBuffers();
155 if(kernelAlloc) finalizeDeviceKernels();
162 void initDeviceKernels() {
164 cout <<
"\n---------------------------------------------------------------------" <<
endl;
165 cout <<
"CL create kernels for device " << device->name <<
endl;
195 cout <<
"CL: kernels initialization done" <<
endl;
196 cout <<
"---------------------------------------------------------------------\n" <<
endl;
199 void finalizeDeviceKernels() {
200 cout <<
"\n---------------------------------------------------------------------" <<
endl;
201 cout <<
"CL: releasing kernels for device " << device->name <<
endl;
223 printf(
"CL: kernels released\n");
225 cout <<
"---------------------------------------------------------------------\n" <<
endl;
228 void initDeviceBuffers(
size_t _size,
size_t cellNum) {
230 cout <<
"\n---------------------------------------------------------------------" <<
endl;
231 cout <<
"CL create buffers for device " << device->name <<
endl;
234 cout <<
"particleNum "<< particleNum <<
", pow2ParticleNum " << pow2ParticleNum <<
", cellNum " << cellNum <<
endl;
246 allocatedBufferSize = particleNum;
248 cout <<
"---------------------------------------------------------------------\n" <<
endl;
251 void finalizeDeviceBuffers()
253 cout <<
"\n---------------------------------------------------------------------" <<
endl;
254 cout <<
endl <<
"CL: releasing buffers for device " << posBuffer->dev->name <<
endl;
266 allocatedBufferSize = -1;
268 cout <<
"---------------------------------------------------------------------\n" <<
endl;
276 const float epsilon = 0.0005;
278 param.min[0] = bb->mincoord()[0] - epsilon;
279 param.min[1] = bb->mincoord()[1] - epsilon;
280 param.min[2] = bb->mincoord()[2] - epsilon;
281 Eagle::PhysicalSpace::tvector
diag = bb->diagonal();
282 cout <<
"BoundingBox (";
283 cout <<
"min: " << param.min[0] <<
"," << param.min[1] <<
"," << param.min[2] ;
284 cout <<
", max: " << bb->maxcoord()[0] <<
"," << bb->maxcoord()[1] <<
"," << bb->maxcoord()[2] <<
")" <<
endl;
291 for(
int i=0; i<3; i++) {
300 param.cellSize[i] = 2.f *
_radius;
301 cout <<
"gridSize*: " << param.gridSize[i] <<
" " <<
endl;
303 newMax[i] = param.min[i] + param.cellSize[i] * param.gridSize[i];
309 <<
"(" << param.gridSize[0] <<
"," << param.gridSize[1] <<
"," << param.gridSize[2] <<
") " <<
endl;
313 cellNum = param.gridSize[0] * param.gridSize[1] * param.gridSize[2];
// Round cellNum up to the next multiple of tensorlocalWorkSize; when cellNum is
// already an exact multiple it is used unchanged.
316 if(cellNum % tensorlocalWorkSize) mulCellNum = (cellNum / tensorlocalWorkSize + 1) * (tensorlocalWorkSize);
317 else mulCellNum = cellNum;
318 cout <<
"Extended BoundingBox (min: " << param.min[0] <<
"," << param.min[1] <<
"," << param.min[2] <<
", max: " <<
newMax[0] <<
"," <<
newMax[1] <<
"," <<
newMax[2] <<
")" <<
endl;
324 out <<
"CL kernel & buffers status:" <<
endl;
325 out <<
"relaxed math: " << (t.relaxedMath?
"on":
"off");
326 out <<
", fp type: " << (t.fpType?
"float":
"double");
327 out <<
", device id: " << t.deviceId;
328 out <<
", buffer size: " << t.allocatedBufferSize <<
endl;
335void TensorDeviceState::setupDeviceContext(){
337 if(device ==
NULL)
cout <<
"First assignment device to the deviceState" <<
endl;
340 if(bufferAlloc) finalizeDeviceBuffers();
343 if(kernelAlloc) finalizeDeviceKernels();
354 cout <<
"---------------------------------------------------------------------" <<
endl;
355 cout <<
"Starting setup of kernels for device " << device->name <<
endl;
359 finalizeDeviceKernels();
370 cout <<
"---------------------------------------------------------------------" <<
endl;
371 cout <<
"setup buffer for device " << device->name <<
endl;
377 cout <<
"radius "<<
_radius<<
", size "<< particleNum <<
", cell num"<< cellNum <<
endl;
380 finalizeDeviceBuffers();
382 initDeviceBuffers(particleNum, cellNum);
393 if(
_particleNum % tensorlocalWorkSize) mulParticleNum = (
_particleNum / tensorlocalWorkSize + 1) * (tensorlocalWorkSize);
397 pow2ParticleNum = closestPow2_less(mulParticleNum);
406 param.gridSize[3] = cellNum;
408 param.cellSize[3] = 1;
411 cout <<
" particleNum " << particleNum <<
", multiple " << mulParticleNum <<
", pow2 " << pow2ParticleNum <<
endl;
413 cout <<
" cellNum " << cellNum <<
"(mul "<< mulCellNum <<
")"<<
endl;
414 cout <<
" CL threads: global " <<
globalWorkSize <<
", tensor local " << tensorlocalWorkSize <<
", sorting local " << LOCAL_SIZE_LIMIT <<
endl;
416 cout <<
" * gridSize" << param.gridSize <<
endl;
417 cout <<
" * min " << param.min <<
endl;
418 cout <<
" * cellSize " << param.cellSize <<
endl;
451 (
size_t)0,
static_cast<void*
>(
d_DstKey),
452 (
size_t)0,
static_cast<void*
>(
d_DstVal),
453 (
size_t)0,
static_cast<void*
>(
d_SrcKey),
454 (
size_t)0,
static_cast<void*
>(
d_SrcVal),
456 sizeof(
uint),
static_cast<void*
>(&
dir)
466 (
size_t)0,
static_cast<void*
>(
d_DstKey),
467 (
size_t)0,
static_cast<void*
>(
d_DstVal),
468 (
size_t)0,
static_cast<void*
>(
d_SrcKey),
469 (
size_t)0,
static_cast<void*
>(
d_SrcVal)
476 if(
stride >= LOCAL_SIZE_LIMIT)
483 (
size_t)0,
static_cast<void*
>(
d_DstKey),
484 (
size_t)0,
static_cast<void*
>(
d_DstVal),
485 (
size_t)0,
static_cast<void*
>(
d_DstKey),
486 (
size_t)0,
static_cast<void*
>(
d_DstVal),
488 sizeof(
uint),
static_cast<void*
>(&
size),
490 sizeof(
uint),
static_cast<void*
>(&
dir)
500 (
size_t)0,
static_cast<void*
>(
d_DstKey),
501 (
size_t)0,
static_cast<void*
>(
d_DstVal),
502 (
size_t)0,
static_cast<void*
>(
d_DstKey),
503 (
size_t)0,
static_cast<void*
>(
d_DstVal),
506 sizeof(
uint),
static_cast<void*
>(&
size),
507 sizeof(
uint),
static_cast<void*
>(&
dir)
535 const size_t pow2part = pow2ParticleNum;
560 (
size_t)0,
static_cast<void*
>(hashBuffer),
561 sizeof(
uint),
static_cast<void*
>(&mulCellNum),
562 sizeof(
uint),
static_cast<void*
>(&pow2ParticleNum)
567 (
size_t)0,
static_cast<void*
>(posBuffer),
568 (
size_t)0,
static_cast<void*
>(hashBuffer),
569 (
size_t)0,
static_cast<void*
>(indexBuffer),
570 (
size_t)0,
static_cast<void*
>(paramBuffer),
571 sizeof(
uint),
static_cast<void*
>(&particleNum)
581 sorting(hashBuffer, indexBuffer, hashBuffer, indexBuffer,
599 const size_t mcn = mulCellNum;
601 (
size_t)0,
static_cast<void*
>(startBuffer),
602 sizeof(
uint),
static_cast<void*
>(&cellNum),
610 (
size_t)0,
static_cast<void*
>(hashBuffer),
611 (
size_t)0,
static_cast<void*
>(indexBuffer),
612 (
size_t)0,
static_cast<void*
>(posBuffer),
614 (
size_t)0,
static_cast<void*
>(startBuffer),
615 (
size_t)0,
static_cast<void*
>(endBuffer),
616 (
size_t)0,
static_cast<void*
>(pSortedBuffer),
619 sizeof(
uint),
static_cast<void*
>(&particleNum)
641 (
size_t)0,
static_cast<void*
>(pSortedBuffer),
642 (
size_t)0,
static_cast<void*
>(startBuffer),
643 (
size_t)0,
static_cast<void*
>(endBuffer),
644 (
size_t)0,
static_cast<void*
>(indexBuffer),
645 (
size_t)0,
static_cast<void*
>(paramBuffer),
646 sizeof(
uint),
static_cast<void*
>(&particleNum),
647 (
size_t)0,
static_cast<void*
>(tensorBuffer)
651 cout <<
"YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY TEST" <<
endl;
656 cout <<
"XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXx CL: all kernels launched - kernel time: " << kernelTime <<
endl;
660void TensorDeviceState::writeFragment(
float *position)
662 overallTime = time.
secs();
696void TensorDeviceState::readFragment(
float *
tensor)
// Kernel build options held by the pool: initialized to relaxedMath = false and
// useFloat = true by the constructor, compared against the arguments of
// setupKernel(bool, bool), and forwarded to every element's setupKernel().
716 bool relaxedMath, useFloat;
726 : relaxedMath(
false), useFloat(
true)
728 cout <<
"*** TensorDevicePool ***" <<
endl;
733 cout <<
"*** ~TensorDevicePool ***" <<
endl;
738 void setupKernel(
bool m,
bool f){
740 if(relaxedMath != m || useFloat != f){
741 for(
int i=0; i<
size(); i++)
742 (*
this)[i]->setupKernel(m,f);
752 cout <<
"OpenCL device state pool initialization" <<
endl;
763 st <<
". " <<
dev->name;
764 st <<
"/" <<
dev->vendor;
765 devices.push_back(
st.str());
775 for(it = devices.begin(); it != devices.end(); it++) policies.push_back(*it);
776 policies.push_back(
"RoundRobin");
779 for(
int i=0; i<
size(); i++)
780 (*
this)[i]->setupKernel(relaxedMath, useFloat);
786 for(
int i=0; i<
size(); i++)
805 out <<
"TensorDevicePool (radius: " << t.radius <<
") devices:";
806 for(
unsigned i=0; i<t.
size(); i++)
833 return *(
pool[deviceId]);
// Round-robin selection: step to the next pool slot, wrapping at pool.size(),
// and return the device stored at that slot.
//
// The previous form, `deviceId = deviceId++ % pool.size();`, incremented and
// assigned deviceId in the same expression. Since C++17 the assignment is
// sequenced after the increment, so it overwrote the incremented value with
// the old index and the selection never advanced; before C++17 the expression
// had undefined behaviour. Computing the next index explicitly avoids both.
//
// NOTE(review): as before, an empty pool makes the modulo divide by zero;
// callers are assumed to populate the pool first -- confirm.
844 deviceId = (deviceId + 1) % pool.size();
845 return *(pool[deviceId]);
constexpr __enable_if_is_duration< _ToDur > ceil(const duration< _Rep, _Period > &__d)
complex< _Tp > log(const complex< _Tp > &)
basic_stringstream< char > stringstream
basic_ostream< char > ostream
valarray< size_t > stride() const
valarray< size_t > size() const
basic_ostream< _CharT, _Traits > & endl(basic_ostream< _CharT, _Traits > &__os)
constexpr void push_back(const value_type &__x)
constexpr void clear() noexcept
constexpr size_type size() const noexcept
An iterator with an optional DataCreator, which is just a class to intercept creation of data along a...
Definition CreativeIterator.hpp:34
double secs() const noexcept
Definition TensorFromPointCloud_CL_state.hpp:837
Scheduling policy interface.
Definition TensorFromPointCloud_CL_state.hpp:819
Definition TensorFromPointCloud_CL_state.hpp:825
Given a fragmented field of curvilinear coordinates, (3D array of coordinates), build a uniform Grid ...
Definition FAQ.dox:2
note: cannot derive from FloatingSkeletonRenderer as long as independent base class TriangleRenderer ...
Pool of current OpenCL devices.
Definition TensorFromPointCloud_CL_state.hpp:714
Definition TensorFromPointCloud_CL_state.hpp:74
void compute()
Definition TensorFromPointCloud_CL_state.hpp:526
void setSize(size_t _particleNum, RefPtr< BoundingBox > bb, float radius)
icl_kernel *hashKernelDouble; icl_kernel *startEndKernelDouble; icl_kernel *tensorKernelDouble;
Definition TensorFromPointCloud_CL_state.hpp:386
Definition TensorFromPointCloud_CL_state.hpp:33