Skip to content

Commit

Permalink
[bug] partially fix cuda saveseed, #98
Browse files Browse the repository at this point in the history
  • Loading branch information
fangq committed Nov 4, 2024
1 parent 6baa839 commit a6c2984
Show file tree
Hide file tree
Showing 5 changed files with 67 additions and 33 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/build_all.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ jobs:
name: Build All
strategy:
matrix:
os: [ubuntu-20.04, ubuntu-22.04, macos-12, macos-14, windows-2019]
os: [ubuntu-20.04, ubuntu-22.04, macos-13, macos-14, windows-2019]
runs-on: ${{ matrix.os }}
defaults:
run:
Expand Down Expand Up @@ -164,7 +164,7 @@ jobs:
zip -FSr --symlink packages/mmclab-${{ env.RELEASE_TAG }}.zip mmclab
fi
- name: Upload mmclab package
if: ${{ matrix.os == 'ubuntu-20.04' || matrix.os == 'macos-12' || matrix.os == 'macos-14' || matrix.os == 'windows-2019' }}
if: ${{ matrix.os == 'ubuntu-20.04' || matrix.os == 'macos-13' || matrix.os == 'macos-14' || matrix.os == 'windows-2019' }}
uses: actions/upload-artifact@v3
with:
name: all-mmc-packages
Expand All @@ -184,7 +184,7 @@ jobs:
zip -FSr --symlink mmc/packages/mmc-${{ env.RELEASE_TAG }}.zip mmc -x 'mmc/packages*'
fi
- name: Upload mmc package
if: ${{ matrix.os == 'ubuntu-20.04' || matrix.os == 'macos-12' || matrix.os == 'macos-14' || matrix.os == 'windows-2019' }}
if: ${{ matrix.os == 'ubuntu-20.04' || matrix.os == 'macos-13' || matrix.os == 'macos-14' || matrix.os == 'windows-2019' }}
uses: actions/upload-artifact@v3
with:
name: all-mmc-packages
Expand Down
3 changes: 1 addition & 2 deletions mmclab/example/demo_example_replay.m
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@
cfg.issaveexit = 1; % save detected photon exit position and angles
cfg.issaveseed = 1; % save detected photon seeds to replay later
% cfg.method=0;
cfg.compute='cuda';

newcfg = mmclab(cfg, 'prep'); % preprocessing of the mesh to get the missing fields

Expand Down Expand Up @@ -82,7 +81,7 @@
% set up for wp replay

newcfg.outputtype = 'wp'; % replay and get wp
[cube3, detp3, ~, ~] = mmclab(newcfg);
[cube3, detp3] = mmclab(newcfg);

% the two detected photon arrays should be the same. however, because
% the program uses multi-threading, the orders may be different
Expand Down
8 changes: 4 additions & 4 deletions mmclab/example/demo_head_atlas.m
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@
% for Fig.9(a) in TranYan2019(submitted). For simplicity, a coarse mesh (lower
% sampling rate) that represents part of the head(z>155) is simulated.
%
% [Sanchez2012] C.E.Sanchez J.E.Richards and C.R.Almli, Age-Specific MRI Templates
% for Pediatric Neuroimaging, Developmental Neuropsychology 37, 379–399 (2012).
% [Sanchez2012] C.E.Sanchez J.E.Richards and C.R.Almli, "Age-Specific MRI Templates
% for Pediatric Neuroimaging," Developmental Neuropsychology 37, 379–399 (2012).
%
% [Yan2019] S.Yan, A.P.Tran, and Q.Fang, Dual-grid mesh-based monte carlo algorithm
% for efficient photon transport simulations in complex three-dimensional media,
% [Yan2019] S.Yan, A.P.Tran, and Q.Fang, "Dual-grid mesh-based monte carlo algorithm
% for efficient photon transport simulations in complex three-dimensional media,"
% Journal of Biomedical Optics 24(2), 020503 (2019).
%
% [TranYan2019] A.P.Tran, S.Yan and Q.Fang, "Improving model-based fNIRS
Expand Down
4 changes: 2 additions & 2 deletions mmclab/example/demo_photon_sharing.m
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
%% prepare simulation input

cfg.nphoton = 3e6;
[cfg.node face cfg.elem] = meshabox([0 0 0], [60 60 20], 2, 2);
[cfg.node, face, cfg.elem] = meshabox([0 0 0], [60 60 20], 2, 2);
cfg.elemprop = ones(size(cfg.elem, 1), 1);
cfg.srcpos = [10 10 -2];
cfg.srcdir = [0 0 1];
Expand Down Expand Up @@ -38,7 +38,7 @@

%% plot results (same as in the mmc example)

[node, ~, elem] = meshabox([0 0 0], [60 60 20], 2, 2);
[node, face, elem] = meshabox([0 0 0], [60 60 20], 2, 2);
data = flux.data(:, 1:length(node), :);
cwdata = squeeze(sum(data, 3));

Expand Down
79 changes: 57 additions & 22 deletions src/mmc_core.cl
Original file line number Diff line number Diff line change
Expand Up @@ -417,7 +417,7 @@ __device__ static float xorshift128p_nextf (__private RandType t[RAND_BUF_LEN])

return s1.f[0] - 1.0f;
}
#ifdef MCX_SAVE_DETECTORS
#if defined(MCX_SAVE_DETECTORS) || defined(__NVCC__)
__device__ static void copystate(__local float* v1, __private float* v2, int len) {
for (int i = 0; i < len; i++) {
v1[i] = v2[i];
Expand Down Expand Up @@ -491,7 +491,7 @@ __device__ void clearpath(__local float* p, int len) {
}
}

#ifdef MCX_SAVE_DETECTORS
#if defined(MCX_SAVE_DETECTORS) || defined(__NVCC__)
__device__ uint finddetector(float3* p0, __constant float4* gmed, __constant MCXParam* gcfg) {
uint i;

Expand All @@ -516,12 +516,21 @@ __device__ void savedetphoton(__global float* n_det, __global uint* detectedphot

if (baseaddr < GPU_PARAM(gcfg, maxdetphoton)) {
uint i;
#ifdef MCX_SAVE_SEED
#if defined(MCX_SAVE_SEED) || defined(__NVCC__)
#ifdef __NVCC__

for (i = 0; i < RAND_BUF_LEN; i++) {
photonseed[baseaddr * RAND_BUF_LEN + i] = initseed[i];
if (GPU_PARAM(gcfg, issaveseed)) {
#endif

for (i = 0; i < RAND_BUF_LEN; i++) {
photonseed[baseaddr * RAND_BUF_LEN + i] = initseed[i];
}

#ifdef __NVCC__
}

#endif

#endif
baseaddr *= (GPU_PARAM(gcfg, reclen) + 1);
n_det[baseaddr++] = detid;
Expand Down Expand Up @@ -1451,8 +1460,8 @@ __device__ void onephoton(unsigned int id, __local float* ppath, __constant MCXP

int oldeid, fixcount = 0;
ray r = {gcfg->srcpos, gcfg->srcdir, {MMC_UNDEFINED, 0.f, 0.f}, GPU_PARAM(gcfg, e0), 0, 0, 1.f, 0.f, 0.f, 0.f, ID_UNDEFINED, 0.f};
#ifdef MCX_SAVE_SEED
RandType initseed[RAND_BUF_LEN];
#if defined(MCX_SAVE_SEED) || defined(__NVCC__)
RandType initseed[RAND_BUF_LEN] = {NULL};
#endif

clearpath(ppath, (GPU_PARAM(gcfg, reclen) + (GPU_PARAM(gcfg, srcnum) > 1) * GPU_PARAM(gcfg, srcnum)));
Expand All @@ -1461,27 +1470,44 @@ __device__ void onephoton(unsigned int id, __local float* ppath, __constant MCXP

r.photonid = id;

#ifdef MCX_SAVE_SEED
#if defined(MCX_SAVE_SEED) || defined(__NVCC__)

#ifdef __NVCC__

if (GPU_PARAM(gcfg, issaveseed)) {
#endif

for (oldeid = 0; oldeid < RAND_BUF_LEN; oldeid++) {
initseed[oldeid] = ran[oldeid];
for (oldeid = 0; oldeid < RAND_BUF_LEN; oldeid++) {
initseed[oldeid] = ran[oldeid];
}

#ifdef __NVCC__
}

#endif

#endif

/*initialize the photon parameters*/
launchnewphoton(gcfg, &r, node, elem, srcelem, ran, srcpattern);

#ifdef MCX_SAVE_DETECTORS
#if defined(MCX_SAVE_DETECTORS) || defined(__NVCC__)
#ifdef __NVCC__

if (GPU_PARAM(gcfg, issavedet)) {
#endif

if (GPU_PARAM(gcfg, srctype) != stPattern || GPU_PARAM(gcfg, srcnum) == 1) {
ppath[GPU_PARAM(gcfg, reclen) - 1] = r.weight; /*last record in partialpath is the initial photon weight*/
} else if (GPU_PARAM(gcfg, srctype) == stPattern) {
*((__local uint*)(ppath + GPU_PARAM(gcfg, reclen) - 1)) = r.posidx;
}

#ifdef __NVCC__
}

#endif

#endif

if (GPU_PARAM(gcfg, srcnum) == 1) {
Expand Down Expand Up @@ -1518,7 +1544,7 @@ __device__ void onephoton(unsigned int id, __local float* ppath, __constant MCXP
r.faceid = -1;
}

#ifdef MCX_SAVE_DETECTORS
#if defined(MCX_SAVE_DETECTORS) || defined(__NVCC__)

if (GPU_PARAM(gcfg, issavedet) && r.Lmove > 0.f && type[r.eid - 1] > 0) {
ppath[GPU_PARAM(gcfg, maxmedia) + type[r.eid - 1] - 1] += r.Lmove; /*second medianum block is the partial path*/
Expand Down Expand Up @@ -1576,7 +1602,7 @@ __device__ void onephoton(unsigned int id, __local float* ppath, __constant MCXP

r.slen = branchless_badouel_raytet(&r, gcfg, ppath, elem, weight, type[r.eid - 1], facenb, normal, gmed, replayweight, replaytime);
(*raytet)++;
#ifdef MCX_SAVE_DETECTORS
#if defined(MCX_SAVE_DETECTORS) || defined(__NVCC__)

if (GPU_PARAM(gcfg, issavedet) && r.Lmove > 0.f && type[r.eid - 1] > 0) {
ppath[GPU_PARAM(gcfg, maxmedia) + type[r.eid - 1] - 1] += r.Lmove;
Expand All @@ -1594,7 +1620,7 @@ __device__ void onephoton(unsigned int id, __local float* ppath, __constant MCXP
fixphoton(&r.p0, node, (__global int*)(elem + (r.eid - 1)*GPU_PARAM(gcfg, elemlen)));
r.slen = branchless_badouel_raytet(&r, gcfg, ppath, elem, weight, type[r.eid - 1], facenb, normal, gmed, replayweight, replaytime);
(*raytet)++;
#ifdef MCX_SAVE_DETECTORS
#if defined(MCX_SAVE_DETECTORS) || defined(__NVCC__)

if (GPU_PARAM(gcfg, issavedet) && r.Lmove > 0.f && type[r.eid - 1] > 0) {
ppath[GPU_PARAM(gcfg, maxmedia) + type[r.eid - 1] - 1] += r.Lmove;
Expand All @@ -1618,7 +1644,7 @@ __device__ void onephoton(unsigned int id, __local float* ppath, __constant MCXP
//if(GPU_PARAM(gcfg,debuglevel)&dlExit)
GPUDEBUG(("E %f %f %f %f %f %f %f %d\n", r.p0.x, r.p0.y, r.p0.z,
r.vec.x, r.vec.y, r.vec.z, r.weight, r.eid));
#ifdef MCX_SAVE_DETECTORS
#if defined(MCX_SAVE_DETECTORS) || defined(__NVCC__)

if (GPU_PARAM(gcfg, issavedet) && GPU_PARAM(gcfg, issaveexit)) { /*when issaveexit is set to 1*/
copystate(ppath + (GPU_PARAM(gcfg, reclen) - 7), (__private float*) & (r.p0), 3); /*columns 7-5 from the right store the exit positions*/
Expand All @@ -1640,16 +1666,21 @@ __device__ void onephoton(unsigned int id, __local float* ppath, __constant MCXP
GPUDEBUG(("X %f %f %f %d %u %e\n", r.p0.x, r.p0.y, r.p0.z, r.eid, id, r.slen));
}

#ifdef MCX_SAVE_DETECTORS
#if defined(MCX_SAVE_DETECTORS) || defined(__NVCC__)
#ifdef __NVCC__

if (GPU_PARAM(gcfg, issavedet)) {
#endif

if (r.eid <= 0) {
#ifdef MCX_SAVE_SEED
#if defined(MCX_SAVE_SEED) || defined(__NVCC__)

if (GPU_PARAM(gcfg, isextdet) && type[oldeid - 1] == GPU_PARAM(gcfg, maxmedia) + 1) {
savedetphoton(n_det, detectedphoton, ppath, &r, gmed, oldeid, gcfg, photonseed, initseed);
} else {
savedetphoton(n_det, detectedphoton, ppath, &r, gmed, -1, gcfg, photonseed, initseed);
if (GPU_PARAM(gcfg, issaveseed)) {
if (GPU_PARAM(gcfg, isextdet) && type[oldeid - 1] == GPU_PARAM(gcfg, maxmedia) + 1) {
savedetphoton(n_det, detectedphoton, ppath, &r, gmed, oldeid, gcfg, photonseed, initseed);
} else {
savedetphoton(n_det, detectedphoton, ppath, &r, gmed, -1, gcfg, photonseed, initseed);
}
}

#else
Expand All @@ -1662,8 +1693,12 @@ __device__ void onephoton(unsigned int id, __local float* ppath, __constant MCXP

#endif
}

#ifdef __NVCC__
}

#endif

#endif
break; /*photon exits boundary*/
}
Expand All @@ -1689,7 +1724,7 @@ __device__ void onephoton(unsigned int id, __local float* ppath, __constant MCXP
savedebugdata(&r, id, reporter, gdebugdata, gcfg);
}

#ifdef MCX_SAVE_DETECTORS
#if defined(MCX_SAVE_DETECTORS) || defined(__NVCC__)

if (GPU_PARAM(gcfg, issavedet)) {
if (GPU_PARAM(gcfg, ismomentum) && type[r.eid - 1] > 0) { /*when ismomentum is set to 1*/
Expand Down

0 comments on commit a6c2984

Please sign in to comment.