I used a single Bede interactive node to run a parameter variation study of the CCZ4 example on the ccz4-cpp branch (commit 7e02a6cd8b2c29f8990e7a6d588f73dc71d0d58a). The methodology is simple: vary the parameters (recompiling whenever the problem setup changes) and record the log file; the time-stepping time is extracted afterwards by a separate script that converts the log files to CSV (a sketch of the parameter extraction follows the script below).
This is my bash script:
#!/bin/bash
module load cuda/10.1.243 llvm
source ~/miniconda3/bin/activate
export PYTHONPATH=$PWD/../../../python:$PYTHONPATH

declare -a THREADS=("19" "39")
declare -a FNUM=("4" "8" "16" "32" "64" "128" "256" "512" "1024")
declare -a FMAX=("0" "1" "2" "4" "16" "32" "64" "128" "256")
declare -a CS=("0.4" "0.2" "0.1")
declare -a PS=("6" "8" "10" "20")

for cs in "${CS[@]}"
do
    for ps in "${PS[@]}"
    do
        # Regenerate (and rebuild) the problem setup for this cell size / patch size
        python3 ccz4.py -impl fv-fixed-gpu -cs ${cs} -ps ${ps} -et 0.1 -plt 0
        for fnum in "${FNUM[@]}"
        do
            for fmax in "${FMAX[@]}"
            do
                for threads in "${THREADS[@]}"
                do
                    echo "$cs $ps $fnum $fmax $threads"
                    # Capture stdout and stderr in a log file named after the parameter combination
                    FUSEMAX=${fmax} FUSENUM=${fnum} taskset -c 0-${threads} ./peano4 > test_CS_${cs}_PS_${ps}_FUSENUM_${fnum}_FUSEMAX_${fmax}_THREADS_${threads}.log 2>&1
                done
            done
        done
    done
done
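The log-to-CSV script itself is not shown here. For illustration only, this is roughly how the parameter columns used in the analysis below (cs, ps, fusenum, fusemax, threads) could be recovered from the log-file names produced by the run script above; the function name and the regular expression are my own and only assume the file-name pattern of the run script, not the actual conversion script.

import re

# Matches the naming scheme used by the bash script above, e.g.
# test_CS_0.4_PS_6_FUSENUM_4_FUSEMAX_0_THREADS_19.log
LOG_NAME = re.compile(
    r"test_CS_(?P<cs>[\d.]+)_PS_(?P<ps>\d+)"
    r"_FUSENUM_(?P<fusenum>\d+)_FUSEMAX_(?P<fusemax>\d+)"
    r"_THREADS_(?P<threads>\d+)\.log$"
)

def parseParameters(fname):
    """Return the parameter combination encoded in a log-file name (illustrative helper)."""
    m = LOG_NAME.search(fname)
    if m is None:
        raise ValueError("unexpected log-file name: " + fname)
    d = m.groupdict()
    return {
        "cs": float(d["cs"]),
        "ps": int(d["ps"]),
        "fusenum": int(d["fusenum"]),
        "fusemax": int(d["fusemax"]),
        "threads": int(d["threads"]),
    }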
The problem size requires the number of cells, which I extract from the log files as follows:
def getNCells(fname):
    """Extract the number of cells reported in a log file."""
    # Collect every line that reports a cell count
    celltot = []
    complete = []
    with open(fname) as f:
        for line in f:
            l = line.strip()
            if "total=" in l:
                celltot.append(l)
    # Walk backwards so that only the most recent "total=" line per
    # identifier (the third whitespace-separated field) is used
    ncells = []
    for ct in reversed(celltot):
        data = ct.split()
        if data[2] not in complete:
            ncells.append(data[-2].split("=")[-1])
            complete.append(data[2])
    # Each extracted value has the form "a/b"; sum up the left components
    nc = 0
    for i in ncells:
        l, r = i.split("/")
        nc += int(l)
        # nc += int(r)
    return nc
The problem size is \(S = p^2 \cdot N_\text{cells}\), where \(p\) is the patch size and \(N_\text{cells}\) is the number of cells extracted above.
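To make the formula concrete, the problemsize column used in the analysis below can be computed per log file. This is a minimal sketch, assuming the illustrative parseParameters helper from above together with getNCells; it is not the actual conversion script.

def getProblemSize(fname):
    """Problem size S = ps^2 * ncells for one log file (illustrative)."""
    ps = parseParameters(fname)["ps"]   # patch size, taken from the file name
    ncells = getNCells(fname)           # cell count, taken from the file contents
    return ps ** 2 * ncells

# e.g. getProblemSize("test_CS_0.4_PS_6_FUSENUM_4_FUSEMAX_0_THREADS_19.log")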
library(tidyverse)
read_csv("data01.csv") %>% filter(time >0) %>% mutate(timeperdof=time/problemsize) ->df
df %>% filter(threads==19) ->df20
df %>% filter(threads==39) ->df40
Let’s look at some data: the best (minimum) time per degree of freedom per patch size and thread count, first without GPU offloading (fusemax == 0) and then with it (fusemax > 0).
df %>% filter(fusemax==0) %>% group_by(ps,threads) %>% summarize(first=min(timeperdof), last=min(timeperdof))
## # A tibble: 4 x 4
## # Groups:   ps [2]
##      ps threads   first    last
##   <dbl>   <dbl>   <dbl>   <dbl>
## 1     6      19 0.00177 0.00177
## 2     6      39 0.00162 0.00162
## 3     8      19 0.00192 0.00192
## 4     8      39 0.00168 0.00168
df %>% filter(fusemax>0) %>% group_by(ps,threads) %>% summarize(first=min(timeperdof), last=min(timeperdof))
## # A tibble: 4 x 4
## # Groups:   ps [2]
##      ps threads   first    last
##   <dbl>   <dbl>   <dbl>   <dbl>
## 1     6      19 0.00144 0.00144
## 2     6      39 0.00109 0.00109
## 3     8      19 0.00125 0.00125
## 4     8      39 0.00112 0.00112
The data for 20 threads (threads==19, i.e. taskset -c 0-19):
df %>% filter(threads==19 & (fusemax ==0 | fusemax<500) ) %>% ggplot(aes(x=fusenum, y=timeperdof, color=factor(fusemax))) + geom_point(size=1) + facet_wrap(vars(ps))
Here’s the corresponding plot for 40 threads (threads==39):
df %>% filter(threads==39 & (fusemax ==0 | fusemax<500) ) %>% ggplot(aes(x=fusenum, y=timeperdof, color=factor(fusemax))) + geom_point(size=1) + facet_wrap(vars(ps))
We observe quite a number of runs exiting with a segfault; these runs are recorded with a non-positive time in the CSV. The plot below shows for which parameter combinations this happens.
read_csv("data01.csv") ->dfall
dfall %>% ggplot(aes(x=fusenum, y=fusemax, color=factor(sign(time)))) + geom_point(size=1)+ facet_wrap(vars(threads))
library(tidyverse)
read_csv("data02.csv") %>% filter(time >0) %>% mutate(timeperdof=time/problemsize) ->df
df %>% filter(threads==19) ->df20
df %>% filter(threads==39) ->df40
Let’s look at some data from this second data set (data02.csv).
The best times without the GPU (fusemax == 0), for 20 and then 40 threads:
df20 %>% filter(fusemax==0) %>% summarize(first=min(timeperdof), last=min(timeperdof))
## # A tibble: 1 x 2
##     first    last
##     <dbl>   <dbl>
## 1 0.00320 0.00320
df40 %>% filter(fusemax==0) %>% summarize(first=min(timeperdof), last=min(timeperdof))
## # A tibble: 1 x 2
##     first    last
##     <dbl>   <dbl>
## 1 0.00325 0.00325
The best times with the GPU (fusemax > 0), again for 20 and then 40 threads:
df20 %>% filter(fusemax>0) %>% summarize(first=min(timeperdof), last=min(timeperdof))
## # A tibble: 1 x 2
##     first    last
##     <dbl>   <dbl>
## 1 0.00307 0.00307
df40 %>% filter(fusemax>0) %>% summarize(first=min(timeperdof), last=min(timeperdof))
## # A tibble: 1 x 2
##     first    last
##     <dbl>   <dbl>
## 1 0.00292 0.00292
The data for 20 threads:
df %>% filter(threads==19 & (fusemax ==0 | fusemax<500) ) %>% ggplot(aes(x=fusenum, y=timeperdof, color=factor(fusemax))) + geom_point(size=1)
Here’s a plot for 40 threads:
df %>% filter(threads==39 & (fusemax ==0 | fusemax<500) ) %>% ggplot(aes(x=fusenum, y=timeperdof, color=factor(fusemax))) + geom_point(size=1)
We again observe quite a number of runs exiting with a segfault; these runs are recorded with a non-positive time in the CSV. The plot below shows for which parameter combinations this happens.
read_csv("data02.csv") ->dfall
dfall %>% ggplot(aes(x=fusenum, y=fusemax, color=factor(sign(time)))) + geom_point(size=1)+ facet_wrap(vars(threads))