5 # Usage: ./runjava.sh <num_machine>
# Sub-directory of each benchmark directory that the runs cd into.
11 RECOVERYDIR='recovery'
# Directory the *single helpers append their result/log files to.
15 LOGDIR=~/research/Robust/src/Benchmarks/Recovery/runlog
# Directory holding the dstm_<N>.conf files that are symlinked as dstm.conf.
16 DSTMDIR=${HOME}/research/Robust/src/Benchmarks/Prefetch/config
17 MACHINELIST='dc-1.calit2.uci.edu dc-2.calit2.uci.edu dc-3.calit2.uci.edu dc-4.calit2.uci.edu dc-5.calit2.uci.edu dc-6.calit2.uci.edu dc-7.calit2.uci.edu dc-8.calit2.uci.edu'
21 # Ids (1-8) of the machines to be killed
23 ## Sequential machine failure order ######
# NOTE(review): presumably a 0 entry starts a new test and any other entry is
# the id of the machine to kill (see runSequentialFailureTest) - confirm.
31 #ORDER=( 0 7 5 6 3 8 2 );
34 # killclients <fileName> <# of machines>
# While k <= $2: ssh into dc-<k> and pkill the processes that match ${fileName}.
35 function killclients {
38 while [ $k -le $2 ]; do
39 echo "killing dc-$k ${fileName}"
# -u restricts the match to ${USER}'s processes; -f matches the pattern
# against the full command line rather than just the process name.
40 ssh dc-${k} pkill -u ${USER} -f ${fileName}
45 # killclientswithSignal <fileName> <# of machines>
# While k <= $2: ssh into dc-<k> and send SIGUSR1 (not the default signal) to
# every process named ${fileName} via killall.
46 function killclientswithSignal {
49 while [ $k -le $2 ]; do
50 echo "killing dc-$k ${fileName}"
51 ssh dc-${k} killall -USR1 ${fileName}
56 # killonemachine <Benchmark file name> <machine_num>
# Sends SIGUSR1 to the ${fileName} processes on the single machine dc-<machine>.
57 function killonemachine {
60 echo "killing dc-$machine ${fileName}";
# Earlier pkill-based variant, kept for reference:
61 #ssh dc-${machine} pkill -u ${USER} -f ${fileName}
62 ssh dc-${machine} killall -USR1 ${fileName}
65 # runMachines <log filename>
# Launches ${BM_NAME}.bin over ssh: on dc-<k> while k > 1 (no arguments), then
# on dc-1 with "master ${NUM_MACHINE} ${BM_ARGS}". Every ssh is put in the
# background and its stdout/stderr is appended to log-<k> on the local side
# (the >> redirection sits outside the quoted remote command).
66 function runMachines {
67 echo "Running on ${NUM_MACHINE} machines ... "
70 echo "Running machines"
# NOTE(review): this path is built from BM_DIR, while runRecovery builds its
# DIR from BM_NAME - confirm BM_DIR is set by the caller.
76 DIR=`echo ${BASEDIR}\/${BM_DIR}\/${RECOVERYDIR}`;
82 while [ $k -gt 1 ]; do
83 echo "SSH into dc-${k}"
84 ssh dc-${k} 'cd '$DIR'; ./'$BM_NAME'.bin '>> log'-'$k 2>&1 &
89 echo "Running master machine ... "
90 echo "ssh dc-1 cd $DIR'; ./$BM_NAME.bin master $NUM_MACHINE $BM_ARGS";
91 ssh dc-1 'cd '$DIR'; ./'$BM_NAME'.bin master '$NUM_MACHINE $BM_ARGS >> log'-1' 2>&1 &
94 ########### Normal execution
95 # Usage: runNormalTest $NUM_MACHINES 1
# Failure-free run: writes a "Normal Test $1" banner to log-<tt> while
# tt <= NUM_MACHINE, then sends USR1 to the benchmark binary.
96 function runNormalTest {
100 fName="$BM_NAME.bin";
106 if [ $NUM_MACHINE == 16 ]; then
111 while [ $tt -le $NUM_MACHINE ]; do
112 echo "------------------------------- Normal Test $1 ----------------------------" >> log-$tt
# The machine count passed here is the literal 8, not NUM_MACHINE.
120 killclientswithSignal $fName 8
121 #killclients $fName 8
126 ########### Sequential Failure case ##########
# Interprets a sequence of values k: k = 0 begins a new test (after the first
# test, the previous one is first waited for and signalled), any other k is
# the id of a machine to kill with killonemachine.
127 function runSequentialFailureTest {
131 fName="$BM_NAME.bin";
138 if [ $k -eq 0 ]; then # if k = 0, it is a new test
# Nothing to clean up before the very first test.
139 if [ $test_iter -ne 1 ]; then
140 sleep $WAITTIME # wait for the end of execution
141 #killclients $fName 8 # kill alive machines
142 killclientswithSignal $fName 8 #kill machines when there is more than 1 order
# Separator line appended to each of the eight per-machine logs.
144 for outputIter in 1 2 3 4 5 6 7 8
146 echo "----------------------------------------------------------------------------------" >> log-$outputIter
# Test banner appended to each of the eight per-machine logs and echoed once.
151 for outputIter in 1 2 3 4 5 6 7 8
153 echo "------------------------------- Sequential Failure Test $test_iter ----------------------------" >> log-$outputIter
155 echo "------------------------------- Sequential Failure Test $test_iter ----------------------------"
157 sleep 10 # wait until all machines are running
158 test_iter=`expr $test_iter + 1`
159 else # if k != 0, time to kill machines!
160 echo "------------------------ dc-$k is killed ------------------------" >> log-$k
161 echo "------------------------ dc-$k is killed ------------------------"
162 killonemachine $fName $k
# delay = (RANDOM mod KILLDELAY) + 8, i.e. a value in 8 .. KILLDELAY+7.
164 let "delay= $RANDOM % $KILLDELAY + 8"
169 sleep $WAITTIME # wait for the end of execution
170 # killclients $fName 8 # kill alive machines
171 killclientswithSignal $fName 8 #kill machines when finished processing everything in ORDER{ }
176 ####### Single machine failure case ############
# For each machine id in SINGLE_ORDER: write a "Single Failure Test" banner to
# the eight per-machine logs, wait 10 s, kill that one machine with
# killonemachine, wait WAITTIME, then send USR1 to the benchmark on 8 machines.
177 function runSingleFailureTest {
179 fName="$BM_NAME.bin";
184 SINGLE_ORDER=( 1 8 4 6 3 2 7 5 );
188 for machinename in ${SINGLE_ORDER[@]}
191 for outputIter in 1 2 3 4 5 6 7 8
193 echo "------------------------------- Single Failure Test $test_iter ----------------------------" >> log-$outputIter
195 echo "------------------------------- Single Failure Test $test_iter ----------------------------"
197 sleep 10 # wait until all machines are running
198 test_iter=`expr $test_iter + 1`
# The marker goes into the log of the machine being killed. The loop variable
# here is machinename; $k is not this loop's variable, so log-$k would target
# whatever value an earlier loop left in k.
199 echo "------------------------ dc-$machinename is killed ------------------------" >> log-$machinename
200 echo "------------------------ dc-$machinename is killed ------------------------"
201 killonemachine $fName $machinename
202 sleep $WAITTIME # wait till the end of execution
203 killclientswithSignal $fName 8 #kill rest of the alive machines
204 # Insert random delay
# delay = (RANDOM mod KILLDELAY) + 4, i.e. a value in 4 .. KILLDELAY+3.
205 let "delay= $RANDOM % $KILLDELAY + 4"
212 ### runRecovery <num iterations> <num machines> <recovery file name>
# While i <= $1: writes a banner to log-<tt> for tt <= $2, links the
# dstm_<$2>.conf matching the machine count as dstm.conf, starts
# ${BM_NAME}.bin on dc-<k> while k > 1 and the master on dc-1, then sends USR1
# to the binary on $2 machines.
214 function runRecovery {
216 DIR=`echo ${BASEDIR}\/${BM_NAME}\/${RECOVERYDIR}`;
218 fName="$BM_NAME.bin";
220 while [ $i -le $1 ]; do
222 while [ $tt -le $2 ]; do
223 echo "------------------------------- running Recovery on $2 machines for iter=$i ----------------------------" >> log-$tt
227 # Select the dstm config file that matches the machine count ($2 in 2..8)
229 if [ $2 -eq 2 ]; then
230 ln -s ${DSTMDIR}/dstm_2.conf dstm.conf
232 if [ $2 -eq 3 ]; then
233 ln -s ${DSTMDIR}/dstm_3.conf dstm.conf
235 if [ $2 -eq 4 ]; then
236 ln -s ${DSTMDIR}/dstm_4.conf dstm.conf
238 if [ $2 -eq 5 ]; then
239 ln -s ${DSTMDIR}/dstm_5.conf dstm.conf
241 if [ $2 -eq 6 ]; then
242 ln -s ${DSTMDIR}/dstm_6.conf dstm.conf
244 if [ $2 -eq 7 ]; then
245 ln -s ${DSTMDIR}/dstm_7.conf dstm.conf
247 if [ $2 -eq 8 ]; then
248 ln -s ${DSTMDIR}/dstm_8.conf dstm.conf
# Background ssh per machine; output is appended to the local file log-<k>.
253 while [ $k -gt 1 ]; do
254 echo "SSH into dc-${k}"
255 ssh dc-${k} 'cd '$DIR'; ./'$BM_NAME'.bin '>> log'-'$k 2>&1 &
261 echo "Running master machine ..."
262 ssh dc-1 'cd '$DIR'; ./'$BM_NAME'.bin master '$2 $BM_ARGS >> log'-1' 2>&1 &
264 echo "Terminating ... "
265 killclientswithSignal $fName $2
273 ### runDSM <num iterations> <num machines> <dsm file name>
# While i <= $1: writes a banner to log-<tt> for tt <= $2, links the
# dstm_<$2>.conf matching the machine count as dstm.conf, starts ${BM_DSM}.bin
# on dc-<k> while k > 1 and the master on dc-1, then sends USR1 to ${fName}
# on $2 machines.
# NOTE(review): DIR is built from BM_NAME although the binary run is
# ${BM_DSM}.bin - confirm both live in the same directory.
277 DIR=`echo ${BASEDIR}\/${BM_NAME}\/${RECOVERYDIR}`;
281 while [ $i -le $1 ]; do
283 while [ $tt -le $2 ]; do
284 echo "------------------------------- running DSM on $2 machines for iter=$i ----------------------------" >> log-$tt
288 # Select the dstm config file that matches the machine count ($2 in 2..8)
290 if [ $2 -eq 2 ]; then
291 ln -s ${DSTMDIR}/dstm_2.conf dstm.conf
293 if [ $2 -eq 3 ]; then
294 ln -s ${DSTMDIR}/dstm_3.conf dstm.conf
296 if [ $2 -eq 4 ]; then
297 ln -s ${DSTMDIR}/dstm_4.conf dstm.conf
299 if [ $2 -eq 5 ]; then
300 ln -s ${DSTMDIR}/dstm_5.conf dstm.conf
302 if [ $2 -eq 6 ]; then
303 ln -s ${DSTMDIR}/dstm_6.conf dstm.conf
305 if [ $2 -eq 7 ]; then
306 ln -s ${DSTMDIR}/dstm_7.conf dstm.conf
308 if [ $2 -eq 8 ]; then
309 ln -s ${DSTMDIR}/dstm_8.conf dstm.conf
# Background ssh per machine; output is appended to the local file log-<k>.
314 while [ $k -gt 1 ]; do
315 echo "SSH into dc-${k}"
316 ssh dc-${k} 'cd '$DIR'; ./'$BM_DSM'.bin '>> log'-'$k 2>&1 &
322 echo "Running master machine ..."
323 ssh dc-1 'cd '$DIR'; ./'$BM_DSM'.bin master '$2 $BM_ARGS >> log'-1' 2>&1 &
325 echo "Terminating ... "
# NOTE(review): confirm fName names the ${BM_DSM}.bin binary at this point;
# otherwise the signal targets a different process name than the one started.
326 killclientswithSignal $fName $2
# $line has the form "<name>:<args>"; field 1 is the benchmark name and
# field 2 its argument string.
337 BM_NAME=`echo $line | cut -f1 -d":"`
338 BM_ARGS=`echo $line | cut -f2 -d":"`
341 # Setup for remote machine
342 echo "BM_NAME='$BM_NAME'"
343 echo "BM_ARGS='$BM_ARGS'"
346 fileName=${BM_NAME}.bin
348 # Terminate if no parameter was given
# "+ 0" forces arithmetic evaluation of $nummachines.
349 let "NUM_MACHINE= $nummachines + 0";
351 echo "====================================== Normal Test =============================="
# NOTE(review): the count is stored in NUM_MACHINE above, but the argument
# passed here is $NUM_MACHINES - confirm that variable is set somewhere.
352 runNormalTest $NUM_MACHINES 1
353 echo "================================================================================"
# The remaining test modes are currently disabled (commented out).
355 # echo "====================================== Single Failure Test ============================="
356 # runSingleFailureTest $NUM_MACHINES
357 # echo "================================================================================="
359 # echo "====================================== Sequential Failure Test ============================="
360 # runSequentialFailureTest $NUM_MACHINES
361 # echo "================================================================================="
364 # echo "=============== Running javasingle for ${BM_NAME} on 1 machines ================="
365 # javasingle 1 ${BM_NAME}
367 # echo "================================================================================="
369 # echo "=============== Running recoverysingle for ${BM_NAME} on 1 machines ================="
370 # recoverysingle 1 ${BM_NAME}
372 # echo "================================================================================="
374 # echo "=============== Running dsmsingle for ${BM_NAME} on 1 machines ================="
375 # dsmsingle 1 ${BM_DSM}
377 # echo "================================================================================="
379 # echo "====================================== Recovery Execution Time ============================="
382 # echo "------- Running $count threads $BM_NAME recovery on $count machines -----"
383 # runRecovery 1 $count ${BM_NAME}
385 # echo "================================================================================="
387 # echo "====================================== DSM Execution Time ============================="
390 # echo "------- Running $count threads $BM_NAME dsm on $count machines -----"
391 # runDSM 1 $count $BM_DSM
393 # echo "================================================================================="
# javasingle <num iterations> <benchmark name>
# While i < $1: runs ./<name>.bin with arguments "1 ${BM_ARGS}" under
# /usr/bin/time and appends the captured stderr (which includes the elapsed
# seconds printed by time's "%e" format) to ${LOGDIR}/<name>_javasingle.txt.
397 function javasingle {
399 DIR=`echo ${BASEDIR}\/${2}\/${JAVASINGLEDIR}`;
# The message mentions ssh, but the command below is run directly, without ssh.
403 echo "ssh dc-1 cd $DIR'; ./${2}.bin 1 ${BM_ARGS}";
404 while [ $i -lt $1 ]; do
# ${DIR}/tmp is overwritten on every iteration and then appended to the log.
405 /usr/bin/time -f "%e" ./${2}.bin 1 ${BM_ARGS} 2> ${DIR}/tmp
406 cat ${DIR}/tmp >> ${LOGDIR}/${2}_javasingle.txt
# recoverysingle <num iterations> <benchmark name>
# Links dstm_1.conf as dstm.conf, then, while i < $1, starts
# "<name>.bin master 1 ${BM_ARGS}" on dc-1 in the background, appending its
# output to ${LOGDIR}/<name>_recoverysingle.txt, and pkills ${fName}.
412 function recoverysingle {
413 DIR=`echo ${BASEDIR}\/${2}\/${RECOVERYDIR}`;
417 ln -s ${DSTMDIR}/dstm_1.conf dstm.conf
421 while [ $i -lt $1 ]; do
422 echo "Running master machine ... "
423 echo "ssh dc-1 cd $DIR'; ./${2}.bin master 1 ${BM_ARGS}";
424 ssh dc-1 'cd '$DIR'; ./'${2}'.bin master '1 ${BM_ARGS} >> ${LOGDIR}/${2}_recoverysingle.txt 2>&1 &
427 echo "killing dc-1 ${fName}"
# NOTE(review): this pkill runs on the local host, not through ssh -
# presumably the script is expected to be run on dc-1; confirm.
428 pkill -u ${USER} -f ${fName}
# Single-machine DSM run: links dstm_1.conf as dstm.conf, then, while i < $1,
# starts "<$2>.bin master 1 ${BM_ARGS}" on dc-1 in the background, appending
# its output to ${LOGDIR}/${BM_NAME}_dsmsingle.txt, and pkills ${fName}.
# NOTE(review): DIR and the log file name use BM_NAME while the binary run is
# named by $2 - confirm this mix is intended.
434 DIR=`echo ${BASEDIR}\/${BM_NAME}\/${RECOVERYDIR}`;
438 ln -s ${DSTMDIR}/dstm_1.conf dstm.conf
442 while [ $i -lt $1 ]; do
443 echo "Running master machine ... "
444 echo "ssh dc-1 cd $DIR'; ./${2}.bin master 1 ${BM_ARGS}";
445 ssh dc-1 'cd '$DIR'; ./'${2}'.bin master '1 ${BM_ARGS} >> ${LOGDIR}/${BM_NAME}_dsmsingle.txt 2>&1 &
448 echo "killing dc-1 ${fName}"
# This pkill is issued locally, not through ssh.
449 pkill -u ${USER} -f ${fName}
# Script entry: announce start, load bm_args_16threads.txt into this shell,
# announce completion.
454 echo "---------- Starting Benchmarks ----------"
# The file is sourced, so anything it defines or runs happens in this shell;
# the path is relative to the current working directory.
457 source bm_args_16threads.txt
458 echo "----------- done ------------"