#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
// Sets the checkpoint file from within the process.
// (The same thing can be done with the -C argument to mosrun.)
//
// file: name of the checkpoint file; it is handed over as the third
//       argument of the open() call below.
// Returns 1 when open() succeeds and 0 when it returns -1.
//
// NOTE(review): the third argument of a regular open() is a mode, not a
// pointer, and the returned value is never closed. Presumably the mosrun
// runtime treats this call as a control request rather than a real file
// open -- confirm against the MOSIX documentation before changing either.
int setCheckpointFile(char *file) {
    const int rc = open("/proc/self/checkpointfile", 1|O_CREAT, file);

    return rc != -1;
}
// Triggers a checkpoint from within the process.
//
// Returns 1 (after printing a confirmation on stdout) when the open() call
// succeeds, and 0 (after printing an error on stderr) when it returns -1.
//
// NOTE(review): the value returned by open() is never closed; presumably the
// call is a control request handled by the mosrun runtime and not a real
// file open -- confirm against the MOSIX documentation.
int triggerCheckpoint() {
    const int rc = open("/proc/self/checkpoint", 1|O_CREAT, 1);

    if (rc != -1) {
        printf("Checkpoint was done successfully\n");
        return 1;
    }

    fprintf(stderr, "Error doing self checkpoint \n");
    return 0;
}
// Demo driver: runs a fixed number of CPU-bound "units" and triggers a
// checkpoint from within the process right after the requested unit.
//
// Usage: <program> <checkpoint-file> <unit>
//   checkpoint-file  name passed to setCheckpointFile()
//   unit             unit number (1..99) after which triggerCheckpoint()
//                    is called
//
// Returns EXIT_SUCCESS when all units have completed, and EXIT_FAILURE on
// bad arguments or when setting the checkpoint file / triggering the
// checkpoint fails.
int main(int argc, char **argv) {
    // TOTAL_UNITS:    number of iterations of the main loop; raise it if you
    //                 want the program to run longer.
    // SPINS_PER_UNIT: busy-loop iterations per unit; change it to make each
    //                 unit consume more (or less) CPU time.
    enum { TOTAL_UNITS = 100, SPINS_PER_UNIT = 1000000 * 500 };

    char *checkpointFileName;
    char *end = NULL;
    long parsedUnit;
    int checkpointUnit;
    int unit;

    if (argc < 3) {
        fprintf(stderr, "Usage: %s <checkpoint-file> <unit>\n", argv[0]);
        return EXIT_FAILURE;
    }

    // argv strings live for the whole run, so no copy is needed.
    checkpointFileName = argv[1];

    // strtol (unlike atoi) lets us reject non-numeric input and trailing
    // garbage. Out-of-range values saturate to LONG_MIN/LONG_MAX and are
    // caught by the range check. The unit must be one the loop below
    // actually reaches, hence the upper bound of TOTAL_UNITS - 1.
    parsedUnit = strtol(argv[2], &end, 10);
    if (end == argv[2] || *end != '\0' ||
        parsedUnit < 1 || parsedUnit >= TOTAL_UNITS) {
        fprintf(stderr, "Checkpoint unit should be > 0 and < 100\n");
        return EXIT_FAILURE;
    }
    checkpointUnit = (int)parsedUnit;

    printf("Checkpoint file: %s\n", checkpointFileName);
    printf("Checkpoint unit: %d\n", checkpointUnit);

    // Set the checkpoint file from within the process (this can also be done
    // using the -C argument of mosrun).
    if (!setCheckpointFile(checkpointFileName)) {
        fprintf(stderr, "Error setting the checkpoint filename from within the process\n");
        fprintf(stderr, "Make sure you are running this program via mosrun\n");
        return EXIT_FAILURE;
    }

    // Main loop: one iteration per unit of simulated work.
    for (unit = 0; unit < TOTAL_UNITS; unit++) {
        // Consume some CPU time (simulating the run of the application).
        // The sink is volatile so an optimizing compiler cannot remove the loop.
        volatile int sink = 0;
        int j;

        for (j = 0; j < SPINS_PER_UNIT; j++) {
            sink = j + unit * 2;
        }
        printf("Unit %d done\n", unit);

        // Trigger a checkpoint request from within the process.
        if (unit == checkpointUnit && !triggerCheckpoint()) {
            return EXIT_FAILURE;
        }
    }

    return EXIT_SUCCESS;
}
To compile: gcc -o checkpoint_demo checkpoint_demo.c
To run: mosrun ./checkpoint_demo <checkpoint-file> <unit>
A typical run:
> mosrun ./checkpoint_demo ccc 5
Checkpoint file: ccc
Checkpoint unit: 5
Unit 0 done
Unit 1 done
Unit 2 done
Unit 3 done
Unit 4 done
Unit 5 done
Checkpoint was done successfully
Unit 6 done
Unit 7 done
Unit 8 done
^C
The program triggered a checkpoint after unit 5.
The checkpoint was saved in the file ccc.1.
After unit 8 the program was killed.
To restart:
> mosrun -R ccc.1
Checkpoint was done successfully
Unit 6 done
Unit 7 done
Unit 8 done
Unit 9 done
Unit 10 done
...
The program was restarted from the point right after it was checkpointed.