1 /*****************************************************************************
2 * Copyright (C) 2006-2008 by Michael Rieker, Jason Ansel, Kapil Arya, and *
3 * Gene Cooperman *
4 * mrieker@nii.net, jansel@csail.mit.edu, kapil@ccs.neu.edu, and *
5 * gene@ccs.neu.edu *
6 * *
7 * This file is part of the MTCP module of DMTCP (DMTCP:mtcp). *
8 * *
9 * DMTCP:mtcp is free software: you can redistribute it and/or *
10 * modify it under the terms of the GNU Lesser General Public License as *
11 * published by the Free Software Foundation, either version 3 of the *
12 * License, or (at your option) any later version. *
13 * *
14 * DMTCP:dmtcp/src is distributed in the hope that it will be useful, *
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
17 * GNU Lesser General Public License for more details. *
18 * *
19 * You should have received a copy of the GNU Lesser General Public *
20 * License along with DMTCP:dmtcp/src. If not, see *
21 * <http://www.gnu.org/licenses/>. *
22 *****************************************************************************/
23
24 /********************************************************************************************************************************/
25 /* */
26 /* This command-line utility is what a user uses to perform a restore */
27 /* It reads the given checkpoint file into memory then jumps to it, thus being just like the original program was restarted */
28 /* from the last checkpoint. */
29 /* */
30 /* It is also used by the checkpoint verification to perform a restore while the original application program is running, to */
31 /* make sure the restore works. The --verify option tells it to rename the checkpoint file, removing the .temp from the end. */
32 /* */
33 /********************************************************************************************************************************/
34
35 #include <unistd.h>
36 #include <sys/types.h>
37 #include <sys/stat.h>
38 #include <errno.h>
39 #include <fcntl.h>
40 #include <stdio.h>
41 #include <stdlib.h>
42 #include <sys/mman.h>
43 #include <string.h>
44 #include <sys/resource.h>
45 #include <sys/types.h>
46
47 #include "mtcp_internal.h"
48
49 #include <sys/personality.h>
50
/* Forward declarations of the file-local helpers defined after main(). */
static char first_char(char *filename);
static int open_ckpt_to_read(char *filename);
static void readcs (int fd, char cs);
static void readfile (int fd, void *buf, size_t size);

/* Pid of the gzip decompressor process, or -1 when there is none.  It is set
 * either from the --gzip-child-pid option or by open_ckpt_to_read() when it
 * forks gzip, and is handed to the restore routine by main().
 */
static pid_t gzip_child_pid = -1;

/* Defined outside this file; main() overwrites it from the --stderr-fd option. */
extern int dmtcp_info_stderr_fd;

/* Drop the first remaining argument.  Relies on variables named argc and argv
 * being in scope at the point of use.
 */
//shift args
#define shift argc--,argv++
62
/* Usage text printed by main() when the command line cannot be parsed. */
static const char* theUsage =
  "USAGE:\n"
  "mtcp_restart [--verify] <checkpointfile>\n\n"
  "mtcp_restart [--offset <offset-in-bytes>] [--stderr-fd <fd>] [--] <checkpointfile>\n\n"
  "mtcp_restart [--fd <ckpt-fd>] [--gzip-child-pid <pid>] [--rename-ckpt <newname>] [--stderr-fd <fd>]\n\n"
;
69
|
Event main_wrong_num_params: |
nonstandard number of parameters for "main", expected zero or two parameters |
|
Event caretline: |
^ |
70 int main (int argc, char *argv[], char *envp[])
71 {
72 char magicbuf[MAGIC_LEN], *restorename;
73 int fd, verify;
74 size_t restore_size, offset=0;
75 void *restore_begin, *restore_mmap;
76 void (*restore_start) (int fd, int verify, pid_t gzip_child_pid,
77 char *ckpt_newname, char *cmd_file,
78 char *argv[], char *envp[]);
79 char cmd_file[MAXPATHLEN+1];
80 char ckpt_newname[MAXPATHLEN+1] = "";
81 char **orig_argv = argv;
82
83 if (getuid() == 0 || geteuid() == 0) {
84 mtcp_printf("Running mtcp_restart as root is dangerous. Aborting.\n" \
85 "If you still want to do this (at your own risk)," \
86 " then modify mtcp/%s:%d and re-compile.\n",
87 __FILE__, __LINE__ - 4);
88 abort();
89 }
90
91 /* Turn off randomize_va (by re-exec'ing) or warn user if vdso_enabled is on. */
92 mtcp_check_vdso_enabled();
93
94 /* DELETE THE "#else" CASE AND MAKE THIS PERMANENT, ONCE IT'S BEEN USED A LOT.
95 * IT WAS ADDED IN rev. 458.
96 */
97 #if 1
98 fd = gzip_child_pid = -1;
99 verify = 0;
100
101 shift;
102 while (1) {
103 if (argc == 0 || (strcasecmp(argv[0], "--help") == 0 && argc == 1)) {
104 mtcp_printf("%s", theUsage);
105 return (-1);
106 } else if (strcasecmp (argv[0], "--verify") == 0 && argc == 2) {
107 verify = 1;
108 restorename = argv[1];
109 break;
110 } else if (strcasecmp (argv[0], "--offset") == 0 && argc >= 3) {
111 offset = atoi(argv[1]);
112 shift; shift;
113 } else if (strcasecmp (argv[0], "--fd") == 0 && argc >= 2) {
114 fd = atoi(argv[1]);
115 shift; shift;
116 } else if (strcasecmp (argv[0], "--gzip-child-pid") == 0 && argc >= 2) {
117 gzip_child_pid = atoi(argv[1]);
118 shift; shift;
119 } else if (strcasecmp (argv[0], "--rename-ckpt") == 0 && argc >= 2) {
120 strncpy(ckpt_newname, argv[1], MAXPATHLEN);
121 shift; shift;
122 } else if (strcasecmp (argv[0], "--stderr-fd") == 0 && argc >= 2) {
123 dmtcp_info_stderr_fd = atoi(argv[1]);
124 shift; shift;
125 } else if (strcasecmp (argv[0], "--") == 0 && argc == 2) {
126 restorename = argv[1];
127 break;
128 } else if (argc == 1) {
129 restorename = argv[0];
130 break;
131 } else {
132 mtcp_printf("%s", theUsage);
133 return (-1);
134 }
135 }
136
137 /* XXX XXX XXX:
138 * DO NOT USE mtcp_printf OR DPRINTF BEFORE THIS BLOCK, IT'S DANGEROUS AND
139 * CAN MESS UP YOUR PROCESSES BY WRITING GARBAGE TO THEIR STDERR FD,
140 * IF THEY ARE NOT USING IT AS STDERR.
141 * --Kapil
142 */
143
144 if (fd != -1 && gzip_child_pid != -1) {
145 restorename = NULL;
146 } else if ((fd == -1 && gzip_child_pid != -1) ||
147 (offset != 0 && fd != -1)) {
148 mtcp_printf("%s", theUsage);
149 return (-1);
150 }
151
152 if (restorename) {
153 struct stat buf;
154 int rc = stat(restorename, &buf);
155 if (rc == -1) {
156 char error_msg[MAXPATHLEN+35];
157 sprintf(error_msg, "\nmtcp_restart: ckpt image %s", restorename);
158 perror(error_msg);
159 abort();
160 } else if (buf.st_uid != getuid()) { /*Could also run if geteuid() matches*/
161 mtcp_printf("\nProcess uid (%d) doesn't match uid (%d) of\n" \
162 "checkpoint image (%s).\n" \
163 "This is dangerous. Aborting for security reasons.\n" \
164 "If you still want to do this, modify mtcp/%s:%d and re-compile.\n",
165 getuid(), buf.st_uid, restorename, __FILE__, __LINE__ - 5);
166 abort();
167 }
168 }
169
170 if (strlen(ckpt_newname) == 0 && restorename != NULL && offset != 0) {
171 strncpy(ckpt_newname, restorename, MAXPATHLEN);
172 }
173
174 #else
175
176 if (argc == 2) {
177 verify = 0;
178 restorename = argv[1];
179 } else if ((argc == 3) && (strcasecmp (argv[1], "--verify") == 0)) {
180 verify = 1;
181 restorename = argv[2];
182 } else if ((argc == 4) && (strcasecmp (argv[1], "--offset") == 0)) {
183 verify = 0;
184 offset = atoi(argv[2]);
185 restorename = argv[3];
186 strncpy(ckpt_newname,restorename,MAXPATHLEN);
187 } else if ((argc == 3) && (strcasecmp (argv[1], "--fd") == 0)) {
188 /* This case used only when dmtcp_restart exec's to mtcp_restart. */
189 verify = 0;
190 restorename = NULL;
191 fd = atoi(argv[2]);
192 } else if ((argc == 5) && (strcasecmp (argv[1], "--fd") == 0)
193 && (strcasecmp (argv[3], "--gzip-child-pid") == 0)) {
194 /* This case used only when dmtcp_restart exec's to mtcp_restart. */
195 verify = 0;
196 restorename = NULL;
197 fd = atoi(argv[2]);
198 gzip_child_pid = atoi(argv[4]);
199 } else if ((argc == 7) && (strcasecmp (argv[1], "--fd") == 0)
200 && (strcasecmp (argv[3], "--gzip-child-pid") == 0)
201 && (strcasecmp (argv[5], "--rename-ckpt") == 0)) {
202 /* This case used only when dmtcp_restart exec's to mtcp_restart. & wants to rename checkpoint filename */
203 verify = 0;
204 restorename = NULL;
205 fd = atoi(argv[2]);
206 gzip_child_pid = atoi(argv[4]);
207 strncpy(ckpt_newname,argv[6],MAXPATHLEN);
208 } else {
209 mtcp_printf("%s", theUsage);
210 return (-1);
211 }
212
213 #endif
214
215 if(restorename!=NULL) fd = open_ckpt_to_read(restorename);
216 if(offset>0){
217 //skip into the file a bit
218 char* tmp = malloc(offset);
219 readfile(fd, tmp, offset);
220 free(tmp);
221 }
222 memset(magicbuf, 0, sizeof magicbuf);
223 readfile (fd, magicbuf, MAGIC_LEN);
224 if (memcmp (magicbuf, MAGIC, MAGIC_LEN) != 0) {
225 mtcp_printf("mtcp_restart: '%s' is '%s', but this restore is '%s' (fd=%d)\n", restorename, magicbuf, MAGIC, fd);
226 return (-1);
227 }
228
229 /* Set the resourse limits for stack from saved values */
230 struct rlimit stack_rlimit;
231 readcs (fd, CS_STACKRLIMIT); /* resource limit for stack */
232 readfile (fd, &stack_rlimit, sizeof stack_rlimit);
233 #ifdef DEBUG
234 mtcp_printf("mtcp_restart: saved stack resource limit: soft_lim:%p, hard_lim:%p\n", stack_rlimit.rlim_cur, stack_rlimit.rlim_max);
235 #endif
236 setrlimit(RLIMIT_STACK, &stack_rlimit);
237
238 /* Find where the restore image goes */
239 readcs (fd, CS_RESTOREBEGIN); /* beginning of checkpointed libmtcp.so image */
240 readfile (fd, &restore_begin, sizeof restore_begin);
241 readcs (fd, CS_RESTORESIZE); /* size of checkpointed libmtcp.so image */
242 readfile (fd, &restore_size, sizeof restore_size);
243 readcs (fd, CS_RESTORESTART);
244 readfile (fd, &restore_start, sizeof restore_start);
245
246 /* Read in the restore image to same address where it was loaded at time
247 * of checkpoint. This is libmtcp.so, including both text and data sections
248 * as a single section. Hence, we need both write and exec permission,
249 * and MAP_ANONYMOUS, since the data could have changed.
250 */
251
252 #ifdef DEBUG
253 mtcp_printf("mtcp_restart.c: main*: restoring anonymous area %p at %p\n",
254 restore_size, restore_begin);
255 #endif
256 if (munmap(restore_begin, restore_size) < 0) {
257 mtcp_printf("mtcp_restart.c: failed to unmap region at %p\n", restore_begin);
258 abort ();
259 }
260 restore_mmap = mtcp_safemmap (restore_begin, restore_size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_ANONYMOUS | MAP_FIXED | MAP_PRIVATE, -1, 0);
261 if (restore_mmap == MAP_FAILED) {
262 #ifndef _XOPEN_UNIX
263 mtcp_printf("mtcp_restart: Does mmap here support MAP_FIXED?\n");
264 #endif
265 if (mtcp_sys_errno != EBUSY) {
266 mtcp_printf("mtcp_restart: error creating %p byte restore region at %p: %s\n", restore_size, restore_begin, strerror(mtcp_sys_errno));
267 abort ();
268 } else {
269 mtcp_printf("mtcp_restart: info: restarting due to address conflict...\n");
270 close (fd);
271 argv = orig_argv;
272 execvp (argv[0], argv);
273 }
274 }
275 if (restore_mmap != restore_begin) {
276 mtcp_printf("mtcp_restart: %p byte restore region at %p got mapped at %p\n", restore_size, restore_begin, restore_mmap);
277 abort ();
278 }
279 readcs (fd, CS_RESTOREIMAGE);
280 readfile (fd, restore_begin, restore_size);
281
282 #ifndef __x86_64__
283 // Copy command line to libmtcp.so, so that we can re-exec if randomized vdso
284 // steps on us. This won't be needed when we use the linker to map areas.
285 cmd_file[0] = '\0';
286 { int cmd_len = readlink("/proc/self/exe", cmd_file, MAXPATHLEN);
287 if (cmd_len == -1)
288 mtcp_printf("WARNING: Couldn't find /proc/self/exe."
289 " Trying to continue anyway.\n");
290 else
291 cmd_file[cmd_len] = '\0';
292 }
293 #endif
294
295 #if defined(DEBUG) && ! DMTCP_DEBUG
296 char *p, symbolbuff[256];
297 FILE *symbolfile;
298 VA textbase;
299
300 mtcp_printf("mtcp_restart*: restore_begin=%p, restore_start=%p\n", restore_begin, restore_start);
301 textbase = 0;
302
303 symbolfile = popen ("readelf -S libmtcp.so", "r");
304 if (symbolfile != NULL) {
305 while (fgets (symbolbuff, sizeof symbolbuff, symbolfile) != NULL) {
306 if (memcmp (symbolbuff + 5, "] .text ", 8) == 0) {
307 textbase = strtoul (symbolbuff + 41, &p, 16);
308 }
309 }
310 pclose (symbolfile);
311 if (textbase != 0) {
312 mtcp_printf("\n**********\nmtcp_restart*: The symbol table of the"
313 " checkpointed file can be\nmade available to gdb."
314 " Just type the command below in gdb:\n");
315 mtcp_printf(" add-symbol-file libmtcp.so %p\n",
316 restore_begin + textbase);
317 mtcp_printf("Then type \"continue\" to continue debugging.\n");
318 mtcp_printf("**********\n");
319 }
320 }
321 mtcp_maybebpt ();
322 #endif
323
324 /* Now call it - it shouldn't return */
325 (*restore_start) (fd, verify, gzip_child_pid, ckpt_newname, cmd_file, argv, envp);
326 mtcp_printf("mtcp_restart: restore routine returned (it should never do this!)\n");
327 abort ();
328 return (0);
329 }
330
/**
 * Peek at the leading byte of a file.
 *
 * The file is opened read-only, a single byte is read, and the descriptor is
 * closed again.  Failure to open the file or to read that one byte is fatal:
 * a message is printed and the process aborts.
 *
 * @param filename path of the file to inspect
 * @return the first byte stored in the file
 */
static char first_char(char *filename)
{
  char first;
  int ckpt_fd = open(filename, O_RDONLY);

  if (ckpt_fd < 0) {
    mtcp_printf("ERROR: Cannot open file %s\n", filename);
    abort();
  }

  if (read(ckpt_fd, &first, 1) != 1) {
    mtcp_printf("ERROR: Error reading from file %s\n", filename);
    abort();
  }

  close(ckpt_fd);
  return first;
}
360
361 /**
362 * This function will open the checkpoint file stored at the given filename.
363 * It will check the magic number and take the appropriate action. If the
364 * magic number is unknown, we will abort. The fd returned points to the
365 * beginning of the uncompressed data.
366 * NOTE: related code in ../dmtcp/src/connectionmanager.cpp:open_ckpt_to_read()
367 *
368 * @param filename the name of the checkpoint file
369 * @return the fd to use
370 */
371 static int open_ckpt_to_read(char *filename) {
372 int fd;
373 int fds[2];
374 char fc;
375 char *gzip_cmd = "gzip";
376 char gzip_path[MTCP_MAX_PATH];
377 static char *gzip_args[] = { "gzip", "-d", "-", NULL };
378 pid_t cpid;
379
380 fc = first_char(filename);
381 fd = open(filename, O_RDONLY);
382 if(fd < 0) {
383 mtcp_printf("ERROR: Cannot open checkpoint file %s\n", filename);
384 abort();
385 }
386
387 if (fc == MAGIC_FIRST || fc == 'D') /* no compression ('D' from DMTCP) */
388 return fd;
389 else if (fc == GZIP_FIRST) /* gzip : Set gzip_path */ {
390 if (mtcp_find_executable(gzip_cmd, gzip_path) == NULL) {
391 fputs("ERROR: Cannot find gunzip to decompress checkpoint file!\n", stderr);
392 abort();
393 }
394
395 if (pipe(fds) == -1) {
396 fputs("ERROR: Cannot create pipe to execute gunzip to decompress checkpoint file!\n", stderr);
397 abort();
398 }
399
400 cpid = fork();
401
402 if(cpid == -1) {
403 fputs("ERROR: Cannot fork to execute gunzip to decompress checkpoint file!\n", stderr);
404 abort();
405 }
406 else if(cpid > 0) /* parent process */ {
407 gzip_child_pid = cpid;
408 close(fd);
409 close(fds[1]);
410 return fds[0];
411 }
412 else /* child process */ {
413 fd = dup(dup(dup(fd)));
414 if (fd == -1) {
415 fputs("ERROR: dup() failed! No restoration will be performed! Cancel now!\n", stderr);
416 mtcp_abort();
417 }
418 fds[1] = dup(fds[1]);
419 close(fds[0]);
420 if (dup2(fd, STDIN_FILENO) != STDIN_FILENO) {
421 fputs("ERROR: dup2() failed! No restoration will be performed! Cancel now!\n", stderr);
422 mtcp_abort();
423 }
424 close(fd);
425 dup2(fds[1], STDOUT_FILENO);
426 close(fds[1]);
427 execvp(gzip_path, gzip_args);
428 /* should not get here */
429 fputs("ERROR: Decompression failed! No restoration will be performed! Cancel now!\n", stderr);
430 abort();
431 }
432 }
433 else /* invalid magic number */ {
434 fputs("ERROR: Invalid magic number in this checkpoint file!\n", stderr);
435 abort();
436 }
437 }
438
/*
 * Consume the next one-byte section marker from the checkpoint image and
 * verify that it is the marker the caller expects.  A mismatch is fatal:
 * both values are reported and the process aborts.
 *
 * fd: descriptor of the checkpoint image
 * cs: section marker expected at the current position
 */
static void readcs (int fd, char cs)
{
  char found;

  readfile (fd, &found, sizeof found);
  if (found == cs) {
    return;
  }

  mtcp_printf("mtcp_restart readcs: checkpoint section %d next, expected %d\n", found, cs);
  abort ();
}
450
/*
 * Read exactly `size` bytes from `fd` into `buf`.
 *
 * read() may return fewer bytes than requested (the descriptor can be a
 * pipe), so it is called repeatedly until the request is satisfied.  A read
 * interrupted by a signal is retried.  Any other read error, or end-of-file
 * before `size` bytes have arrived, is fatal: a message is printed and the
 * process aborts.  A `size` of 0 returns immediately without reading.
 *
 * fd:   descriptor to read from
 * buf:  destination buffer of at least `size` bytes
 * size: number of bytes required
 */
static void readfile(int fd, void *buf, size_t size)
{
  char *dest = buf;   /* byte pointer: arithmetic on void * is not standard C */
  size_t done = 0;    /* size_t so that large requests neither wrap nor truncate */

  while (done < size) {
    ssize_t rc = read(fd, dest + done, size - done);
    if (rc < 0) {
      if (errno == EINTR) {
        continue;
      }
      mtcp_printf("mtcp_restart readfile: error reading checkpoint file: %s\n", strerror(errno));
      abort();
    }
    if (rc == 0) {
      /* Casts keep the arguments in step with the %d conversions. */
      mtcp_printf("mtcp_restart readfile: only read %d bytes instead of %d from checkpoint file\n", (int) done, (int) size);
      abort();
    }

    done += (size_t) rc;
  }
}