1 /*****************************************************************************
2 * Copyright (C) 2006-2008 by Michael Rieker, Jason Ansel, Kapil Arya, and *
3 * Gene Cooperman *
4 * mrieker@nii.net, jansel@csail.mit.edu, kapil@ccs.neu.edu, and *
5 * gene@ccs.neu.edu *
6 * *
7 * This file is part of the MTCP module of DMTCP (DMTCP:mtcp). *
8 * *
9 * DMTCP:mtcp is free software: you can redistribute it and/or *
10 * modify it under the terms of the GNU Lesser General Public License as *
11 * published by the Free Software Foundation, either version 3 of the *
12 * License, or (at your option) any later version. *
13 * *
14 * DMTCP:dmtcp/src is distributed in the hope that it will be useful, *
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
17 * GNU Lesser General Public License for more details. *
18 * *
19 * You should have received a copy of the GNU Lesser General Public *
20 * License along with DMTCP:dmtcp/src. If not, see *
21 * <http://www.gnu.org/licenses/>. *
22 *****************************************************************************/
23
24 /********************************************************************************************************************************/
25 /* */
26 /* This command-line utility is what a user uses to perform a restore */
27 /* It reads the given checkpoint file into memory then jumps to it, thus being just like the original program was restarted */
28 /* from the last checkpoint. */
29 /* */
30 /* It is also used by the checkpoint verification to perform a restore while the original application program is running, to */
31 /* make sure the restore works. The --verify option tells it to rename the checkpoint file, removing the .temp from the end. */
32 /* */
33 /********************************************************************************************************************************/
34
35 #include <unistd.h>
36 #include <sys/types.h>
37 #include <sys/stat.h>
38 #include <errno.h>
39 #include <fcntl.h>
40 #include <stdio.h>
41 #include <stdlib.h>
42 #include <sys/mman.h>
43 #include <string.h>
44 #include <sys/resource.h>
45 #include <sys/types.h>
46
47 #include "mtcp_internal.h"
48
49 #include <sys/personality.h>
50
/* Forward declarations of the file-local helpers defined after main(). */
static char first_char(char *filename);
static int open_ckpt_to_read(char *filename);
static void readcs (int fd, char cs);
static void readfile (int fd, void *buf, size_t size);

/* Pid of the gzip decompression child: set from --gzip-child-pid or by
 * open_ckpt_to_read() when it forks gzip; -1 when there is no such child.
 */
static pid_t gzip_child_pid = -1;

/* Defined outside this file; main() assigns it from the --stderr-fd option. */
extern int dmtcp_info_stderr_fd;

/* Shift args: drop the first remaining command-line argument.
 * Expects variables named argc and argv to be in scope.  Parenthesized so
 * the comma expression cannot be split by surrounding operators.
 */
#define shift (argc--, argv++)

/* Usage text printed for --help, for an empty command line, and for any
 * argument combination that main() rejects.
 */
static const char* theUsage =
  "USAGE:\n"
  "mtcp_restart [--verify] <checkpointfile>\n\n"
  "mtcp_restart [--offset <offset-in-bytes>] [--stderr-fd <fd>] [--] <checkpointfile>\n\n"
  "mtcp_restart [--fd <ckpt-fd>] [--gzip-child-pid <pid>] [--rename-ckpt <newname>] [--stderr-fd <fd>]\n\n"
;
69
70 int main (int argc, char *argv[], char *envp[])
71 {
72 char magicbuf[MAGIC_LEN], *restorename;
73 int fd, verify;
74 size_t restore_size, offset=0;
75 void *restore_begin, *restore_mmap;
76 void (*restore_start) (int fd, int verify, pid_t gzip_child_pid,
77 char *ckpt_newname, char *cmd_file,
78 char *argv[], char *envp[]);
79 char cmd_file[MAXPATHLEN+1];
80 char ckpt_newname[MAXPATHLEN+1] = "";
81 char **orig_argv = argv;
82
83 if (getuid() == 0 || geteuid() == 0) {
84 mtcp_printf("Running mtcp_restart as root is dangerous. Aborting.\n" \
85 "If you still want to do this (at your own risk)," \
86 " then modify mtcp/%s:%d and re-compile.\n",
87 __FILE__, __LINE__ - 4);
88 abort();
89 }
90
91 /* Turn off randomize_va (by re-exec'ing) or warn user if vdso_enabled is on. */
92 mtcp_check_vdso_enabled();
93
94 /* DELETE THE "#else" CASE AND MAKE THIS PERMANENT, ONCE IT'S BEEN USED A LOT.
95 * IT WAS ADDED IN rev. 458.
96 */
97 #if 1
98 fd = gzip_child_pid = -1;
99 verify = 0;
100
101 shift;
102 while (1) {
103 if (argc == 0 || (strcasecmp(argv[0], "--help") == 0 && argc == 1)) {
104 mtcp_printf("%s", theUsage);
105 return (-1);
106 } else if (strcasecmp (argv[0], "--verify") == 0 && argc == 2) {
107 verify = 1;
108 restorename = argv[1];
109 break;
110 } else if (strcasecmp (argv[0], "--offset") == 0 && argc >= 3) {
111 offset = atoi(argv[1]);
112 shift; shift;
113 } else if (strcasecmp (argv[0], "--fd") == 0 && argc >= 2) {
114 fd = atoi(argv[1]);
115 shift; shift;
116 } else if (strcasecmp (argv[0], "--gzip-child-pid") == 0 && argc >= 2) {
117 gzip_child_pid = atoi(argv[1]);
118 shift; shift;
119 } else if (strcasecmp (argv[0], "--rename-ckpt") == 0 && argc >= 2) {
120 strncpy(ckpt_newname, argv[1], MAXPATHLEN);
121 shift; shift;
122 } else if (strcasecmp (argv[0], "--stderr-fd") == 0 && argc >= 2) {
123 dmtcp_info_stderr_fd = atoi(argv[1]);
124 shift; shift;
125 } else if (strcasecmp (argv[0], "--") == 0 && argc == 2) {
126 restorename = argv[1];
127 break;
128 } else if (argc == 1) {
129 restorename = argv[0];
130 break;
131 } else {
132 mtcp_printf("%s", theUsage);
133 return (-1);
134 }
135 }
136
137 /* XXX XXX XXX:
138 * DO NOT USE mtcp_printf OR DPRINTF BEFORE THIS BLOCK, IT'S DANGEROUS AND
139 * CAN MESS UP YOUR PROCESSES BY WRITING GARBAGE TO THEIR STDERR FD,
140 * IF THEY ARE NOT USING IT AS STDERR.
141 * --Kapil
142 */
143
144 if (fd != -1 && gzip_child_pid != -1) {
145 restorename = NULL;
146 } else if ((fd == -1 && gzip_child_pid != -1) ||
147 (offset != 0 && fd != -1)) {
148 mtcp_printf("%s", theUsage);
149 return (-1);
150 }
151
152 if (restorename) {
153 struct stat buf;
154 int rc = stat(restorename, &buf);
155 if (rc == -1) {
156 char error_msg[MAXPATHLEN+35];
|
Event secure_coding: |
[VERY RISKY]. Using "sprintf" can cause a buffer overflow when done incorrectly. Because sprintf() assumes an arbitrarily long string, callers must be careful not to overflow the actual space of the destination. Use snprintf() instead, or correct precision specifiers. |
157 sprintf(error_msg, "\nmtcp_restart: ckpt image %s", restorename);
158 perror(error_msg);
159 abort();
160 } else if (buf.st_uid != getuid()) { /*Could also run if geteuid() matches*/
161 mtcp_printf("\nProcess uid (%d) doesn't match uid (%d) of\n" \
162 "checkpoint image (%s).\n" \
163 "This is dangerous. Aborting for security reasons.\n" \
164 "If you still want to do this, modify mtcp/%s:%d and re-compile.\n",
165 getuid(), buf.st_uid, restorename, __FILE__, __LINE__ - 5);
166 abort();
167 }
168 }
169
170 if (strlen(ckpt_newname) == 0 && restorename != NULL && offset != 0) {
171 strncpy(ckpt_newname, restorename, MAXPATHLEN);
172 }
173
174 #else
175
176 if (argc == 2) {
177 verify = 0;
178 restorename = argv[1];
179 } else if ((argc == 3) && (strcasecmp (argv[1], "--verify") == 0)) {
180 verify = 1;
181 restorename = argv[2];
182 } else if ((argc == 4) && (strcasecmp (argv[1], "--offset") == 0)) {
183 verify = 0;
184 offset = atoi(argv[2]);
185 restorename = argv[3];
186 strncpy(ckpt_newname,restorename,MAXPATHLEN);
187 } else if ((argc == 3) && (strcasecmp (argv[1], "--fd") == 0)) {
188 /* This case used only when dmtcp_restart exec's to mtcp_restart. */
189 verify = 0;
190 restorename = NULL;
191 fd = atoi(argv[2]);
192 } else if ((argc == 5) && (strcasecmp (argv[1], "--fd") == 0)
193 && (strcasecmp (argv[3], "--gzip-child-pid") == 0)) {
194 /* This case used only when dmtcp_restart exec's to mtcp_restart. */
195 verify = 0;
196 restorename = NULL;
197 fd = atoi(argv[2]);
198 gzip_child_pid = atoi(argv[4]);
199 } else if ((argc == 7) && (strcasecmp (argv[1], "--fd") == 0)
200 && (strcasecmp (argv[3], "--gzip-child-pid") == 0)
201 && (strcasecmp (argv[5], "--rename-ckpt") == 0)) {
202 /* This case used only when dmtcp_restart exec's to mtcp_restart. & wants to rename checkpoint filename */
203 verify = 0;
204 restorename = NULL;
205 fd = atoi(argv[2]);
206 gzip_child_pid = atoi(argv[4]);
207 strncpy(ckpt_newname,argv[6],MAXPATHLEN);
208 } else {
209 mtcp_printf("%s", theUsage);
210 return (-1);
211 }
212
213 #endif
214
215 if(restorename!=NULL) fd = open_ckpt_to_read(restorename);
216 if(offset>0){
217 //skip into the file a bit
218 char* tmp = malloc(offset);
219 readfile(fd, tmp, offset);
220 free(tmp);
221 }
222 memset(magicbuf, 0, sizeof magicbuf);
223 readfile (fd, magicbuf, MAGIC_LEN);
224 if (memcmp (magicbuf, MAGIC, MAGIC_LEN) != 0) {
225 mtcp_printf("mtcp_restart: '%s' is '%s', but this restore is '%s' (fd=%d)\n", restorename, magicbuf, MAGIC, fd);
226 return (-1);
227 }
228
229 /* Set the resourse limits for stack from saved values */
230 struct rlimit stack_rlimit;
231 readcs (fd, CS_STACKRLIMIT); /* resource limit for stack */
232 readfile (fd, &stack_rlimit, sizeof stack_rlimit);
233 #ifdef DEBUG
234 mtcp_printf("mtcp_restart: saved stack resource limit: soft_lim:%p, hard_lim:%p\n", stack_rlimit.rlim_cur, stack_rlimit.rlim_max);
235 #endif
236 setrlimit(RLIMIT_STACK, &stack_rlimit);
237
238 /* Find where the restore image goes */
239 readcs (fd, CS_RESTOREBEGIN); /* beginning of checkpointed libmtcp.so image */
240 readfile (fd, &restore_begin, sizeof restore_begin);
241 readcs (fd, CS_RESTORESIZE); /* size of checkpointed libmtcp.so image */
242 readfile (fd, &restore_size, sizeof restore_size);
243 readcs (fd, CS_RESTORESTART);
244 readfile (fd, &restore_start, sizeof restore_start);
245
246 /* Read in the restore image to same address where it was loaded at time
247 * of checkpoint. This is libmtcp.so, including both text and data sections
248 * as a single section. Hence, we need both write and exec permission,
249 * and MAP_ANONYMOUS, since the data could have changed.
250 */
251
252 #ifdef DEBUG
253 mtcp_printf("mtcp_restart.c: main*: restoring anonymous area %p at %p\n",
254 restore_size, restore_begin);
255 #endif
256 if (munmap(restore_begin, restore_size) < 0) {
257 mtcp_printf("mtcp_restart.c: failed to unmap region at %p\n", restore_begin);
258 abort ();
259 }
260 restore_mmap = mtcp_safemmap (restore_begin, restore_size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_ANONYMOUS | MAP_FIXED | MAP_PRIVATE, -1, 0);
261 if (restore_mmap == MAP_FAILED) {
262 #ifndef _XOPEN_UNIX
263 mtcp_printf("mtcp_restart: Does mmap here support MAP_FIXED?\n");
264 #endif
265 if (mtcp_sys_errno != EBUSY) {
266 mtcp_printf("mtcp_restart: error creating %p byte restore region at %p: %s\n", restore_size, restore_begin, strerror(mtcp_sys_errno));
267 abort ();
268 } else {
269 mtcp_printf("mtcp_restart: info: restarting due to address conflict...\n");
270 close (fd);
271 argv = orig_argv;
272 execvp (argv[0], argv);
273 }
274 }
275 if (restore_mmap != restore_begin) {
276 mtcp_printf("mtcp_restart: %p byte restore region at %p got mapped at %p\n", restore_size, restore_begin, restore_mmap);
277 abort ();
278 }
279 readcs (fd, CS_RESTOREIMAGE);
280 readfile (fd, restore_begin, restore_size);
281
282 #ifndef __x86_64__
283 // Copy command line to libmtcp.so, so that we can re-exec if randomized vdso
284 // steps on us. This won't be needed when we use the linker to map areas.
285 cmd_file[0] = '\0';
286 { int cmd_len = readlink("/proc/self/exe", cmd_file, MAXPATHLEN);
287 if (cmd_len == -1)
288 mtcp_printf("WARNING: Couldn't find /proc/self/exe."
289 " Trying to continue anyway.\n");
290 else
291 cmd_file[cmd_len] = '\0';
292 }
293 #endif
294
295 #if defined(DEBUG) && ! DMTCP_DEBUG
296 char *p, symbolbuff[256];
297 FILE *symbolfile;
298 VA textbase;
299
300 mtcp_printf("mtcp_restart*: restore_begin=%p, restore_start=%p\n", restore_begin, restore_start);
301 textbase = 0;
302
303 symbolfile = popen ("readelf -S libmtcp.so", "r");
304 if (symbolfile != NULL) {
305 while (fgets (symbolbuff, sizeof symbolbuff, symbolfile) != NULL) {
306 if (memcmp (symbolbuff + 5, "] .text ", 8) == 0) {
307 textbase = strtoul (symbolbuff + 41, &p, 16);
308 }
309 }
310 pclose (symbolfile);
311 if (textbase != 0) {
312 mtcp_printf("\n**********\nmtcp_restart*: The symbol table of the"
313 " checkpointed file can be\nmade available to gdb."
314 " Just type the command below in gdb:\n");
315 mtcp_printf(" add-symbol-file libmtcp.so %p\n",
316 restore_begin + textbase);
317 mtcp_printf("Then type \"continue\" to continue debugging.\n");
318 mtcp_printf("**********\n");
319 }
320 }
321 mtcp_maybebpt ();
322 #endif
323
324 /* Now call it - it shouldn't return */
325 (*restore_start) (fd, verify, gzip_child_pid, ckpt_newname, cmd_file, argv, envp);
326 mtcp_printf("mtcp_restart: restore routine returned (it should never do this!)\n");
327 abort ();
328 return (0);
329 }
330
/**
 * Peek at the first byte of a file.
 *
 * The file is opened read-only, exactly one byte is read, and the file is
 * closed again.  The process is aborted if the file cannot be opened or the
 * one-byte read does not succeed.
 *
 * @param filename  path of the file to inspect
 * @return the first byte of the file
 */
static char first_char(char *filename)
{
  char first;
  int handle = open(filename, O_RDONLY);

  if (handle < 0) {
    mtcp_printf("ERROR: Cannot open file %s\n", filename);
    abort();
  }

  if (read(handle, &first, 1) != 1) {
    mtcp_printf("ERROR: Error reading from file %s\n", filename);
    abort();
  }

  close(handle);
  return first;
}
360
361 /**
362 * This function will open the checkpoint file stored at the given filename.
363 * It will check the magic number and take the appropriate action. If the
364 * magic number is unknown, we will abort. The fd returned points to the
365 * beginning of the uncompressed data.
366 * NOTE: related code in ../dmtcp/src/connectionmanager.cpp:open_ckpt_to_read()
367 *
368 * @param filename the name of the checkpoint file
369 * @return the fd to use
370 */
371 static int open_ckpt_to_read(char *filename) {
372 int fd;
373 int fds[2];
374 char fc;
375 char *gzip_cmd = "gzip";
376 char gzip_path[MTCP_MAX_PATH];
377 static char *gzip_args[] = { "gzip", "-d", "-", NULL };
378 pid_t cpid;
379
380 fc = first_char(filename);
381 fd = open(filename, O_RDONLY);
382 if(fd < 0) {
383 mtcp_printf("ERROR: Cannot open checkpoint file %s\n", filename);
384 abort();
385 }
386
387 if (fc == MAGIC_FIRST || fc == 'D') /* no compression ('D' from DMTCP) */
388 return fd;
389 else if (fc == GZIP_FIRST) /* gzip : Set gzip_path */ {
390 if (mtcp_find_executable(gzip_cmd, gzip_path) == NULL) {
391 fputs("ERROR: Cannot find gunzip to decompress checkpoint file!\n", stderr);
392 abort();
393 }
394
395 if (pipe(fds) == -1) {
396 fputs("ERROR: Cannot create pipe to execute gunzip to decompress checkpoint file!\n", stderr);
397 abort();
398 }
399
400 cpid = fork();
401
402 if(cpid == -1) {
403 fputs("ERROR: Cannot fork to execute gunzip to decompress checkpoint file!\n", stderr);
404 abort();
405 }
406 else if(cpid > 0) /* parent process */ {
407 gzip_child_pid = cpid;
408 close(fd);
409 close(fds[1]);
410 return fds[0];
411 }
412 else /* child process */ {
413 fd = dup(dup(dup(fd)));
414 if (fd == -1) {
415 fputs("ERROR: dup() failed! No restoration will be performed! Cancel now!\n", stderr);
416 mtcp_abort();
417 }
418 fds[1] = dup(fds[1]);
419 close(fds[0]);
420 if (dup2(fd, STDIN_FILENO) != STDIN_FILENO) {
421 fputs("ERROR: dup2() failed! No restoration will be performed! Cancel now!\n", stderr);
422 mtcp_abort();
423 }
424 close(fd);
425 dup2(fds[1], STDOUT_FILENO);
426 close(fds[1]);
427 execvp(gzip_path, gzip_args);
428 /* should not get here */
429 fputs("ERROR: Decompression failed! No restoration will be performed! Cancel now!\n", stderr);
430 abort();
431 }
432 }
433 else /* invalid magic number */ {
434 fputs("ERROR: Invalid magic number in this checkpoint file!\n", stderr);
435 abort();
436 }
437 }
438
/*
 * Read the next one-byte section marker from the checkpoint and abort the
 * process unless it equals the expected marker `cs`.
 */
static void readcs (int fd, char cs)
{
  char marker;

  readfile (fd, &marker, sizeof marker);
  if (marker == cs) {
    return;
  }

  mtcp_printf("mtcp_restart readcs: checkpoint section %d next, expected %d\n", marker, cs);
  abort ();
}
450
/*
 * Read exactly `size` bytes from `fd` into `buf`, looping over short reads.
 *
 * A read() interrupted by a signal (EINTR) is retried.  Any other read
 * error, or end-of-file before `size` bytes have arrived, prints a message
 * and aborts the process, so the function only returns once the buffer is
 * completely filled.  A `size` of 0 returns immediately without reading.
 */
static void readfile(int fd, void *buf, size_t size)
{
  char *dest = buf;   /* byte pointer: arithmetic on void * is not standard C */
  size_t done = 0;    /* size_t, so requests larger than INT_MAX do not wrap */

  while (done < size) {
    ssize_t rc = read(fd, dest + done, size - done);

    if (rc < 0) {
      if (errno == EINTR) {
        continue;
      }
      mtcp_printf("mtcp_restart readfile: error reading checkpoint file: %s\n", strerror(errno));
      abort();
    }
    if (rc == 0) {
      /* The casts match the %d conversions in the message. */
      mtcp_printf("mtcp_restart readfile: only read %d bytes instead of %d from checkpoint file\n", (int) done, (int) size);
      abort();
    }

    done += (size_t) rc;
  }
}
473 }