1 /*****************************************************************************
2 * Copyright (C) 2006-2010 by Michael Rieker, Jason Ansel, Kapil Arya, and *
3 * Gene Cooperman *
4 * mrieker@nii.net, jansel@csail.mit.edu, kapil@ccs.neu.edu, and *
5 * gene@ccs.neu.edu *
6 * *
7 * This file is part of the MTCP module of DMTCP (DMTCP:mtcp). *
8 * *
9 * DMTCP:mtcp is free software: you can redistribute it and/or *
10 * modify it under the terms of the GNU Lesser General Public License as *
11 * published by the Free Software Foundation, either version 3 of the *
12 * License, or (at your option) any later version. *
13 * *
14 * DMTCP:dmtcp/src is distributed in the hope that it will be useful, *
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
17 * GNU Lesser General Public License for more details. *
18 * *
19 * You should have received a copy of the GNU Lesser General Public *
20 * License along with DMTCP:dmtcp/src. If not, see *
21 * <http://www.gnu.org/licenses/>. *
22 *****************************************************************************/
23
24 /********************************************************************************************************************************/
25 /* */
26 /* Static part of restore - This gets linked in the libmtcp.so shareable image that gets loaded as part of the user's original */
27 /* application. The makefile appends all the needed system call routines onto the end of this module so it will link with no */
28 /* undefined symbols. This allows the restore procedure to simply read this object image from the restore file, load it to */
29 /* the same address it was in the user's original application, and jump to it. */
30 /* */
31 /* If we didn't assemble it all as one module, the idiot loader would make references to glibc routines go to libc.so even */
32 /* though there are object modules linked in with those routines defined. */
33 /* */
34 /********************************************************************************************************************************/
35
36 #include <errno.h>
37 #include <fcntl.h>
38 #include <sched.h>
39 #include <signal.h>
40 #include <stdarg.h>
41 #include <stdio.h>
42 #include <stdlib.h>
43 #include <string.h>
44 #include <sys/mman.h>
45 #include <sys/stat.h>
46 #include <unistd.h>
47
48 #include "mtcp_internal.h"
49
/* State handed to the static restore code.  Everything here has hidden
 * visibility, so the symbols are not exported from the shared object. */

/* File descriptor of the checkpoint image; closed and reset to -1 by
 * mtcp_restoreverything once all areas are restored. */
__attribute__ ((visibility ("hidden")))
int mtcp_restore_cpfd = -1; // '= -1' puts it in regular data instead of common
__attribute__ ((visibility ("hidden")))
int mtcp_restore_verify = 0;// 0: normal restore; 1: verification restore
/* PID of the gzip helper process, or -1 when there is none;
 * mtcp_restoreverything waits on it with wait4 before resuming. */
__attribute__ ((visibility ("hidden")))
pid_t mtcp_restore_gzip_child_pid = -1; // '= -1' puts it in regular data instead of common
/* NOTE(review): not referenced in the visible part of this file; presumably
 * the (possibly renamed) checkpoint file path -- confirm against callers. */
__attribute__ ((visibility ("hidden"))) char mtcp_ckpt_newname[MAXPATHLEN+1];
/* Capacity of the saved argv/envp arrays below (each has one extra slot,
 * presumably for the terminating NULL -- confirm). */
#define MAX_ARGS 50
/* Program path, argument vector and environment passed to execve when the
 * restart has to re-exec itself (see the vdso-conflict path in
 * readmemoryareas). */
__attribute__ ((visibility ("hidden"))) char *mtcp_restore_cmd_file;

__attribute__ ((visibility ("hidden"))) char *mtcp_restore_argv[MAX_ARGS+1];
__attribute__ ((visibility ("hidden"))) char *mtcp_restore_envp[MAX_ARGS+1];
/* Program break to re-establish with brk() at the start of the restore. */
__attribute__ ((visibility ("hidden")))
void *mtcp_saved_break = NULL; // saved brk (0) value
64
/* These two symbols are used by the linker script to mark the beginning and
 * end of the image.  Each label is preceded by a '.long 0' so that
 * mtcp_shareable_begin > 0: the linker does not relocate a zero value.
 * The final directive switches back to the .text section for the code that
 * follows. */

asm (".section __shareable_begin ; .globl mtcp_shareable_begin ; .long 0 ; mtcp_shareable_begin:");
asm (".section __shareable_end ; .globl mtcp_shareable_end; .long 0 ; mtcp_shareable_end:");
asm (".text");
71
/* Internal routines */

/* Re-open and re-position the files listed in the checkpoint image. */
static void readfiledescrs (void);
/* Re-create the memory areas listed in the checkpoint image. */
static void readmemoryareas (void);
/* Called with a CS_* constant before each section is read; presumably
 * consumes and verifies the section marker -- definition not shown here. */
static void readcs (char cs);
/* Read `size` bytes of the checkpoint image into `buf`. */
static void readfile (void *buf, size_t size);
/* Alternative to readfile that maps checkpoint data into place (only
 * reachable when do_mmap_ckpt_image is enabled in readmemoryareas). */
static void mmapfile(void *buf, size_t size, int prot, int flags);
/* Advance past `size` bytes of the checkpoint image without keeping them. */
static void skipfile(size_t size);
/* Restore one non-anonymous MAP_SHARED area; `flags` are open(2) flags. */
static void read_shared_memory_area_from_file(Area* area, int flags);
/* Reports the [vdso], [vsyscall] and end-of-stack addresses through its
 * pointer arguments; a stack_end_addr of 0 means no "[stack]" entry. */
static VA highest_userspace_address (VA *vdso_addr, VA *vsyscall_addr,
                                     VA * stack_end_addr);
/* Create the named shared backing file and return its descriptor. */
static int open_shared_file(char* fileName);
/* Take a lock of type `l_type` (F_RDLCK / F_WRLCK) on `fd`; `name` is
 * presumably used only for diagnostics -- confirm. */
static void lock_file(int fd, char* name, short l_type);
// These will all go away when we use a linker to reserve space.
/* Address of the current [vdso], copied from mtcp_restoreverything so that
 * readmemoryareas can detect a conflict with a restored area. */
static VA global_vdso_addr = 0;
87
/*
 * Hand-rolled substring search, used in place of strstr().
 *
 * Returns a pointer to the first occurrence of `substring` inside `string`,
 * or NULL when there is none.  An empty `substring` matches at the start of
 * `string`, including when `string` itself is empty.
 */
static void *mystrstr(char *string, char *substring) {
  /* The scan loop below never runs for an empty haystack, so an empty
   * needle has to be handled up front to match consistently. */
  if (*substring == '\0')
    return string;

  for ( ; *string != '\0'; string++) {
    char *hay = string;
    char *needle = substring;

    /* Advance while both strings agree; stops at the first mismatch or
     * at the end of the needle. */
    while (*needle != '\0' && *hay == *needle) {
      hay++;
      needle++;
    }
    if (*needle == '\0')
      return string;
  }
  return NULL;
}
99
100 /********************************************************************************************************************************/
101 /* */
102 /* This routine is called executing on the temporary stack */
103 /* It performs the actual restore of everything (except the libmtcp.so area) */
104 /* */
105 /********************************************************************************************************************************/
106
107 __attribute__ ((visibility ("hidden"))) void mtcp_restoreverything (void)
108
109 {
110 int rc;
111 VA holebase, highest_va;
112 VA vdso_addr = (VA)NULL, vsyscall_addr = (VA)NULL,
113 stack_end_addr = (VA)NULL; /* VA = virtual address */
114 void *current_brk;
115 void *new_brk;
116 void (*finishrestore) (void);
117
118 DPRINTF(("Entering mtcp_restart_nolibc.c:mtcp_restoreverything\n"));
119
120 /* The kernel (2.6.9 anyway) has a variable mm->brk that we should restore. The only access we have is brk() which basically */
121 /* sets mm->brk to the new value, but also has a nasty side-effect (as far as we're concerned) of mmapping an anonymous */
122 /* section between the old value of mm->brk and the value being passed to brk(). It will munmap the bracketed memory if the */
123 /* value being passed is lower than the old value. But if zero, it will return the current mm->brk value. */
124
125 /* So we're going to restore the brk here. As long as the current mm->brk value is below the static restore region, we're ok */
126 /* because we 'know' the restored brk can't be in the static restore region, and we don't care if the kernel mmaps something */
127 /* or munmaps something because we're going to wipe it all out anyway. */
128
129 current_brk = mtcp_sys_brk (NULL);
130 if (((VA)current_brk > (VA)mtcp_shareable_begin) && ((VA)mtcp_saved_break < (VA)mtcp_shareable_end)) {
131 mtcp_printf ("mtcp_restoreverything: current_brk %p, mtcp_saved_break %p, mtcp_shareable_begin %p, mtcp_shareable_end %p\n",
132 current_brk, mtcp_saved_break, mtcp_shareable_begin, mtcp_shareable_end);
133 mtcp_abort ();
134 }
135
136 new_brk = mtcp_sys_brk (mtcp_saved_break);
137 if (new_brk == (void *)-1) {
138 mtcp_printf( "mtcp_restoreverything: sbrk(%p): errno: %d (bad heap)\n",
139 mtcp_saved_break, mtcp_sys_errno );
140 mtcp_abort();
141 }
142 if (new_brk != mtcp_saved_break) {
143 if (new_brk == current_brk && new_brk > mtcp_saved_break)
144 DPRINTF(("mtcp_restoreverything: new_brk == current_brk == %p\n"
145 " saved_break, %p, is strictly smaller; data segment not extended.\n",
146 new_brk, mtcp_saved_break));
147 else {
148 mtcp_printf ("mtcp_restoreverything: error: new break (%p) != saved break"
149 " (%p)\n", (VA)current_brk, mtcp_saved_break);
150 mtcp_abort ();
151 }
152 }
153 DPRINTF(("current_brk: %p; mtcp_saved_break: %p; new_brk: %p\n",
154 current_brk, mtcp_saved_break, new_brk));
155
156 /* Unmap everything except for this image as everything we need
157 * is contained in the libmtcp.so image.
158 * Unfortunately, in later Linuxes, it's important also not to wipe
159 * out [vsyscall] if it exists (we may not have permission to remove it).
160 * In any case, [vsyscall] is the highest section if it exists.
161 * Further, if the [vdso] when we restart is different from the old
162 * [vdso] that was saved at checkpoint time, then we need to keep
163 * both of them. The old one may be needed if we're returning from
164 * a system call at checkpoint time. The new one is needed for future
165 * system calls.
166 * Highest_userspace_address is determined heuristically. Primarily, it
167 * was intended to make sure we don't overwrite [vdso] or [vsyscall].
168 * But it was heuristically chosen as a constant (works for earlier
169 * Linuxes), or as the end of stack. Probably, we should review that,
170 * and just make it beginning of [vsyscall] where that exists.
171 */
172
173 holebase = (VA)mtcp_shareable_begin;
174 holebase &= -MTCP_PAGE_SIZE;
175 asm volatile (CLEAN_FOR_64_BIT(xor %%eax,%%eax ; movw %%ax,%%fs)
176 : : : CLEAN_FOR_64_BIT(eax)); // the unmaps will wipe what it points to anyway
177 // asm volatile (CLEAN_FOR_64_BIT(xor %%eax,%%eax ; movw %%ax,%%gs) : : : CLEAN_FOR_64_BIT(eax)); // so make sure we get a hard failure just in case
178 // ... it's left dangling on something I want
179
180 /* Unmap from address 0 to holebase, except for [vdso] section */
181 vdso_addr = vsyscall_addr = stack_end_addr = 0;
182 highest_va = highest_userspace_address(&vdso_addr, &vsyscall_addr,
183 &stack_end_addr);
184 if (stack_end_addr == 0) /* 0 means /proc/self/maps doesn't mark "[stack]" */
185 highest_va = HIGHEST_VA;
186 else
187 highest_va = stack_end_addr;
188 DPRINTF(("new_brk (end of heap): %p, holebase (libmtcp.so): %p, stack_end_addr: %p\n"
189 " vdso_addr: %p, highest_va: %p, vsyscall_addr: %p\n",
190 new_brk, holebase, stack_end_addr,
191 vdso_addr, highest_va, vsyscall_addr));
192
193 if (vdso_addr != (VA)NULL && vdso_addr < holebase) {
194 DPRINTF (("mtcp restoreverything*: unmapping %p..%p, %p..%p\n",
195 NULL, vdso_addr-1, vdso_addr+MTCP_PAGE_SIZE, holebase - 1));
196 rc = mtcp_sys_munmap ((void *)NULL, (size_t)vdso_addr);
197 rc |= mtcp_sys_munmap ((void *)vdso_addr + MTCP_PAGE_SIZE,
198 (size_t)holebase - vdso_addr - MTCP_PAGE_SIZE);
199 } else {
200 DPRINTF (("mtcp restoreverything*: unmapping 0..%p\n", holebase - 1));
201 rc = mtcp_sys_munmap (NULL, holebase);
202 }
203 if (rc == -1) {
204 mtcp_printf ("mtcp_sys_munmap: error %d unmapping from 0 to %p\n",
205 mtcp_sys_errno, holebase);
206 mtcp_abort ();
207 }
208
209 /* Unmap from address holebase to highest_va, except for [vdso] section */
210 /* Value of mtcp_shareable_end (end of data segment) can change from before */
211 holebase = (VA)mtcp_shareable_end;
212 holebase = (holebase + MTCP_PAGE_SIZE - 1) & -MTCP_PAGE_SIZE;
213 if (vdso_addr != (VA)NULL && vdso_addr + MTCP_PAGE_SIZE <= (VA)highest_va) {
214 if (vdso_addr > holebase) {
215 DPRINTF (("mtcp restoreverything*: unmapping %p..%p, %p..%p\n",
216 holebase, vdso_addr-1, vdso_addr+MTCP_PAGE_SIZE, highest_va - 1));
217 rc = mtcp_sys_munmap ((void *)holebase, vdso_addr - holebase);
218 rc |= mtcp_sys_munmap ((void *)vdso_addr + MTCP_PAGE_SIZE,
219 highest_va - vdso_addr - MTCP_PAGE_SIZE);
220 } else {
221 DPRINTF (("mtcp restoreverything*: unmapping %p..%p\n",
222 holebase, highest_va - 1));
223 if (highest_va < holebase) {
224 mtcp_printf ("mtcp_sys_munmap: error unmapping:"
225 " highest_va(%p) < holebase(%p)\n",
226 highest_va, holebase);
227 mtcp_abort ();
228 }
229 rc = mtcp_sys_munmap ((void *)holebase, highest_va - holebase);
230 }
231 }
232 if (rc == -1) {
233 mtcp_printf ("mtcp_sys_munmap: error %d unmapping from %p by %p bytes\n",
234 mtcp_sys_errno, holebase, highest_va - holebase);
235 mtcp_abort ();
236 }
237 DPRINTF(("\n")); /* end of munmap */
238
239 /* Read address of mtcp.c's finishrestore routine */
240
241 readcs (CS_FINISHRESTORE);
242 readfile (&finishrestore, sizeof finishrestore);
243
244 /* Restore file descriptors */
245
246 DPRINTF (("mtcp restoreverything*: restoring file descriptors\n"));
247 readfiledescrs (); // restore files
248
249 /* Restore memory areas */
250
251 global_vdso_addr = vdso_addr;/* This global var goes away when linker used. */
252 DPRINTF (("mtcp restoreverything*: restoring memory areas\n"));
253 readmemoryareas ();
254
255 /* Everything restored, close file and finish up */
256
257 DPRINTF (("mtcp restoreverything*: close cpfd %d\n", mtcp_restore_cpfd));
258 mtcp_sys_close (mtcp_restore_cpfd);
259 mtcp_restore_cpfd = -1;
260 DPRINTF (("mtcp restoreverything*: waiting on gzip_child_pid: %d\n", mtcp_restore_gzip_child_pid ));
261 // Calling waitpid here, but on 32-bit Linux, libc:waitpid() calls wait4()
262 if( mtcp_restore_gzip_child_pid != -1 ) {
263 if( mtcp_sys_wait4(mtcp_restore_gzip_child_pid , NULL, 0, NULL ) == -1 )
264 DPRINTF (("mtcp restoreverything*: error wait4: errno: %d", mtcp_sys_errno));
265 mtcp_restore_gzip_child_pid = -1;
266 }
267
268 DPRINTF (("mtcp restoreverything*: restore complete, resuming...\n"));
269
270 /* Jump to finishrestore in original program's libmtcp.so image */
271
272 (*finishrestore) ();
273 }
274
275 /********************************************************************************************************************************/
276 /* */
277 /* Read file descriptor info from checkpoint file and re-open and re-position files on the same descriptors */
278 /* Move the checkpoint file to a different fd if needed */
279 /* */
280 /********************************************************************************************************************************/
281
282 static void readfiledescrs (void)
283
284 {
285 char linkbuf[FILENAMESIZE];
286 int fdnum, flags, linklen, tempfd;
287 off_t offset;
288 struct stat statbuf;
289
290 readcs (CS_FILEDESCRS);
291
292 while (1) {
293
294 /* Read parameters of next file to restore */
295
296 readfile (&fdnum, sizeof fdnum);
297 if (fdnum < 0) break;
298 readfile (&statbuf, sizeof statbuf);
299 readfile (&offset, sizeof offset);
300 readfile (&linklen, sizeof linklen);
301 if (linklen >= sizeof linkbuf) {
302 mtcp_printf ("filename too long %d\n", linklen);
303 mtcp_abort ();
304 }
305 readfile (linkbuf, linklen);
306 linkbuf[linklen] = 0;
307
308 DPRINTF (("mtcp readfiledescrs*: restoring %d -> %s\n", fdnum, linkbuf));
309
310 /* Maybe it restores to same fd as we're using for checkpoint file. */
311 /* If so, move the checkpoint file somewhere else. */
312
313 if (fdnum == mtcp_restore_cpfd) {
314 flags = mtcp_sys_dup (mtcp_restore_cpfd);
315 if (flags < 0) {
316 mtcp_printf ("mtcp readfiledescrs: error %d duping checkpoint file fd %d\n",
317 mtcp_sys_errno, mtcp_restore_cpfd);
318 mtcp_abort ();
319 }
320 mtcp_restore_cpfd = flags;
321 DPRINTF (("mtcp readfiledescrs*: cpfd changed to %d\n",
322 mtcp_restore_cpfd));
323 }
324
325 /* Open the file on a temp fd */
326
327 flags = O_RDWR;
328 if (!(statbuf.st_mode & S_IWUSR)) flags = O_RDONLY;
329 else if (!(statbuf.st_mode & S_IRUSR)) flags = O_WRONLY;
330 tempfd = mtcp_sys_open (linkbuf, flags, 0);
331 if (tempfd < 0) {
332 mtcp_printf ("mtcp readfiledescrs: error %d re-opening %s flags %o\n", mtcp_sys_errno, linkbuf, flags);
333 if (mtcp_sys_errno == EACCES)
334 mtcp_printf(" Permission denied.\n");
335 //mtcp_abort ();
336 continue;
337 }
338
339 /* Move it to the original fd if it didn't coincidentally open there */
340
341 if (tempfd != fdnum) {
342 if (mtcp_sys_dup2 (tempfd, fdnum) < 0) {
343 mtcp_printf ("mtcp readfiledescrs: error %d duping %s from %d to %d\n", mtcp_sys_errno, linkbuf, tempfd, fdnum);
344 mtcp_abort ();
345 }
346 mtcp_sys_close (tempfd);
347 }
348
349 /* Position the file to its same spot it was at when checkpointed */
350
351 if (S_ISREG (statbuf.st_mode) && (mtcp_sys_lseek (fdnum, offset, SEEK_SET) != offset)) {
352 mtcp_printf ("mtcp readfiledescrs: error %d positioning %s to %ld\n", mtcp_sys_errno, linkbuf, (long)offset);
353 mtcp_abort ();
354 }
355 }
356 }
357
358 /**************************************************************************/
359 /* */
360 /* Read memory area descriptors from checkpoint file */
361 /* Read memory area contents and/or mmap original file */
362 /* Four cases: MAP_ANONYMOUS (if file /proc/.../maps reports file, */
363 /* handle it as if MAP_PRIVATE and not MAP_ANONYMOUS, */
364 /* but restore from ckpt image: no copy-on-write); */
365 /* private, currently assumes backing file exists */
366 /* shared, but need to recreate file; */
367 /* shared and file currently exists */
368 /* (if writeable by us and memory map has write */
369 /* protection, then write to it from checkpoint file; */
370 /* else skip ckpt image and map current data of file) */
371 /* NOTE: Linux option MAP_SHARED|MAP_ANONYMOUS */
372 /* currently not supported; result is undefined. */
373 /* If there is an important use case, we will fix this. */
374 /* (NOTE: mmap requires that if MAP_ANONYMOUS */
375 /* was not set, then mmap must specify a backing store. */
376 /* Further, a reference by mmap constitutes a reference */
377 /* to the file, and so the file cannot truly be deleted */
378 /* until the process no longer maps it. So, if we don't */
379 /* see the file on restart and there is no MAP_ANONYMOUS, */
380 /* then we have a responsibility to recreate the file. */
381 /* MAP_ANONYMOUS is not currently POSIX.) */
382 /* */
383 /**************************************************************************/
384
385 static void readmemoryareas (void)
386
387 {
388 Area area;
389 char cstype;
390 int flags, imagefd;
391 void *mmappedat;
392 /* make check: stale-fd and forkexec fail (and others?) with this turned on. */
393 #if 0
394 /* If not using gzip decompression, then use mmapfile instead of readfile. */
395 int do_mmap_ckpt_image = (mtcp_restore_gzip_child_pid == -1);
396 #else
397 int do_mmap_ckpt_image = 0;
398 #endif
399
400 while (1) {
401 int try_skipping_existing_segment = 0;
402
403 readfile (&cstype, sizeof cstype);
404 if (cstype == CS_THEEND) break;
405 if (cstype != CS_AREADESCRIP) {
406 mtcp_printf ("mtcp_restart_nolibc: expected CS_AREADESCRIP but had %d\n", cstype);
407 mtcp_abort ();
408 }
409 readfile (&area, sizeof area);
410
411 if ((area.flags & MAP_ANONYMOUS) && (area.flags & MAP_SHARED))
412 mtcp_printf("\n\n*** WARNING: Next area specifies MAP_ANONYMOUS"
413 " and MAP_SHARED.\n"
414 "*** Turning off MAP_ANONYMOUS and hoping for best.\n\n");
415
416 /* CASE MAP_ANONYMOUS (usually implies MAP_PRIVATE): */
417 /* For anonymous areas, the checkpoint file contains the memory contents */
418 /* directly. So mmap an anonymous area and read the file into it. */
419 /* If file exists, turn off MAP_ANONYMOUS: standard private map */
420
421 if (area.flags & MAP_ANONYMOUS) {
422
423 /* If there is a filename there, though, pretend like we're mapping */
424 /* to it so a new /proc/self/maps will show a filename there like with */
425 /* original process. We only need read-only access because we don't */
426 /* want to ever write the file. */
427
428 imagefd = 0;
429 if (area.name[0] == '/') { /* If not null string, not [stack] or [vdso] */
430 imagefd = mtcp_sys_open (area.name, O_RDONLY, 0);
431 if (imagefd < 0) imagefd = 0;
432 else area.flags ^= MAP_ANONYMOUS;
433 }
434
435 /* Create the memory area */
436
437 if (area.flags & MAP_ANONYMOUS) {
438 DPRINTF (("mtcp restoreverything*: restoring anonymous area %p at %p\n", area.size, area.addr));
439 } else {
440 DPRINTF (("mtcp restoreverything*: restoring to non-anonymous area from anonymous area %p at %p from %s + 0x%X\n", area.size, area.addr, area.name, area.offset));
441 }
442 /* POSIX says mmap would unmap old memory. Munmap never fails if args
443 * are valid. Can we unmap vdso and vsyscall in Linux? Used to use
444 * mtcp_safemmap here to check for address conflicts.
445 */
446 mmappedat = mtcp_sys_mmap (area.addr, area.size, area.prot | PROT_WRITE,
447 area.flags, imagefd, area.offset);
448 if (mmappedat == MAP_FAILED) {
449 DPRINTF(("mtcp_restart_nolibc: error %d mapping %p bytes at %p\n",
450 mtcp_sys_errno, area.size, area.addr));
451
452 try_skipping_existing_segment = 1;
453 }
454 if (mmappedat != area.addr && !try_skipping_existing_segment) {
455 mtcp_printf ("mtcp_restart_nolibc: area at %p got mmapped to %p\n",
456 area.addr, mmappedat);
457 mtcp_abort ();
458 }
459
460 /* Read saved area contents */
461 readcs (CS_AREACONTENTS);
462 if (try_skipping_existing_segment)
463 #ifdef BUG_64BIT_2_6_9
464 # if 0
465 // This fails on teracluster. Presumably extra symbols cause overflow.
466 {
467 char tmpbuf[4];
468 int i;
469 /* slow, but rare case; and only for old Linux 2.6.9 */
470 for ( i = 0; i < area.size / 4; i++ )
471 readfile (tmpbuf, 4);
472 }
473 # else
474 // This fails in CERN Linux 2.6.9; can't readfile on top of vsyscall
475 readfile (area.addr, area.size);
476 # endif
477 #else
478 # ifdef __x86_64__
479 // This fails on teracluster. Presumably extra symbols cause overflow.
480 skipfile (area.size);
481 # else
482 // With Red Hat Release 5.2, Red Hat allows vdso to go almost anywhere.
483 // If we were unlucky and it was randomized onto our memory area, re-exec.
484 // In the future, a cleaner fix will be a linker script to reserve
485 // or even load our own memory section at fixed addresses, so that
486 // vdso will be placed elsewhere.
487 // This patch is not safe, because there are unnamed sections that
488 // might be required. But early 32-bit Linux kernels also don't name
489 // [vdso] in the /proc filesystem, and it's safe to skipfile() there.
490 // This code is based on what's in mtcp_check_vdso.c .
491 { if (area.name[0] == '/' /* If not null string, not [stack] or [vdso] */
492 && global_vdso_addr >= (VA)area.addr
493 && global_vdso_addr < (VA)area.addr + area.size
494 ) {
495 DPRINTF(("randomized vdso conflict; retrying\n"));
496 mtcp_sys_close (mtcp_restore_cpfd);
497 mtcp_restore_cpfd = -1;
498 if (-1 == mtcp_sys_execve(mtcp_restore_cmd_file,
499 mtcp_restore_argv, mtcp_restore_envp))
500 DPRINTF(("execve failed. Restart may fail.\n"));
501 } else
502 skipfile (area.size);
503 }
504 # endif
505 #endif
506 else {
507 /* This mmapfile after prev. mmap is okay; use same args again.
508 * Posix says prev. map will be munmapped.
509 */
510 /* ANALYZE THE CONDITION FOR DOING mmapfile MORE CAREFULLY. */
511 if (do_mmap_ckpt_image
512 && mystrstr(area.name, "[vdso]")
513 && mystrstr(area.name, "[vsyscall]"))
514 mmapfile (area.addr, area.size, area.prot | PROT_WRITE, area.flags);
515 else
516 readfile (area.addr, area.size);
517 if (!(area.prot & PROT_WRITE))
518 if (mtcp_sys_mprotect (area.addr, area.size, area.prot) < 0) {
519 mtcp_printf ("mtcp_restart_nolibc: error %d write-protecting %p bytes at %p\n",
520 mtcp_sys_errno, area.size, area.addr);
521 mtcp_abort ();
522 }
523 }
524
525 /* Close image file (fd only gets in the way) */
526 if (!(area.flags & MAP_ANONYMOUS)) mtcp_sys_close (imagefd);
527 }
528
529 /* CASE NOT MAP_ANONYMOUS: */
530 /* Otherwise, we mmap the original file contents to the area */
531
532 else {
533 DPRINTF (("mtcp restoreverything*: restoring mapped area %p at %p to %s + 0x%X\n", area.size, area.addr, area.name, area.offset));
534 flags = 0; // see how to open it based on the access required
535 // O_RDONLY = 00
536 // O_WRONLY = 01
537 // O_RDWR = 02
538 if (area.prot & PROT_WRITE) flags = O_WRONLY;
539 if (area.prot & (PROT_EXEC | PROT_READ)){
540 flags = O_RDONLY;
541 if (area.prot & PROT_WRITE) flags = O_RDWR;
542 }
543
544 if (area.prot & MAP_SHARED) {
545 read_shared_memory_area_from_file(&area, flags);
546
547 } else { /* not MAP_ANONYMOUS, not MAP_SHARED */
548 imagefd = mtcp_sys_open (area.name, flags, 0); // open it
549
550 /* CASE NOT MAP_ANONYMOUS, MAP_PRIVATE, backing file doesn't exist: */
551 if (imagefd < 0) {
552 mtcp_printf ("mtcp_restart_nolibc: error %d opening mmap file %s\n",
553 mtcp_sys_errno, area.name);
554 mtcp_abort ();
555 }
556
557 /* CASE NOT MAP_ANONYMOUS, and MAP_PRIVATE, */
558 mmappedat = mtcp_sys_mmap (area.addr, area.size, area.prot,
559 area.flags, imagefd, area.offset);
560 if (mmappedat == MAP_FAILED) {
561 mtcp_printf ("mtcp_restart_nolibc: error %d mapping %s offset %d at %p\n",
562 mtcp_sys_errno, area.name, area.offset, area.addr);
563 mtcp_abort ();
564 }
565 if (mmappedat != area.addr) {
566 mtcp_printf ("mtcp_restart_nolibc: area at %p got mmapped to %p\n",
567 area.addr, mmappedat);
568 mtcp_abort ();
569 }
570 mtcp_sys_close (imagefd); // don't leave dangling fd
571
572 readcs (CS_AREACONTENTS);
573
574 // If we have write permission on file and memory area (data segment),
575 // then we use data in checkpoint image.
576 // In the case of DMTCP, multiple processes may duplicate this work.
577 if ( (imagefd = mtcp_sys_open(area.name, O_WRONLY, 0)) >= 0
578 && ( (flags == O_WRONLY || flags == O_RDWR) ) ) {
579 mtcp_printf ("mtcp_restart_nolibc: mapping %s with data from ckpt image\n",
580 area.name);
581 readfile (area.addr, area.size);
582 }
583 // If we have no write permission on file, then we should use data
584 // from version of file at restart-time (not from checkpoint-time).
585 // Because Linux library files have execute permission,
586 // the dynamic libraries from time of checkpoint will be used.
587 // NOTE: man 2 access: access may not work correctly on NFS file
588 // systems with UID mapping enabled, because UID mapping is done
589 // on the server and hidden from the client, which checks permissions.
590 else {
591 /* read-exec permission ==> executable library, unlikely to change;
592 * For example, gconv-modules.cache is shared with read-exec perm.
593 * If read-only permission, warn user that we're using curr. file.
594 */
595 if (-1 == mtcp_sys_access(area.name, X_OK))
596 mtcp_printf ("MTCP: mtcp_restart_nolibc: mapping current version "
597 "of %s into memory;\n"
598 " _not_ file as it existed at time of checkpoint.\n"
599 " Change %s:%d and re-compile, if you want different "
600 "behavior.\n",
601 area.name, __FILE__, __LINE__);
602
603 // We want to skip the checkpoint file pointer
604 // and move to the end of the shared file data. We can't
605 // use lseek() function as it can fail if we are using a pipe to read
606 // the contents of checkpoint file (we might be using gzip to
607 // uncompress checkpoint file on the fly). Thus we have to read or
608 // skip contents using skipfile().
609 skipfile (area.size);
610 }
611 if (imagefd >= 0)
612 mtcp_sys_close (imagefd); // don't leave dangling fd
613 }
614 }
615
616 if (area.name && mystrstr(area.name, "[heap]")
617 && mtcp_sys_brk(NULL) != area.addr + area.size)
618 DPRINTF(("WARNING: break (%p) not equal to end of heap (%p)\n",
619 mtcp_sys_brk(NULL), area.addr + area.size));
620 }
621 }
622
623 /*
624 * CASE NOT MAP_ANONYMOUS, MAP_SHARED :
625 *
626 * If the shared file does NOT exist on the system, the restart process creates
627 * the file on the disk and writes the contents from the ckpt image into this
628 * recreated file. The file is later mapped into memory with MAP_SHARED and
629 * correct protection flags.
630 *
 * If the file already exists on the disk, there are two possible scenarios as
 * follows:
633 * 1. The shared memory has WRITE access: In this case it is possible that the
634 * file was modified by the checkpoint process and so we restore the file
635 * contents from the checkpoint image. In doing so, we can fail however if
636 * we do not have sufficient access permissions.
637 * 2. The shared memory has NO WRITE access: In this case, we use the current
638 * version of the file rather than the one that existed at checkpoint time.
639 * We map the file with correct flags and discard the checkpointed copy of
640 * the file contents.
641 *
642 * Other than these, if we can't access the file, we print an error message
643 * and quit.
644 */
645 static void read_shared_memory_area_from_file(Area* area, int flags)
646 {
647 void *mmappedat;
648 int areaContentsAlreadyRead = 0;
649 int imagefd, rc;
650
651 if (!(area->prot & MAP_SHARED)) {
652 mtcp_printf("read_shared_memory_area_from_file: Illegal function call\n");
653 mtcp_abort();
654 }
655
656 /* Check to see if the filename ends with " (deleted)" */
657 const char* deleted_file_suffix = " (deleted)";
658 if (mtcp_strendswith(area->name, deleted_file_suffix)) {
659 size_t len = mtcp_strlen(area->name);
660 area->name [ mtcp_strlen(area->name) - mtcp_strlen(deleted_file_suffix) ] = '\0';
661 }
662
663 imagefd = mtcp_sys_open (area->name, flags, 0); // open it
664
665 if (imagefd < 0 && mtcp_sys_errno != ENOENT) {
666 mtcp_printf ("mtcp_restart_nolibc: error %d opening mmap file %s"
667 "with flags:%d\n", mtcp_sys_errno, area->name, flags);
668 mtcp_abort();
669 }
670
671 if (imagefd < 0) {
672
673 // If the shared file doesn't exist on the disk, we try to create it
674 DPRINTF(("mtcp restoreverything*: Shared file %s not found, Creating new\n",area->name));
675
676 /* Dangerous for DMTCP: Since file is created with O_CREAT, */
677 /* hopefully, a second process should ignore O_CREAT and just */
678 /* duplicate the work of the first process, with no ill effect.*/
679 imagefd = open_shared_file(area->name);
680
681 /* Acquire write lock on the file before writing anything to it
682 * If we don't, then there is a weird RACE going on between the
683 * restarting processes which causes problems with mmap()ed area for
684 * this file and hence the restart fails. We still don't know the
685 * reason for it. --KAPIL
686 * NOTE that we don't need to unlock the file as it will be
687 * automatically done when we close it.
688 */
689 lock_file(imagefd, area->name, F_WRLCK);
690
691 // create a temp area in the memory exactly of the size of the
692 // shared file. We read the contents of the shared file from
693 // checkpoint file(.mtcp) into system memory. From system memory,
694 // the contents are written back to newly created replica of the shared
695 // file (at the same path where it used to exist before checkpoint).
696 mmappedat = mtcp_sys_mmap (area->addr, area->size, PROT_READ | PROT_WRITE,
697 MAP_PRIVATE | MAP_ANONYMOUS, imagefd,
698 area->offset);
699 if (mmappedat == MAP_FAILED) {
700 mtcp_printf ("mtcp_restart_nolibc: error %d mapping temp memory at %p\n",
701 mtcp_sys_errno, area->addr);
702 mtcp_abort ();
703 }
704
705 readcs (CS_AREACONTENTS);
706 readfile (area->addr, area->size);
707 areaContentsAlreadyRead = 1;
708
709 if ( mtcp_sys_write(imagefd, area->addr,area->size) < 0 ){
710 mtcp_printf ("mtcp_restart_nolibc: error %d creating mmap file %s\n",
711 mtcp_sys_errno, area->name);
712 mtcp_abort();
713 }
714
715 // unmap the temp memory allocated earlier
716 rc = mtcp_sys_munmap (area->addr, area->size);
717 if (rc == -1) {
718 mtcp_printf ("mtcp_restart_nolibc: error %d unmapping temp memory at %p\n",
719 mtcp_sys_errno, area->addr);
720 mtcp_abort ();
721 }
722
723 // set file permissions as per memory area protection.
724 int fileprot = 0;
725 if (area->prot & PROT_READ) fileprot |= S_IRUSR;
726 if (area->prot & PROT_WRITE) fileprot |= S_IWUSR;
727 if (area->prot & PROT_EXEC) fileprot |= S_IXUSR;
728 mtcp_sys_fchmod(imagefd, fileprot);
729
730 //close the file
731 mtcp_sys_close(imagefd);
732
733 // now open the file again, this time with appropriate flags
734 imagefd = mtcp_sys_open (area->name, flags, 0);
735 if (imagefd < 0){
736 mtcp_printf ("mtcp_restart_nolibc: error %d opening mmap file %s\n",
737 mtcp_sys_errno, area->name);
738 mtcp_abort ();
739 }
740 } else { /* else file exists */
741 /* Acquire read lock on the shared file before doing an mmap. See
742 * detailed comments above.
743 */
744 DPRINTF(("Acquiring lock on shared file :%s\n", area->name));
745 lock_file(imagefd, area->name, F_RDLCK);
746 DPRINTF(("After Acquiring lock on shared file :%s\n", area->name));
747 }
748
749 mmappedat = mtcp_sys_mmap (area->addr, area->size, area->prot,
750 area->flags, imagefd, area->offset);
751 if (mmappedat == MAP_FAILED) {
752 mtcp_printf ("mtcp_restart_nolibc: error %d mapping %s offset %d at %p\n",
753 mtcp_sys_errno, area->name, area->offset, area->addr);
754 mtcp_abort ();
755 }
756 if (mmappedat != area->addr) {
757 mtcp_printf ("mtcp_restart_nolibc: area at %p got mmapped to %p\n",
758 area->addr, mmappedat);
759 mtcp_abort ();
760 }
761
762 if ( areaContentsAlreadyRead == 0 ){
763 readcs (CS_AREACONTENTS);
764
765 #if 0
766 // If we have write permission or execute permission on file,
767 // then we use data in checkpoint image,
768 // If MMAP_SHARED, this reverts the file to data at time of checkpoint.
769 // In the case of DMTCP, multiple processes may duplicate this work.
770 // NOTE: man 2 access: access may not work correctly on NFS file
771 // systems with UID mapping enabled, because UID mapping is done
772 // on the server and hidden from the client, which checks permissions.
773 /* if (flags == O_WRONLY || flags == O_RDWR) */
774 if ( ( (imagefd = mtcp_sys_open(area->name, O_WRONLY, 0)) >= 0
775 && ( (flags == O_WRONLY || flags == O_RDWR) ) )
776 || (0 == mtcp_sys_access(area->name, X_OK)) ) {
777
778 mtcp_printf ("mtcp_restart_nolibc: mapping %s with data from ckpt image\n",
779 area->name);
780 readfile (area->addr, area->size);
781 mtcp_sys_close (imagefd); // don't leave dangling fd
782 }
783 #else
784 if (area->prot & PROT_WRITE) {
785 mtcp_printf ("mtcp_restart_nolibc: mapping %s with data from ckpt image\n",
786 area->name);
787 readfile (area->addr, area->size);
788 }
789 #endif
790 // If we have no write permission on file, then we should use data
791 // from version of file at restart-time (not from checkpoint-time).
792 // Because Linux library files have execute permission,
793 // the dynamic libraries from time of checkpoint will be used.
794
795 // If we haven't created the file (i.e. the shared file _does_ exist
796 // when this process wants to map it) and the memory area does not have
797 // WRITE access, we want to skip the checkpoint
798 // file pointer and move to the end of the shared file. We can not
799 // use lseek() function as it can fail if we are using a pipe to read
800 // the contents of checkpoint file (we might be using gzip to
801 // uncompress checkpoint file on the fly). Thus we have to read or
802 // skip contents using the following code.
803
804 // NOTE: man 2 access: access may not work correctly on NFS file
805 // systems with UID mapping enabled, because UID mapping is done
806 // on the server and hidden from the client, which checks permissions.
807 else {
808 /* read-exec permission ==> executable library, unlikely to change;
809 * For example, gconv-modules.cache is shared with read-exec perm.
810 * If read-only permission, warn user that we're using curr. file.
811 */
812 if (imagefd >= 0 && -1 == mtcp_sys_access(area->name, X_OK)) {
813 if (mtcp_strstartswith(area->name, "/usr/") ||
814 mtcp_strstartswith(area->name, "/var/")) {
815 DPRINTF(("MTCP: mtcp_restart_nolibc: mapping current version "
816 "of %s into memory;\n"
817 " _not_ file as it existed at time of checkpoint.\n"
818 " Change %s:%d and re-compile, if you want different "
819 "behavior.\n",
820 area->name, __FILE__, __LINE__));
821 } else {
822 mtcp_printf("MTCP: mtcp_restart_nolibc: mapping current version "
823 "of %s into memory;\n"
824 " _not_ file as it existed at time of checkpoint.\n"
825 " Change %s:%d and re-compile, if you want different "
826 "behavior. %d: %d\n",
827 area->name, __FILE__, __LINE__);
828 }
829 }
830 skipfile (area->size);
831 }
832 }
833 if (imagefd >= 0)
834 mtcp_sys_close (imagefd); // don't leave dangling fd in way of other stuff
835 }
836
/*
 * Consume the next one-byte section marker from the checkpoint image and
 * check it against the marker the caller expects.
 *
 *   cs - section code that should come next in the image
 *
 * On a mismatch the two codes are printed and mtcp_abort() is called.
 */
static void readcs (char cs)
{
  char found;

  readfile (&found, sizeof found);
  if (found == cs) {
    return;   /* marker is the expected one: nothing more to do */
  }

  mtcp_printf ("mtcp readcs: checkpoint section %d next, expected %d\n", found, cs);
  mtcp_abort ();
}
848
849 static void readfile(void *buf, size_t size)
850 {
851 ssize_t rc;
852 size_t ar = 0;
853 int tries = 0;
854
855 while(ar != size)
856 {
857 rc = mtcp_sys_read(mtcp_restore_cpfd, buf + ar, size - ar);
858 if (rc < 0)
859 {
860 mtcp_printf("mtcp_restart_nolibc readfile: error %d reading checkpoint\n", mtcp_sys_errno);
861 mtcp_abort();
862 }
863 else if (rc == 0)
864 {
865 mtcp_printf("mtcp_restart_nolibc readfile: only read %zu bytes instead of %zu from checkpoint file\n", ar, size);
866 if (tries++ >= 10) {
867 mtcp_printf("mtcp_restart_nolibc readfile:" \
868 " failed to read after 10 tries in a row.\n");
869 mtcp_abort();
870 }
871 }
872
873 ar += rc;
874 }
875 }
876
877 static void mmapfile(void *buf, size_t size, int prot, int flags)
878 {
879 void *addr;
880 off_t rc;
881 size_t ar;
882 ar = 0;
883
884 /* Use mmap for this portion of checkpoint image. */
885 addr = mtcp_sys_mmap(buf, size, prot, flags, mtcp_restore_cpfd, 0);
886 if (addr != buf) {
887 if (addr == MAP_FAILED)
888 mtcp_printf("mtcp_restart_nolibc mmapfile:"
889 " error %d reading checkpoint file\n", mtcp_sys_errno);
890 else
891 mtcp_printf("mmapfile: Requested address %p, but got address %p\n",
892 buf, addr);
893 mtcp_abort();
894 }
895 /* Now update mtcp_restore_cpfd so as to work the same way as readfile() */
896 rc = mtcp_sys_lseek(mtcp_restore_cpfd, size, SEEK_CUR);
897 }
898
899 static void skipfile(size_t size)
900 {
901 size_t ar;
902 ssize_t rc;
903 ar = 0;
904 char array[512];
905
906 while(ar != size)
907 {
908 rc = mtcp_sys_read(mtcp_restore_cpfd, array, (size-ar < 512 ? size - ar : 512));
909 if(rc < 0)
910 {
911 mtcp_printf("mtcp_restart_nolibc skipfile: error %d skipping checkpoint\n", mtcp_sys_errno);
912 mtcp_abort();
913 }
914 else if(rc == 0)
915 {
916 mtcp_printf("mtcp_restart_nolibc skipfile: only skipped %zu bytes instead of %zu from checkpoint file\n", ar, size);
917 mtcp_abort();
918 }
919
920 ar += rc;
921 }
922 }
923
924 #if 1
925 /* Modelled after mtcp_safemmap. - Gene */
926 static VA highest_userspace_address (VA *vdso_addr, VA *vsyscall_addr,
927 VA *stack_end_addr)
928 {
929 char c;
930 int mapsfd, i;
931 VA endaddr, startaddr;
932 VA highaddr = 0; /* high stack address should be highest userspace addr */
933 const char *stackstring = "[stack]";
934 const char *vdsostring = "[vdso]";
935 const char *vsyscallstring = "[vsyscall]";
936 const int bufsize = 1 + sizeof "[vsyscall]"; /* largest of last 3 strings */
937 char buf[bufsize];
938
939 buf[0] = '\0';
940 buf[bufsize - 1] = '\0';
941
942 /* Scan through the mappings of this process */
943
944 mapsfd = mtcp_sys_open ("/proc/self/maps", O_RDONLY, 0);
945 if (mapsfd < 0) {
946 mtcp_printf("couldn't open /proc/self/maps\n");
947 mtcp_abort();
948 }
949
950 *vdso_addr = (VA)NULL;
951 while (1) {
952
953 /* Read a line from /proc/self/maps */
954
955 c = mtcp_readhex (mapsfd, &startaddr);
956 if (c == '\0') break;
957 if (c != '-') continue; /* skip to next line */
958 c = mtcp_readhex (mapsfd, &endaddr);
959 if (c == '\0') break;
960 if (c != ' ') continue; /* skip to next line */
961
962 while ((c != '\0') && (c != '\n')) {
963 if (c != ' ') {
964 for (i = 0; (i < bufsize) && (c != ' ')
965 && (c != 0) && (c != '\n'); i++) {
966 buf[i] = c;
967 c = mtcp_readchar (mapsfd);
968 }
969 } else {
970 c = mtcp_readchar (mapsfd);
971 }
972 }
973
974 if (0 == mtcp_strncmp(buf, stackstring, mtcp_strlen(stackstring))) {
975 *stack_end_addr = endaddr;
976 highaddr = endaddr; /* We found "[stack]" in /proc/self/maps */
977 }
978
979 if (0 == mtcp_strncmp(buf, vdsostring, mtcp_strlen(vdsostring))) {
980 *vdso_addr = startaddr;
981 highaddr = endaddr; /* We found "[vdso]" in /proc/self/maps */
982 }
983
984 if (0 == mtcp_strncmp(buf, vsyscallstring, mtcp_strlen(vsyscallstring))) {
985 *vsyscall_addr = startaddr;
986 highaddr = endaddr; /* We found "[vsyscall]" in /proc/self/maps */
987 }
988 }
989
990 mtcp_sys_close (mapsfd);
991
992 return (VA)highaddr;
993 }
994
995 #else
996 /* Added by Gene Cooperman */
997 static VA highest_userspace_address (VA *vdso_addr, VA *vsyscall_address,
998 VA *stack_end_addr)
999 {
1000 Area area;
1001 VA area_end = 0;
1002 int mapsfd;
1003 char *p;
1004
1005 mapsfd = open ("/proc/self/maps", O_RDONLY);
1006 if (mapsfd < 0) {
1007 mtcp_printf ("mtcp highest_userspace_address:"
1008 " error opening /proc/self/maps: errno: %d\n", errno);
1009 mtcp_abort ();
1010 }
1011
1012 *vdso_addr = NULL; /* Default to NULL if not found. */
1013 while (readmapsline (mapsfd, &area)) {
1014 /* Gcc expands strstr() inline, but it's safer to use our own function. */
1015 p = mystrstr (area.name, "[stack]");
1016 if (p != NULL)
1017 area_end = (VA)area.addr + area.size;
1018 p = mystrstr (area.name, "[vdso]");
1019 if (p != NULL)
1020 *vdso_addr = area.addr;
1021 p = mystrstr (area.name, "[vsyscall]");
1022 if (p != NULL)
1023 *vsyscall_addr = area.addr;
1024 p = mystrstr (area.name, "[stack]");
1025 if (p != NULL)
1026 *stack_end_addr = area.addr + addr.size;
1027 p = mystrstr (area.name, "[vsyscall]");
1028 if (p != NULL) /* vsyscall is highest section, when it exists */
1029 return area.addr;
1030 }
1031
1032 close (mapsfd);
1033 return area_end;
1034 }
1035 #endif
1036
1037 #if 1
1038 static void lock_file(int fd, char* name, short l_type)
1039 {
1040 struct flock fl;
1041
1042 fl.l_type = l_type; /* F_RDLCK, F_WRLCK, F_UNLCK */
1043 fl.l_whence = SEEK_SET; /* SEEK_SET, SEEK_CUR, SEEK_END */
1044 fl.l_start = 0; /* Offset from l_whence */
1045 fl.l_len = 0; /* length, 0 = to EOF */
1046
1047 int result = -1;
1048 mtcp_sys_errno = 0;
1049 while (result == -1 || mtcp_sys_errno == EINTR )
1050 result = mtcp_sys_fcntl3(fd, F_SETLKW, &fl); /* F_GETLK, F_SETLK, F_SETLKW */
1051
1052 /* Coverity static analyser stated the following code as DEAD. It is not
1053 * DEADCODE because it is possible that mtcp_sys_fcntl3() fails with some
1054 * error other than EINTR
1055 */
1056 if ( result == -1 ) {
1057 mtcp_printf("mtcp_restart_nolibc lock_file: error %d locking shared file: %s\n",
1058 mtcp_sys_errno, name);
1059 mtcp_abort();
1060 }
1061 }
1062
1063 static int open_shared_file(char* fileName)
1064 {
1065 int i;
1066 int fd;
|
Event var_decl: |
Declaring variable "fIndex" without initializer. |
| Also see events: |
[uninit_use] |
1067 int fIndex;
1068 char currentFolder[FILENAMESIZE];
1069
1070 /* Find the starting index where the actual filename begins */
|
At conditional (1): "fileName[i] != 0": Taking true branch.
|
|
At conditional (3): "fileName[i] != 0": Taking false branch.
|
1071 for ( i=0; fileName[i] != '\0'; i++ ){
|
At conditional (2): "fileName[i] == 47": Taking false branch.
|
1072 if ( fileName[i] == '/' )
1073 fIndex = i+1;
1074 }
1075
1076 /* We now try to create the directories structure from the give path */
|
Event uninit_use: |
Using uninitialized value "fIndex". |
| Also see events: |
[var_decl] |
1077 for ( i=0 ; i< fIndex; i++ ){
1078 if (fileName[i] == '/' && i > 0){
1079 int res;
1080 currentFolder[i] = '\0';
1081 res = mtcp_sys_mkdir(currentFolder, S_IRWXU);
1082 if (res<0 && mtcp_sys_errno != EEXIST ){
1083 mtcp_printf("mtcp_restart_nolibc open_shared_file: error %d creating directory %s in path of %s\n", mtcp_sys_errno, currentFolder, fileName);
1084 mtcp_abort();
1085 }
1086 }
1087 currentFolder[i] = fileName[i];
1088 }
1089
1090 /* Create the file */
1091 fd = mtcp_sys_open(fileName, O_CREAT|O_RDWR, S_IRUSR|S_IWUSR);
1092 if (fd<0){
1093 mtcp_printf("mtcp_restart_nolibc open_shared_file: unable to create file %s\n", fileName);
1094 mtcp_abort();
1095 }
1096 return fd;
1097 }
1098 #endif