1 /*****************************************************************************
2 * Copyright (C) 2006-2010 by Michael Rieker, Jason Ansel, Kapil Arya, and *
3 * Gene Cooperman *
4 * mrieker@nii.net, jansel@csail.mit.edu, kapil@ccs.neu.edu, and *
5 * gene@ccs.neu.edu *
6 * *
7 * This file is part of the MTCP module of DMTCP (DMTCP:mtcp). *
8 * *
9 * DMTCP:mtcp is free software: you can redistribute it and/or *
10 * modify it under the terms of the GNU Lesser General Public License as *
11 * published by the Free Software Foundation, either version 3 of the *
12 * License, or (at your option) any later version. *
13 * *
14 * DMTCP:dmtcp/src is distributed in the hope that it will be useful, *
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
17 * GNU Lesser General Public License for more details. *
18 * *
19 * You should have received a copy of the GNU Lesser General Public *
20 * License along with DMTCP:dmtcp/src. If not, see *
21 * <http://www.gnu.org/licenses/>. *
22 *****************************************************************************/
23
24 /* To test: gcc -DSTANDALONE THIS_FILE; ./a.out */
25
26 #include <fcntl.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <sys/types.h>
30 #include <sys/stat.h>
31 #include <fcntl.h>
32 #include <string.h>
33 #include <sys/utsname.h> /* uname */
34 #include <sys/time.h>
35 #include <sys/resource.h> /* getrlimit, setrlimit */
36 #include <sys/personality.h>
37 #define ADDR_NO_RANDOMIZE 0x0040000 /* In case of old Linux, not defined */
38 #define ADDR_COMPAT_LAYOUT 0x0200000 /* Not yet defined as of Ubuntu 8.04 */
39 #include <unistd.h>
40 #include <errno.h>
41 #include <elf.h> // For value of AT_SYSINFO, Elf??_auxv_t
42 #include "mtcp_sys.h" // For CLEAN_FOR_64BIT
43 #include "mtcp_internal.h" // For CLEAN_FOR_64BIT and MAXPATHLEN
44
45 // We turn off va_addr_rand(/proc/sys/kernel/randomize_va_space).
46 // For a _given_ binary,
47 // this fixes the address of the vdso. Luckily, on restart, we
48 // get our vdso from mtcp_restart. So, we need to maintain two
49 // vdso segments: one from the user binary and one from each
50 // invocation of mtcp_restart during iterated restarts.
51 # define NO_RAND_VA_PERSONALITY 1
52
53 //======================================================================
54 // Get and set AT_SYSINFO for purposes of patching address in vdso
55
/* Select the ELF auxiliary-vector entry type (ELF_AUXV_T) and the unsigned
 * integer type used when comparing its a_type field (UINT_T):
 * the 64-bit variants when compiling for x86_64, the 32-bit ones otherwise. */
#if defined(__x86_64__)
# define ELF_AUXV_T Elf64_auxv_t
# define UINT_T uint64_t
#else /* !defined(__x86_64__) */
# define ELF_AUXV_T Elf32_auxv_t
# define UINT_T uint32_t
#endif
63
64 // Returns value for AT_SYSINFO in kernel's auxv
65 // Ideally: mtcp_at_sysinfo() == *mtcp_addr_sysinfo()
66 // Best if we call this early, before the user makes problems
67 // by moving environment variables, putting in a weird stack, etc.
68 extern char **environ;
69 static void * get_at_sysinfo() {
70 void **stack;
71 int i;
72 ELF_AUXV_T *auxv;
73 static char **my_environ = NULL;
74
75 if (my_environ == NULL)
76 my_environ = environ;
77 #if 0
78 // Walk the stack.
79 asm volatile (CLEAN_FOR_64_BIT(mov %%ebp, %0\n\t)
80 : "=g" (stack) );
81 mtcp_printf("stack 2: %p\n", stack);
82
83 // When popping stack/%ebp yields zero, that's the ELF loader telling us that
84 // this is "_start", the first call frame, which was created by ELF.
85 for ( ; *stack != NULL; stack = *stack )
86 ;
87
88 // Go beyond first call frame:
89 // Next look for &(argv[argc]) on stack; (argv[argc] == NULL)
90 for (i = 1; stack[i] != NULL; i++)
91 ;
92 // Do some error checks
93 if ( &(stack[i]) - stack > 100000 ) {
94 mtcp_printf("Error: overshot stack\n");
95 exit(1);
96 }
97 stack = &stack[i];
98 #else
99 stack = (void **)&my_environ[-1];
100 if (*stack != NULL) {
101 mtcp_printf("This should be argv[argc] == NULL and it's not.\n"
102 "NO &argv[argc], stack: %p\n", stack);
103 exit(1);
104 }
105 #endif
106 // stack[-1] should be argv[argc-1]
107 if ( (void **)stack[-1] < stack || (void **)stack[-1] > stack + 100000 ) {
108 mtcp_printf("candidate argv[argc-1] failed consistency check\n");
109 exit(1);
110 }
111 for (i = 1; stack[i] != NULL; i++)
112 if ( (void **)stack[i] < stack || (void **)stack[i] > stack + 10000 ) {
113 mtcp_printf("candidate argv[%d] failed consistency check\n", i);
114 exit(1);
115 }
116 stack = &stack[i+1];
117 // Now stack is beginning of auxiliary vector (auxv)
118 // auxv->a_type = AT_NULL marks the end of auxv
119 for (auxv = (ELF_AUXV_T *)stack; auxv->a_type != AT_NULL; auxv++) {
120 // mtcp_printf("0x%x 0x%x\n", auxv->a_type, auxv->a_un.a_val);
121 if ( auxv->a_type == (UINT_T)AT_SYSINFO ) {
122 mtcp_printf("AT_SYSINFO (at 0x%p) is: 0x%lx\n",
123 &auxv->a_un.a_val, auxv->a_un.a_val);
124 return (void *)auxv->a_un.a_val;
125 }
126 }
127 return NULL; /* Couldn't find AT_SYSINFO */
128 }
129
130 // From glibc-2.7: glibc-2.7/nptl/sysdeps/i386/tls.h
131 // SYSINFO_OFFSET given by:
132 // #include "glibc-2.7/nptl/sysdeps/i386/tls.h"
133 // tcbhead_t dummy;
134 // #define SYSINFO_OFFSET &(dummy.sysinfo) - &dummy
135
136 // Some reports say it was 0x18 in past. Should we also check that?
137 #define DEFAULT_SYSINFO_OFFSET "0x10"
138
139 int mtcp_have_thread_sysinfo_offset() {
140 #ifdef RESET_THREAD_SYSINFO
141 static int result = -1; // Reset to 0 or 1 on first call.
142 #else
143 static int result = 0;
144 #endif
145 if (result == -1) {
146 void * sysinfo;
147 asm (CLEAN_FOR_64_BIT(mov %%gs:) DEFAULT_SYSINFO_OFFSET ", %0\n\t"
148 : "=r" (sysinfo));
149 result = (sysinfo == get_at_sysinfo());
150 }
151 return result;
152 }
153
154 // AT_SYSINFO is what kernel calls sysenter address in vdso segment.
155 // Kernel saves it for each thread in %gs:SYSINFO_OFFSEt ??
156 // as part of kernel TCB (thread control block) at beginning of TLS ??
157 void *mtcp_get_thread_sysinfo() {
158 void *sysinfo;
159 asm volatile (CLEAN_FOR_64_BIT(mov %%gs:) DEFAULT_SYSINFO_OFFSET ", %0\n\t"
160 : "=r" (sysinfo) );
161 return sysinfo;
162 }
163
164 void mtcp_set_thread_sysinfo(void *sysinfo) {
165 asm volatile (CLEAN_FOR_64_BIT(mov %0, %%gs:) DEFAULT_SYSINFO_OFFSET "\n\t"
166 : : "r" (sysinfo) );
167 }
168
169 //======================================================================
170 // Used to check if vdso is an issue
171
#define MAX_ARGS 500

// Reads the NUL-separated strings stored in `filename` (the caller passes
// "/proc/self/cmdline") and fills `vector` with pointers to them, followed
// by a terminating NULL, i.e. the layout execve() expects for argv.
//
//   vector   - must have room for at least MAX_ARGS+1 pointers; at most
//              MAX_ARGS strings are stored before the NULL.
//   filename - file to read; at most sizeof(strings)-1 bytes are consumed.
//
// Returns 0 on success, -1 if reading the file failed (in which case
// `vector` is left untouched).  If the file cannot be opened, prints a
// message with perror() and calls exit(1).
//
// The pointers stored in `vector` refer to a static buffer inside this
// function: they remain valid after return, but are overwritten by the next
// call.
static int write_args(char **vector, char *filename) {
  // Must be static: vector[] entries point into this buffer and the caller
  // hands them to execve() after we have returned.
  static char strings[10001];
  char *str = strings;
  ssize_t i;
  ssize_t num_read;
  int fd;

  if (-1 == (fd = open(filename, O_RDONLY))) {
    perror("open");
    exit(1);
  }
  // Leave room for one terminating NUL after the data.
  num_read = mtcp_read_all(fd, strings, sizeof(strings) - 1);
  close(fd);

  if (num_read < 0 || (size_t)num_read >= sizeof(strings))
    return -1;

  // Guarantee that the scan below stops inside the buffer even if the last
  // string in the file is not NUL-terminated.
  strings[num_read] = '\0';

  for (i = 0; str - strings < num_read && i < MAX_ARGS; i++) {
    vector[i] = str;
    while (*str++ != '\0')
      ;
  }
  vector[i] = NULL;
  return 0;
}
198
// Decodes the environment variable MTCP_OLDPERS, a string of binary digits
// with the most significant bit first, and returns the resulting value.
// Every character other than '1' is treated as a 0 bit.
// If MTCP_OLDPERS is not set, prints an internal-error message and calls
// exit(1).
static unsigned long getenv_oldpers() {
  const char *digit = getenv("MTCP_OLDPERS");
  unsigned long value = 0;

  if (digit == NULL) {
    mtcp_printf("MTCP: internal error: %s:%d\n", __FILE__, __LINE__);
    exit(1);
  }

  for ( ; *digit != '\0'; digit++) {
    value <<= 1;
    if (*digit == '1') {
      value += 1;
    }
  }
  return value;
}
210
// Encodes `oldpers` as a string of binary digits -- one digit per bit of an
// int, most significant bit first -- and stores it in the environment
// variable MTCP_OLDPERS, replacing any existing value.  getenv_oldpers()
// decodes the same format.
//
// Returns what setenv() returns: 0 on success, -1 on failure.
static int setenv_oldpers(int oldpers) {
  static char oldpers_str[sizeof(oldpers)*8+1];
  // Shift an unsigned copy: right-shifting a negative int is
  // implementation-defined.
  unsigned int bits = (unsigned int)oldpers;
  // Last valid index; it holds the terminating NUL, the digits go before it.
  int i = (int)sizeof(oldpers_str) - 1;

  oldpers_str[i--] = '\0';
  while (i >= 0) {
    oldpers_str[i--] = ((bits & 1) ? '1' : '0');
    bits >>= 1;
  }
  return setenv("MTCP_OLDPERS", oldpers_str, 1);
}
221
/* Turn off randomize_va (by re-exec'ing) or warn user if vdso_enabled is on.
 *
 * With NO_RAND_VA_PERSONALITY defined:
 *  - If the current personality already has ADDR_NO_RANDOMIZE set, restore
 *    the personality saved in MTCP_OLDPERS (if that variable is present),
 *    remove the variable, and return.
 *  - Otherwise set ADDR_NO_RANDOMIZE, cap RLIMIT_STACK, save the old
 *    personality in MTCP_OLDPERS and re-exec /proc/self/exe with the
 *    arguments from /proc/self/cmdline.  On success execve() does not
 *    return.  If the re-exec cannot be done, the old personality is put back
 *    and the vdso_enabled check below is used instead.
 *
 * Fallback check: if /proc/sys/vm/vdso_enabled exists and reads '1', and
 * mtcp_have_thread_sysinfo_offset() reports 0, print instructions and
 * exit(1).
 */
void mtcp_check_vdso_enabled() {
  char buf[1] = { '\0' };  /* stays '\0' if vdso_enabled turns out empty */
#ifdef RESET_THREAD_SYSINFO
  get_at_sysinfo(); /* Initialize pointer to environ for later calls */
#endif

#ifdef NO_RAND_VA_PERSONALITY
  /* Set ADDR_NO_RANDOMIZE bit;
   * In Ubuntu Linux 2.6.24 kernel, This places vdso in a different
   * fixed position in mtcp_init (since /lib/ld-2.7.so is inserted
   * above [vdso] and below [stack].  mtcp_restart has no /lib/ld-2.7.so.
   */
  int pers = personality(0xffffffffUL); /* get current personality */
  if (pers & ADDR_NO_RANDOMIZE) { /* if no addr space randomization ... */
    if (getenv("MTCP_OLDPERS") != NULL) {
      personality(getenv_oldpers()); /* restore orig pre-exec personality */
      if (-1 == unsetenv("MTCP_OLDPERS"))
        perror("unsetenv");
    }
    return; /* skip the rest */
  }

  /* Reaching this point means addr space randomization is still on. */
  {
    unsigned long oldpers = pers;
    /* then turn off randomization and (just in case) remove ADDR_COMPAT_LAYOUT*/
    personality((pers | ADDR_NO_RANDOMIZE) & ~ADDR_COMPAT_LAYOUT);
    if ( ADDR_NO_RANDOMIZE & personality(0xffffffffUL) ) /* if it's off now */
    { char runtime[MAXPATHLEN+1];
      int i = readlink("/proc/self/exe", runtime, MAXPATHLEN);
      if ( i != -1)
      { char *argv[MAX_ARGS+1];
        extern char **environ;
        struct rlimit rlim;

        /* "make" has the capability to raise RLIMIT_STACK to infinity.
         * This is a problem.  When the kernel (2.6.24 or later) detects this,
         * it falls back to an older "standard" memory layout for libs.
         *
         * "standard" memory layout puts [vdso] segment in low memory, which
         * MTCP currently doesn't handle properly.
         *
         * glibc:nptl/sysdeps/<ARCH>/pthreaddef.h defines the default stack for
         * pthread_create to be ARCH_STACK_DEFAULT_SIZE if rlimit is set to be
         * unlimited.  We follow the same default.
         */
        /*
         * XXX: TODO: For some reason, manual restart of checkpointed
         * processes fails if ARCH_STACK_DEFAULT_SIZE is less than 256MB.  It
         * has to do with VDSO.  The location of the VDSO section conflicts
         * with the location of process libraries and hence it is unmapped,
         * which causes failure during the restarting phase.  If we set the
         * stack limit to 256 MB or higher, we do not see this bug.
         * It should also be noted that the process will call setrlimit to set
         * the resource limits to their pre-checkpoint values.
         */
#define ARCH_STACK_DEFAULT_SIZE (256 * 1024 * 1024)

        if ( -1 == getrlimit(RLIMIT_STACK, &rlim) ||
             ( rlim.rlim_cur = rlim.rlim_max = ARCH_STACK_DEFAULT_SIZE,
               setrlimit(RLIMIT_STACK, &rlim),
               getrlimit(RLIMIT_STACK, &rlim),
               rlim.rlim_max == RLIM_INFINITY )
           ) {
          mtcp_printf("Failed to reduce RLIMIT_STACK"
                      " below RLIM_INFINITY\n");
          exit(1);
        }
        runtime[i] = '\0';  /* readlink() does not NUL-terminate */
        /* Only re-exec when argv was really filled in; on a read failure
         * write_args() leaves argv uninitialized. */
        if (write_args(argv, "/proc/self/cmdline") == 0) {
          setenv_oldpers(oldpers);
          execve(runtime, argv, environ);
          /* Still here, so execve() failed: MTCP_OLDPERS is meant only for
           * the re-exec'ed image, so don't leave it in our environment. */
          unsetenv("MTCP_OLDPERS");
        }
      }
      if (-1 == personality(oldpers)) /* reset if we couldn't exec */
        perror("personality");
    }
  }
#endif

  /* We failed to turn off address space rand., but maybe vdso is not enabled
   * On newer kernels, there is no /proc/sys/vm/vdso_enabled, we will cross our
   * fingers and continue anyways.
   */
  FILE * stream = fopen("/proc/sys/vm/vdso_enabled", "r");
  if (stream == NULL)
    return;  /* In older kernels, if it doesn't exist, it can't be enabled. */
  clearerr(stream);
  if (fread(buf, sizeof(buf[0]), 1, stream) < 1) {
    if (ferror(stream)) {
      perror("fread");
      exit(1);
    }
    /* Short read without error (empty file): buf[0] keeps its '\0'. */
  }
  if (-1 == fclose(stream)) {
    perror("fclose");
    exit(1);
  }
  /* This call also caches AT_SYSINFO for use by mtcp_set_thread_sysinfo() */
  if (mtcp_have_thread_sysinfo_offset())
    return;
  if (buf[0] == '1') {
    mtcp_printf("\n\n\nPROBLEM:  cat /proc/sys/vm/vdso_enabled returns 1\n"
    "  Further, I failed to find SYSINFO_OFFSET in TLS.\n"
    "  Can't work around this problem.\n"
    "  Please run this program again after doing as root:\n"
    "                                    echo 0 > /proc/sys/vm/vdso_enabled\n"
    "  Alternatively, upgrade kernel to one that allows for a personality\n"
    "  with ADDR_NO_RANDOMIZE in /usr/include/linux/personality.h.\n");
    exit(1);
  }
}
340
#ifdef STANDALONE
/* Stand-alone driver (see "To test" at the top of the file): runs the vdso
 * check, then has a shell print its stack limit via "ulimit -s". */
int main() {
  mtcp_check_vdso_enabled();

  system("echo ulimit -s | sh");

  return 0;
}
#endif /* STANDALONE */