1    	/****************************************************************************
2    	 *   Copyright (C) 2006-2010 by Jason Ansel, Kapil Arya, and Gene Cooperman *
3    	 *   jansel@csail.mit.edu, kapil@ccs.neu.edu, gene@ccs.neu.edu              *
4    	 *                                                                          *
5    	 *   This file is part of the dmtcp/src module of DMTCP (DMTCP:dmtcp/src).  *
6    	 *                                                                          *
7    	 *  DMTCP:dmtcp/src is free software: you can redistribute it and/or        *
8    	 *  modify it under the terms of the GNU Lesser General Public License as   *
9    	 *  published by the Free Software Foundation, either version 3 of the      *
10   	 *  License, or (at your option) any later version.                         *
11   	 *                                                                          *
12   	 *  DMTCP:dmtcp/src is distributed in the hope that it will be useful,      *
13   	 *  but WITHOUT ANY WARRANTY; without even the implied warranty of          *
14   	 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the           *
15   	 *  GNU Lesser General Public License for more details.                     *
16   	 *                                                                          *
17   	 *  You should have received a copy of the GNU Lesser General Public        *
18   	 *  License along with DMTCP:dmtcp/src.  If not, see                        *
19   	 *  <http://www.gnu.org/licenses/>.                                         *
20   	 ****************************************************************************/
21   	
#include "mtcpinterface.h"
#include "syscallwrappers.h"
#include "../jalib/jassert.h"
#include "../jalib/jalloc.h"

#include <dlfcn.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/syscall.h>
#include "constants.h"
#include "sockettable.h"
#include <unistd.h>
#include "uniquepid.h"
#include "dmtcpworker.h"
#include "virtualpidtable.h"
#include "protectedfds.h"
#include "../jalib/jfilesystem.h"
#include "../jalib/jconvert.h"

#ifdef PTRACE
#include <sys/types.h>
#include <sys/ptrace.h>
#include <stdarg.h>
#include <linux/unistd.h>
#include <sys/syscall.h>
#include <fcntl.h>
#endif
50   	
51   	
52   	namespace
53   	{
54   	  static const char* REOPEN_MTCP = ( char* ) 0x1;
55   	
56   	  static void* find_and_open_mtcp_so()
57   	  {
58   	    dmtcp::string mtcpso = jalib::Filesystem::FindHelperUtility ( "libmtcp.so" );
59   	    void* handle = dlopen ( mtcpso.c_str(), RTLD_NOW );
60   	    JASSERT ( handle != NULL ) ( mtcpso ).Text ( "failed to load libmtcp.so" );
61   	    return handle;
62   	  }
63   	
64   	}
65   	
#ifdef EXTERNAL_SOCKET_HANDLING
// Set to true by callbackPreCheckpoint() when waitForStage2Checkpoint()
// returns false (the checkpoint file name is then redirected to /dev/null).
// callbackPostCheckpoint() reads it to decide whether to skip the
// coordinator handshake.
// NOTE(review): nothing in the visible part of this file ever resets the flag
// to false -- confirm that this is intended.
static bool delayedCheckpoint = false;
#endif
69   	
70   	extern "C" void* _get_mtcp_symbol ( const char* name )
71   	{
72   	  static void* theMtcpHandle = find_and_open_mtcp_so();
73   	
74   	  if ( name == REOPEN_MTCP )
75   	  {
76   	    JTRACE ( "reopening libmtcp.so" ) ( theMtcpHandle );
77   	    //must get ref count down to 0 so it is really unloaded
78   	    for( int i=0; i<MAX_DLCLOSE_MTCP_CALLS; ++i){
79   	      if(dlclose(theMtcpHandle) != 0){
80   	        //failed call means it is unloaded
81   	        JTRACE("dlclose(libmtcp.so) worked");
82   	        break;
83   	      }else{
84   	        JTRACE("dlclose(libmtcp.so) decremented refcount");
85   	      }
86   	    }
87   	    theMtcpHandle = find_and_open_mtcp_so();
88   	    JTRACE ( "reopening libmtcp.so DONE" ) ( theMtcpHandle );
89   	    return 0;
90   	  }
91   	
92   	  void* tmp = dlsym ( theMtcpHandle, name );
93   	  JASSERT ( tmp != NULL ) ( name )
94   	    .Text ( "failed to find libmtcp.so symbol for 'name'" );
95   	
96   	  //JTRACE("looking up libmtcp.so symbol")(name);
97   	
98   	  return tmp;
99   	}
100  	
// Function-pointer types for the libmtcp.so entry points that this file
// resolves by name through _get_mtcp_symbol().
extern "C"
{
  // Type used for the "mtcp_init" symbol.
  typedef int ( *t_mtcp_init ) ( char const *checkpointFilename, int interval, int clonenabledefault );
  // Type used for the "mtcp_set_callbacks" symbol; the six arguments are the
  // callback functions defined later in this file.
  typedef void ( *t_mtcp_set_callbacks ) ( void ( *sleep_between_ckpt ) ( int sec ),
          void ( *pre_ckpt ) ( char ** ckptFilename ),
          void ( *post_ckpt ) ( int is_restarting ),
          int  ( *ckpt_fd ) ( int fd ),
          void ( *write_ckpt_prefix ) ( int fd ),
          void ( *restore_virtual_pid_table) ());
  // Type used for the "mtcp_ok" symbol.
  typedef int ( *t_mtcp_ok ) ( void );
  // Type used for the "mtcp_kill_ckpthread" symbol.
  typedef void ( *t_mtcp_kill_ckpthread ) ( void );
}
113  	
114  	static void callbackSleepBetweenCheckpoint ( int sec )
115  	{
116  	  dmtcp::DmtcpWorker::instance().waitForStage1Suspend();
117  	
118  	  // After acquiring this lock, there shouldn't be any
119  	  // allocations/deallocations and JASSERT/JTRACE/JWARNING/JNOTE etc.; the
120  	  // process can deadlock.
121  	  JALIB_CKPT_LOCK();
122  	}
123  	
// Registered with libmtcp as its pre-checkpoint callback.
//
//   ckptFilename - in/out: file name MTCP will write the checkpoint to.  It is
//                  only overwritten in the cases described below.
//
// Releases the lock taken in callbackSleepBetweenCheckpoint(), runs the user
// pre-checkpoint hook and waits in waitForStage2Checkpoint().
static void callbackPreCheckpoint( char ** ckptFilename )
{
  JALIB_CKPT_UNLOCK();

  //now user threads are stopped
  dmtcp::userHookTrampoline_preCkpt();
#ifdef EXTERNAL_SOCKET_HANDLING
  // When stage 2 reports false, send the image to /dev/null and remember
  // that this checkpoint was delayed.  Note that the "else" below binds to
  // the brace block that follows the #endif.
  if (dmtcp::DmtcpWorker::instance().waitForStage2Checkpoint() == false) {
    char *nullDevice = (char *) "/dev/null";
    *ckptFilename = nullDevice;
    delayedCheckpoint = true;
  } else
#else
  dmtcp::DmtcpWorker::instance().waitForStage2Checkpoint();
#endif
  {
    // If we don't modify *ckptFilename, then MTCP will continue to use
    //  its default filename, which was passed to it via our call to mtcp_init()
#ifdef UNIQUE_CHECKPOINT_FILENAMES
    // Bump the generation and hand MTCP the file name reported by UniquePid.
    dmtcp::UniquePid::ThisProcess().incrementGeneration();
    *ckptFilename = const_cast<char *>(dmtcp::UniquePid::checkpointFilename());
#endif
  }
}
148  	
149  	
150  	static void callbackPostCheckpoint ( int isRestart )
151  	{
152  	  if ( isRestart )
153  	  {
154  	    dmtcp::DmtcpWorker::instance().postRestart();
155  	    /* FIXME: There is not need to call sendCkptFilenameToCoordinator() but if
156  	     *        we do not call it, it exposes a bug in dmtcp_coordinator.
157  	     * BUG: The restarting process reconnects to the coordinator and the old
158  	     *      connection is discarded. However, the coordinator doesn't discard
159  	     *      the old connection right away (since it can't detect if the other
160  	     *      end of the socket is closed). It is only discarded after the next
161  	     *      read phase (coordinator trying to read from all the connected
162  	     *      workers) in monitorSockets() is complete.  In this read phase, an
163  	     *      error is recorded on the closed socket and in the next iteration of
164  	     *      verifying the _dataSockets, this socket is closed and the
165  	     *      corresponding entry in _dataSockets is freed.
166  	     *
167  	     *      The problem occurrs when some other worker sends a status messages
168  	     *      which should take the computation to the next barrier, but since
169  	     *      the _to_be_disconnected socket is present, the minimum state is not
170  	     *      reached unanimously and hence the coordinator doesn't raise the
171  	     *      barrier.
172  	     *
173  	     *      The bug was observed by Kapil in gettimeofday test program. It can
174  	     *      be seen in 1 out of 3 restart attempts.
175  	     *
176  	     *      The current solution is to send a dummy message to coordinator here
177  	     *      before sending a proper request.
178  	     */
179  	    dmtcp::DmtcpWorker::instance().sendCkptFilenameToCoordinator();
180  	    dmtcp::DmtcpWorker::instance().waitForStage3Refill(isRestart);
181  	  }
182  	  else
183  	  {
184  	#ifdef EXTERNAL_SOCKET_HANDLING
185  	    if ( delayedCheckpoint == false )
186  	#endif
187  	    {
188  	      dmtcp::DmtcpWorker::instance().sendCkptFilenameToCoordinator();
189  	      dmtcp::DmtcpWorker::instance().waitForStage3Refill(isRestart);
190  	      dmtcp::DmtcpWorker::instance().waitForStage4Resume();
191  	    }
192  	
193  	    //now everything but threads are restored
194  	    dmtcp::userHookTrampoline_postCkpt(isRestart);
195  	
196  	    // After this point, the user threads will be unlocked in mtcp.c and will
197  	    // resume their computation and so it is OK to set the process state to
198  	    // RUNNING.
199  	    dmtcp::WorkerState::setCurrentState( dmtcp::WorkerState::RUNNING );
200  	  }
201  	}
202  	
// Registered with libmtcp as its "ckpt_fd" callback.  The descriptor argument
// is ignored and 0 is always returned.
static int callbackShouldCkptFD ( int /*fd*/ )
{
  //mtcp should never checkpoint file descriptors;  dmtcp will handle it
  return 0;
}
208  	
// Registered with libmtcp as its "write_ckpt_prefix" callback: forwards the
// descriptor fd to DmtcpWorker::writeCheckpointPrefix().
static void callbackWriteCkptPrefix ( int fd )
{
  dmtcp::DmtcpWorker::instance().writeCheckpointPrefix(fd);
}
213  	
// Registered with libmtcp as its "restore_virtual_pid_table" callback.
// Waits in waitForStage4Resume(), restores the virtual pid table, runs the
// user post-checkpoint hook with its argument set to true, and finally marks
// the worker as RUNNING.
static void callbackRestoreVirtualPidTable ( )
{
  dmtcp::DmtcpWorker::instance().waitForStage4Resume();
  dmtcp::DmtcpWorker::instance().restoreVirtualPidTable();

  //now everything but threads are restored
  dmtcp::userHookTrampoline_postCkpt(true);

  // After this point, the user threads will be unlocked in mtcp.c and will
  // resume their computation and so it is OK to set the process state to
  // RUNNING.
  dmtcp::WorkerState::setCurrentState( dmtcp::WorkerState::RUNNING );
}
227  	
#ifdef PTRACE
// Pointers to ptrace-support functions exported by libmtcp.so.  All of them
// start out NULL; initializeMtcpEngine() fills them in via _get_mtcp_symbol().
//
// See the comment inside initializeMtcpEngine() (below) about a separate
//   initializeMtcpPtrace function and how to remove all
//   these typedef statements.  - Gene
typedef pid_t (*get_saved_pid_t) ();
get_saved_pid_t get_saved_pid_ptr = NULL;

typedef int (*get_saved_status_t) ();
get_saved_status_t get_saved_status_ptr = NULL;

typedef int (*get_has_status_and_pid_t) ();
get_has_status_and_pid_t get_has_status_and_pid_ptr = NULL;

typedef void (*reset_pid_status_t) ();
reset_pid_status_t reset_pid_status_ptr = NULL;

typedef void (*set_singlestep_waited_on_t) ( pid_t superior, pid_t inferior, int value );
set_singlestep_waited_on_t set_singlestep_waited_on_ptr = NULL;

typedef int (*get_is_waitpid_local_t) ();
get_is_waitpid_local_t get_is_waitpid_local_ptr = NULL;

typedef int (*get_is_ptrace_local_t) ();
get_is_ptrace_local_t get_is_ptrace_local_ptr = NULL;

typedef void (*unset_is_waitpid_local_t) ();
unset_is_waitpid_local_t unset_is_waitpid_local_ptr = NULL;

typedef void (*unset_is_ptrace_local_t) ();
unset_is_ptrace_local_t unset_is_ptrace_local_ptr = NULL;

// Signal set holding only MTCP_DEFAULT_SIGNAL (populated in
// initializeMtcpEngine()); the ptrace wrapper blocks and unblocks it around
// PTRACE_SINGLESTEP.
sigset_t signals_set;
#define MTCP_DEFAULT_SIGNAL SIGUSR2
#endif
261  	
262  	void dmtcp::initializeMtcpEngine()
263  	{
264  	#ifdef PTRACE
265  	  dmtcp::string tmpdir = dmtcp::UniquePid::getTmpDir();
266  	  char *dir =
267  	     (char*) _get_mtcp_symbol( "dir" );  
Event secure_coding: [VERY RISKY]. Using "sprintf" can cause a buffer overflow when done incorrectly. Because sprintf() assumes an arbitrarily long string, callers must be careful not to overflow the actual space of the destination. Use snprintf() instead, or correct precision specifiers.
268  	  sprintf(dir, "%s",  tmpdir.c_str());
269  	#endif
270  	
271  	  int *dmtcp_exists_ptr =
272  	    (int*) _get_mtcp_symbol( "dmtcp_exists" );
273  	  *dmtcp_exists_ptr = 1;
274  	
275  	  int *dmtcp_info_pid_virtualization_enabled_ptr =
276  	    (int*) _get_mtcp_symbol( "dmtcp_info_pid_virtualization_enabled" );
277  	
278  	#ifdef PID_VIRTUALIZATION
279  	  *dmtcp_info_pid_virtualization_enabled_ptr = 1;
280  	#else
281  	  *dmtcp_info_pid_virtualization_enabled_ptr = 0;
282  	#endif
283  	
284  	  int *dmtcp_info_stderr_fd =
285  	    (int*) _get_mtcp_symbol( "dmtcp_info_stderr_fd" );
286  	  *dmtcp_info_stderr_fd = PROTECTED_STDERR_FD;
287  	
288  	#ifdef DEBUG
289  	  int *dmtcp_info_jassertlog_fd =
290  	    (int*) _get_mtcp_symbol( "dmtcp_info_jassertlog_fd" );
291  	  *dmtcp_info_jassertlog_fd = PROTECTED_JASSERTLOG_FD;
292  	#endif
293  	
294  	  int *dmtcp_info_restore_working_directory =
295  	    (int*) _get_mtcp_symbol( "dmtcp_info_restore_working_directory" );
296  	  // DMTCP restores working dir only if --checkpoint-open-files invoked.
297  	  // Later, we may offer the user a separate command line option for this.
298  	  if (getenv(ENV_VAR_CKPT_OPEN_FILES))
299  	    *dmtcp_info_restore_working_directory = 1;
300  	  else
301  	    *dmtcp_info_restore_working_directory = 0;
302  	
303  	  t_mtcp_set_callbacks setCallbks =
304  	    (t_mtcp_set_callbacks)_get_mtcp_symbol ( "mtcp_set_callbacks" );
305  	
306  	  t_mtcp_init init = (t_mtcp_init)_get_mtcp_symbol ( "mtcp_init" );
307  	  t_mtcp_ok okFn = (t_mtcp_ok)_get_mtcp_symbol ( "mtcp_ok" );
308  	
309  	#ifdef PTRACE
310  	  // This ptrace code should be in a separate function,
311  	  //   extern "C" void initializeMtcpPtrace() { ... }
312  	  // Then you also get the benefit of implicit casts from "void *" to
313  	  //   other pointer, and you can then get rid of all these types XXX_t.
314  	  // - Gene
315  	  sigemptyset (&signals_set);
316  	  sigaddset (&signals_set, MTCP_DEFAULT_SIGNAL);
317  	
318  	  set_singlestep_waited_on_ptr =
319  	    (set_singlestep_waited_on_t)_get_mtcp_symbol ( "set_singlestep_waited_on" );
320  	
321  	  get_is_waitpid_local_ptr =
322  	    (get_is_waitpid_local_t)_get_mtcp_symbol ( "get_is_waitpid_local" );
323  	
324  	  get_is_ptrace_local_ptr =
325  	    (get_is_ptrace_local_t)_get_mtcp_symbol ( "get_is_ptrace_local" );
326  	
327  	  unset_is_waitpid_local_ptr =
328  	    (unset_is_waitpid_local_t)_get_mtcp_symbol ( "unset_is_waitpid_local" );
329  	
330  	  unset_is_ptrace_local_ptr =
331  	    (unset_is_ptrace_local_t)_get_mtcp_symbol ( "unset_is_ptrace_local" );
332  	
333  	  get_saved_pid_ptr = (get_saved_pid_t)_get_mtcp_symbol ( "get_saved_pid" );
334  	
335  	  get_saved_status_ptr =
336  	    (get_saved_status_t)_get_mtcp_symbol ( "get_saved_status" );
337  	
338  	  get_has_status_and_pid_ptr =
339  	    (get_has_status_and_pid_t)_get_mtcp_symbol ( "get_has_status_and_pid" );
340  	
341  	  reset_pid_status_ptr =
342  	    (reset_pid_status_t)_get_mtcp_symbol ( "reset_pid_status" );
343  	#endif
344  	
345  	  ( *setCallbks )( &callbackSleepBetweenCheckpoint
346  	                 , &callbackPreCheckpoint
347  	                 , &callbackPostCheckpoint
348  	                 , &callbackShouldCkptFD
349  	                 , &callbackWriteCkptPrefix
350  	                 , &callbackRestoreVirtualPidTable);
351  	  JTRACE ("Calling mtcp_init");
352  	  ( *init ) ( UniquePid::checkpointFilename(),0xBadF00d,1 );
353  	  ( *okFn ) ();
354  	
355  	  JTRACE ( "mtcp_init complete" ) ( UniquePid::checkpointFilename() );
356  	}
357  	
358  	#ifdef PID_VIRTUALIZATION
// Argument bundle handed from the __clone() wrapper to thread_start().
// It is allocated with JALLOC_HELPER_MALLOC in __clone() and released with
// JALLOC_HELPER_FREE in thread_start() (or in __clone() if clone fails).
struct ThreadArg {
  int ( *fn ) ( void *arg );  // the user's thread function
  void *arg;                  // argument to pass to fn
  pid_t original_tid;         // tid the thread had before checkpoint, or -1
                              // for a thread created for the first time
};
364  	
365  	// bool isConflictingTid( pid_t tid )
366  	// {
367  	//   /*  If tid is not an original tid (return same tid), then there is no conflict
368  	//    *  If tid is an original tid with the same current tid, then there
369  	//    *   is no conflict because that's us.
370  	//    *  If tid is an original tid with a different current tid, then there
371  	//    *   is a conflict.
372  	//    */
373  	//   if (tid == dmtcp::VirtualPidTable::instance().originalToCurrentPid( tid ))
374  	//     return false;
375  	//   return true;
376  	// }
377  	
378  	int thread_start(void *arg)
379  	{
380  	  struct ThreadArg *threadArg = (struct ThreadArg*) arg;
381  	  pid_t tid = _real_gettid();
382  	
383  	  typedef void ( *fill_in_pthread_t ) ( pid_t tid, pthread_t pth );
384  	  static fill_in_pthread_t fill_in_pthread_ptr = ( fill_in_pthread_t ) _get_mtcp_symbol ( "fill_in_pthread" );
385  	
386  	  fill_in_pthread_ptr (tid, pthread_self()); 
387  	  
388  	  if ( dmtcp::VirtualPidTable::isConflictingPid ( tid ) ) {
389  	    JTRACE ("Tid Conflict detected. Exiting Thread");
390  	    return 0;
391  	  }
392  	
393  	  pid_t original_tid = threadArg -> original_tid;
394  	  int (*fn) (void *) = threadArg->fn;
395  	  void *thread_arg = threadArg->arg;
396  	
397  	  // Free the memory which was previously allocated by calling JALLOC_HELPER_MALLOC
398  	  JALLOC_HELPER_FREE(threadArg);
399  	
400  	  if (original_tid == -1) {
401  	    /*
402  	     * original tid is not known, which means this thread never existed before
403  	     * checkpoint, so will insert the original_tid into virtualpidtable
404  	     */
405  	    original_tid = syscall(SYS_gettid);
406  	    JASSERT ( tid == original_tid ) (tid) (original_tid)
407  	      .Text ( "syscall(SYS_gettid) and _real_gettid() returning different values for the newly created thread!" );
408  	    dmtcp::VirtualPidTable::instance().insertTid ( original_tid );
409  	  }
410  	
411  	  dmtcp::VirtualPidTable::instance().updateMapping ( original_tid, tid );
412  	
413  	  JTRACE ( "Calling user function" ) (original_tid);
414  	
415  	  /* Thread finished initialization, its now safe for this thread to
416  	   * participate in checkpoint. Decrement the uninitializedThreadCount in
417  	   * DmtcpWorker.
418  	   */
419  	  dmtcp::DmtcpWorker::decrementUninitializedThreadCount();
420  	
421  	  // return (*(threadArg->fn)) ( threadArg->arg );
422  	  int result = (*fn) ( thread_arg );
423  	
424  	  JTRACE ( "Thread returned:" ) (original_tid);
425  	
426  	  /*
427  	   * This thread has finished its execution, do some cleanup on our part.
428  	   *  erasing the original_tid entry from virtualpidtable
429  	   */
430  	
431  	  dmtcp::VirtualPidTable::instance().erase ( original_tid );
432  	  dmtcp::VirtualPidTable::instance().eraseTid ( original_tid );
433  	
434  	  return result;
435  	}
436  	#endif
437  	
// Wrapper around clone(): forwards the user's clone call to libmtcp.so's
// __clone (or, when recreating a thread during restart with
// PID_VIRTUALIZATION, to _real_clone).
//
// The parameters are those of clone().  While the worker state is not RUNNING
// the incoming arg is interpreted as a struct MtcpRestartThreadArg and
// unwrapped first.
//
// Returns the value of the underlying clone call; with PID_VIRTUALIZATION,
// when a thread is recreated during restart, the original tid is returned
// instead of the new one.  -1 is returned when the underlying clone fails.
//need to forward user clone
extern "C" int __clone ( int ( *fn ) ( void *arg ), void *child_stack, int flags, void *arg, int *parent_tidptr, struct user_desc *newtls, int *child_tidptr )
{
  /*
   * struct MtcpRestartThreadArg
   *
   * DMTCP requires the original_tids  of the threads being created during
   *  the RESTARTING phase. We use MtcpRestartThreadArg structure to pass
   *  the original_tid of the thread being created from MTCP to DMTCP.
   *
   * actual clone call: clone (fn, child_stack, flags, void *, ... )
   * new clone call   : clone (fn, child_stack, flags, (struct MtcpRestartThreadArg *), ...)
   *
   * DMTCP automatically extracts arg from this structure and passes that
   * to the _real_clone call.
   *
   * IMPORTANT NOTE: While updating, this structure must be kept in sync
   * with the structure defined with the same name in mtcp.c
   */
  struct MtcpRestartThreadArg {
    void * arg;
    pid_t original_tid;
  } *mtcpRestartThreadArg;

  typedef int ( *cloneptr ) ( int ( * ) ( void* ), void*, int, void*, int*, user_desc*, int* );
  // Don't make _mtcp_clone_ptr statically initialized.  After a fork, some
  // loaders will relocate libmtcp.so on REOPEN_MTCP.  And we must then
  // call _get_mtcp_symbol again on the newly relocated libmtcp.so .
  cloneptr _mtcp_clone_ptr = ( cloneptr ) _get_mtcp_symbol ( "__clone" );

  //JTRACE ( "forwarding user's clone call to mtcp" );

#ifndef PID_VIRTUALIZATION
  // Without pid virtualization only the wrapped argument has to be unwrapped;
  // the call is then forwarded to libmtcp.so unchanged.
  if ( dmtcp::WorkerState::currentState() != dmtcp::WorkerState::RUNNING )
  {
    mtcpRestartThreadArg = (struct MtcpRestartThreadArg *) arg;
    arg                  = mtcpRestartThreadArg -> arg;
  }

  JTRACE ( "forwarding user's clone call to mtcp" );
  return ( *_mtcp_clone_ptr ) ( fn,child_stack,flags,arg,parent_tidptr,newtls,child_tidptr );

#else

  /* Acquire the wrapperExecution lock
   * (Make sure to unlock before returning from this function)
   * Also increment the uninitialized thread count.
   */
  WRAPPER_EXECUTION_DISABLE_CKPT();
  dmtcp::DmtcpWorker::incrementUninitializedThreadCount();


  pid_t originalTid = -1;

  if ( dmtcp::WorkerState::currentState() != dmtcp::WorkerState::RUNNING )
  {
    mtcpRestartThreadArg = (struct MtcpRestartThreadArg *) arg;
    arg         = mtcpRestartThreadArg -> arg;
    originalTid = mtcpRestartThreadArg -> original_tid;
  }

  // We have to use DMTCP specific memory allocator because using glibc:malloc
  // can interfere with user threads
  // Ownership of threadArg passes to thread_start(), which frees it; it is
  // freed here only when the clone call itself fails.
  struct ThreadArg *threadArg = (struct ThreadArg *) JALLOC_HELPER_MALLOC (sizeof (struct ThreadArg));
  threadArg->fn = fn;
  threadArg->arg = arg;
  threadArg->original_tid = originalTid;

  int tid;

  /*
   * originalTid == -1 indicates that the thread is being created for the first
   * time in the process i.e. we are not restoring from a checkpoint
   */

  // Keep creating threads until one gets a tid that does not conflict.  A
  // conflicting thread exits on its own in thread_start() without touching
  // threadArg, so the same threadArg is reused for the next attempt.
  // NOTE(review): the retry also reuses child_stack without waiting for the
  // conflicting thread to exit -- confirm this is safe.
  while (1) {
    if (originalTid == -1) {
      /* First time thread creation */
      JTRACE ( "forwarding user's clone call to mtcp" );
      tid = ( *_mtcp_clone_ptr ) ( thread_start,child_stack,flags,threadArg,parent_tidptr,newtls,child_tidptr );
    } else {
      /* Recreating thread during restart */
      JTRACE ( "calling libc:__clone" );
      tid = _real_clone ( thread_start,child_stack,flags,threadArg,parent_tidptr,newtls,child_tidptr );
    }

    if (tid == -1) {
      // Free the memory which was previously allocated by calling
      // JALLOC_HELPER_MALLOC
      JALLOC_HELPER_FREE ( threadArg );

      /* If clone() failed, decrement the uninitialized thread count, since
       * there is none
       */
      dmtcp::DmtcpWorker::decrementUninitializedThreadCount();
      break;
    }

    if ( dmtcp::VirtualPidTable::isConflictingPid ( tid ) ) {
    //if ( isConflictingTid ( tid ) ) {
      /* Issue a waittid for the newly created thread (if required.) */
      JTRACE ( "TID Conflict detected, creating a new child thread" ) ( tid );
    } else {
      JTRACE ("New Thread Created") (tid);
      if (originalTid != -1)
      {
        /* creating thread while restarting, we need to notify other processes */
        dmtcp::VirtualPidTable::instance().updateMapping ( originalTid, tid );
        dmtcp::VirtualPidTable::InsertIntoPidMapFile(originalTid, tid );
        // The caller is handed the original (pre-checkpoint) tid.
        tid = originalTid;
      } else {
        /* Newly created thread, insert mappings */
        dmtcp::VirtualPidTable::instance().updateMapping ( tid, tid );
      }
      break;
    }
  }

  /* Release the wrapperExecution lock */
  WRAPPER_EXECUTION_ENABLE_CKPT();

  return tid;

#endif
}
563  	
564  	extern "C" int pthread_join (pthread_t thread, void **value_ptr) {
565  	  typedef void ( *delete_thread_on_pthread_join_t) ( pthread_t pth );
566  	  static delete_thread_on_pthread_join_t delete_thread_on_pthread_join_ptr = ( delete_thread_on_pthread_join_t ) _get_mtcp_symbol ( "delete_thread_on_pthread_join" );
567  	  int retval = _real_pthread_join (thread, value_ptr);
568  	  delete_thread_on_pthread_join_ptr (thread);
569  	  return retval;
570  	}
571  	
572  	#ifdef PTRACE
573  	#ifndef PID_VIRTUALIZATION
574  	#error "PTRACE can not be used without enabling PID-Virtualization"
575  	#endif
576  	// ptrace cannot work without pid virtualization.  If we're not using
577  	// pid virtualization, then disable this wrapper around ptrace, and
578  	// let the application call ptrace from libc.
579  	
// These constants must agree with the constants in mtcp/mtcp.c
// They are passed as the last argument of libmtcp's handle_command() by the
// ptrace wrapper below (PTRACE_UNSPECIFIED_COMMAND is not used in this file).
#define PTRACE_UNSPECIFIED_COMMAND 0
#define PTRACE_SINGLESTEP_COMMAND 1
#define PTRACE_CONTINUE_COMMAND 2
584  	
585  	extern "C" long ptrace ( enum __ptrace_request request, ... )
586  	{
587  	  va_list ap;
588  	  pid_t pid;
589  	  void *addr;
590  	  void *data;
591  	
592  	  pid_t superior;
593  	  pid_t inferior;
594  	
595  	  long ptrace_ret;
596  	
597  	  typedef void ( *writeptraceinfo_t ) ( pid_t superior, pid_t inferior );
598  	  static writeptraceinfo_t writeptraceinfo_ptr = ( writeptraceinfo_t ) _get_mtcp_symbol ( "writeptraceinfo" );
599  	
600  	  typedef void ( *write_info_to_file_t ) ( int file, pid_t superior, pid_t inferior );
601  	  static write_info_to_file_t write_info_to_file_ptr = ( write_info_to_file_t ) _get_mtcp_symbol ( "write_info_to_file" );
602  	
603  	  typedef void ( *remove_from_ptrace_pairs_t) ( pid_t superior, pid_t inferior );
604  	  static remove_from_ptrace_pairs_t remove_from_ptrace_pairs_ptr =
605  	                           ( remove_from_ptrace_pairs_t ) _get_mtcp_symbol ( "remove_from_ptrace_pairs" );
606  	
607  	  typedef void ( *handle_command_t ) ( pid_t superior, pid_t inferior, int last_command );
608  	  static handle_command_t handle_command_ptr = ( handle_command_t ) _get_mtcp_symbol ( "handle_command" );
609  	
610  	
611  	  va_start( ap, request );
612  	  pid = va_arg( ap, pid_t );
613  	  addr = va_arg( ap, void * );
614  	  data = va_arg( ap, void * );
615  	  va_end( ap );
616  	  superior = syscall( SYS_gettid );
617  	  inferior = pid;
618  	
619  	  switch (request) {
620  	    case PTRACE_ATTACH: {
621  	     if (!get_is_ptrace_local_ptr ()) writeptraceinfo_ptr ( superior, inferior );
622  	      else unset_is_ptrace_local_ptr ();
623  	      break;
624  	    }
625  	    case PTRACE_TRACEME: {
626  	      superior = getppid();
627  	      inferior = syscall( SYS_gettid );
628  	      writeptraceinfo_ptr( superior, inferior );
629  	      break;
630  	    }
631  	    case PTRACE_DETACH: {
632  	     if (!get_is_ptrace_local_ptr ()) remove_from_ptrace_pairs_ptr ( superior, inferior );
633  	     else unset_is_ptrace_local_ptr ();
634  	     break;
635  	    }
636  	    case PTRACE_CONT: {
637  	     if (!get_is_ptrace_local_ptr ()) handle_command_ptr ( superior, inferior, PTRACE_CONTINUE_COMMAND );
638  	     else unset_is_ptrace_local_ptr ();
639  	     break;
640  	    }
641  	    case PTRACE_SINGLESTEP: {
642  	     pid = dmtcp::VirtualPidTable::instance().originalToCurrentPid( pid );
643  	     if (!get_is_ptrace_local_ptr ()) {
644  	        if (_real_pthread_sigmask (SIG_BLOCK, &signals_set, NULL) != 0) {
645  	                perror ("waitpid wrapper");
646  	                 exit(-1);
647  	        }
648  	        handle_command_ptr (superior, inferior, PTRACE_SINGLESTEP_COMMAND);
649  	        ptrace_ret =  _real_ptrace (request, pid, addr, data);
650  	        if (_real_pthread_sigmask (SIG_UNBLOCK, &signals_set, NULL) != 0) {
651  	                perror ("waitpid wrapper");
652  	                exit(-1);
653  	        }
654  	     }
655  	     else {
656  	        ptrace_ret =  _real_ptrace (request, pid, addr, data);
657  	        unset_is_ptrace_local_ptr ();
658  	     }
659  	     break;
660  	    }
661  	    case PTRACE_SETOPTIONS: {
662  	     write_info_to_file_ptr (1, superior, inferior);
663  	     break;
664  	    }
665  	    default: {
666  	      break;
667  	    }
668  	  }
669  	
670  	  /* TODO: We might want to check the return value in certain cases */
671  	
672  	  if ( request != PTRACE_SINGLESTEP ) {
673  	        pid = dmtcp::VirtualPidTable::instance().originalToCurrentPid( pid );
674  	        ptrace_ret =  _real_ptrace( request, pid, addr, data );
675  	  }
676  	
677  	  return ptrace_ret;
678  	}
679  	#endif
680  	
681  	  // This is called by the child process, only, via DmtcpWorker::resetOnFork().
  // We know that no one can send the SIG_CKPT signal, since if
  //   the coordinator had requested a checkpoint, then either
  //   the child successfully forked, or the thread of the parent process
685  	  //   seeing the fork is processing the checkpoint signal first.  The
686  	  //   latter case is no problem.  If the child successfully forked, then
687  	  //   the SIG_CKPT sent by the checkpoint thread of the parent process prior
688  	  //   to forking is too late to affect the child.  The checkpoint thread
689  	  //   of the parent process may continue its own checkpointing, but
690  	  //   the child process will not take part.  It's the coordinator's
691  	  //   responsibility to then also send a checkpoint message to the checkpoint
692  	  //   thread of the child.  DOES THE COORDINATOR DO THIS?
693  	  // After a fork, only the child's user thread (which called fork())
694  	  //   exists (and we know it's not our own checkpoint thread).  So, no
695  	  //   thread is listening for a checkpoint command via the socket
696  	  //   from the coordinator, _even_ if the coordinator decided to start
697  	  //   the checkpoint immediately after the fork.  The child can't checkpoint
698  	  //   until we call mtcp_init in the child, as described below.
699  	  //   Note that resetOnFork() is the last thing done by the child before the
700  	  //   fork wrapper returns.
701  	  //   Jason, PLEASE VERIFY THE LOGIC ABOVE.  IT'S FOR THIS REASON, WE
702  	  //   SHOULDN'T NEED delayCheckpointsLock.  Thanks.  - Gene
703  	
704  	  // shutdownMtcpEngineOnFork will dlclose the old libmtcp.so and will
705  	  //   dlopen a new libmtcp.so.  DmtcpWorker constructor then calls
706  	  //   initializeMtcpEngine, which will then call mtcp_init.  We must close
707  	  //   the old SIG_CKPT handler prior to this, so that MTCP and mtcp_init()
708  	  //   don't think someone else is using their SIG_CKPT signal.
// Reset the checkpoint signal's disposition to SIG_DFL (a failure only
// produces a JWARNING) and then ask _get_mtcp_symbol() to close and re-open
// libmtcp.so.
void dmtcp::shutdownMtcpEngineOnFork()
{
  int _determineMtcpSignal(); // from signalwrappers.cpp
  // Remove our signal handler from our SIG_CKPT
  // errno is cleared first so that JASSERT_ERRNO reports this call's error.
  errno = 0;
  JWARNING (SIG_ERR != _real_signal(_determineMtcpSignal(), SIG_DFL))
           (_determineMtcpSignal())
           (JASSERT_ERRNO)
           .Text("failed to reset child's checkpoint signal on fork");
  // REOPEN_MTCP is a sentinel, not a symbol name; the return value is unused.
  _get_mtcp_symbol ( REOPEN_MTCP );
}
720  	
721  	void dmtcp::killCkpthread()
722  	{
723  	  t_mtcp_kill_ckpthread kill_ckpthread =
724  	    (t_mtcp_kill_ckpthread) _get_mtcp_symbol( "mtcp_kill_ckpthread" );
725  	  kill_ckpthread();
726  	}