1    	/****************************************************************************
2    	 *   Copyright (C) 2006-2010 by Jason Ansel, Kapil Arya, and Gene Cooperman *
3    	 *   jansel@csail.mit.edu, kapil@ccs.neu.edu, gene@ccs.neu.edu              *
4    	 *                                                                          *
5    	 *   This file is part of the dmtcp/src module of DMTCP (DMTCP:dmtcp/src).  *
6    	 *                                                                          *
7    	 *  DMTCP:dmtcp/src is free software: you can redistribute it and/or        *
8    	 *  modify it under the terms of the GNU Lesser General Public License as   *
9    	 *  published by the Free Software Foundation, either version 3 of the      *
10   	 *  License, or (at your option) any later version.                         *
11   	 *                                                                          *
12   	 *  DMTCP:dmtcp/src is distributed in the hope that it will be useful,      *
13   	 *  but WITHOUT ANY WARRANTY; without even the implied warranty of          *
14   	 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the           *
15   	 *  GNU Lesser General Public License for more details.                     *
16   	 *                                                                          *
17   	 *  You should have received a copy of the GNU Lesser General Public        *
18   	 *  License along with DMTCP:dmtcp/src.  If not, see                        *
19   	 *  <http://www.gnu.org/licenses/>.                                         *
20   	 ****************************************************************************/
21   	
22   	#include <unistd.h>
23   	
24   	#include <stdlib.h>
25   	#include <string>
26   	#include <stdio.h>
27   	#include  "../jalib/jassert.h"
28   	#include  "../jalib/jfilesystem.h"
29   	#include "connectionmanager.h"
30   	#include "dmtcpworker.h"
31   	#include "dmtcpmessagetypes.h"
32   	#include "connectionstate.h"
33   	#include "mtcpinterface.h"
34   	#include "syscallwrappers.h"
35   	#include "protectedfds.h"
36   	#include "util.h"
37   	#include <sys/types.h>
38   	#include <sys/stat.h>
39   	#include <fcntl.h>
40   	#include <errno.h>
41   	#include <vector>
42   	
// Some global definitions
// Computation group of the images being restarted; main() copies it from the
// first restore target and checks the remaining targets against it.
static dmtcp::UniquePid compGroup;
// Peer count taken from the first restore target by main().
static int numPeers;
// Handed to DmtcpWorker::restoreSockets() by main().
static int coordTstamp = 0;

// Placeholder value; main() overwrites it with dmtcp::UniquePid::getTmpDir().
dmtcp::string dmtcpTmpDir = "/DMTCP/UnInitialized/Tmp/Dir";
49   	
50   	using namespace dmtcp;
51   	
#ifdef PID_VIRTUALIZATION
// Called by main() just before the root processes are forked.
static void openOriginalToCurrentMappingFiles();
void unlockPidMapFile();
#endif
// Called by RestoreTarget::mtcpRestart() with the checkpoint image path and
// the offset that ConnectionToFds::loadFromFile() returned for that image.
static void runMtcpRestore ( const char* path, int offset );
57   	
58   	namespace
59   	{
60   	
61   	#ifdef PID_VIRTUALIZATION
62   	
63   	  class OriginalPidTable {
64   	    public:
65   	      OriginalPidTable(){}
66   	
67   	      void insertFromVirtualPidTable ( dmtcp::VirtualPidTable& vt )
68   	      {
69   	        dmtcp::vector< pid_t > tmpVector;
70   	
71   	        _insert(vt.pid());
72   	
73   	        tmpVector = vt.getChildPidVector();
74   	        for ( size_t i = 0; i < tmpVector.size(); ++i )
75   	          _insert(tmpVector[i]);
76   	
77   	        tmpVector = vt.getTidVector();
78   	        for ( size_t i = 0; i < tmpVector.size(); ++i )
79   	          _insert(tmpVector[i]);
80   	      }
81   	
82   	      void _insert( pid_t pid )
83   	      {
84   	        if (!isConflictingChildPid (pid) /* && newVector[i] != getpid()*/) {
85   	          _vector.push_back ( pid );
86   	          JTRACE("New Pid Pushed to PidVector") (pid);
87   	        }
88   	      }
89   	
90   	      bool isConflictingChildPid ( pid_t pid )
91   	      {
92   	        //iterator i = _vector.find ( pid );
93   	        //if ( i == _vector.end() )
94   	        //  return false;
95   	        for ( size_t i = 0; i < _vector.size(); ++i )
96   	          if ( _vector[i] == pid )
97   	            return true;
98   	
99   	        return false;
100  	      }
101  	
102  	      size_t numPids () { return _vector.size(); }
103  	
104  	    private:
105  	      typedef dmtcp::vector< pid_t >::iterator iterator;
106  	      dmtcp::vector< pid_t > _vector;
107  	  };
108  	
109  	  OriginalPidTable originalPidTable;
110  	
111  	#endif
112  	
113  	
114  	  class RestoreTarget
115  	  {
116  	  public:
117  	    RestoreTarget ( const dmtcp::string& path )
118  	      : _path ( path )
119  	    {
120  	      JASSERT ( jalib::Filesystem::FileExists ( _path ) ) ( _path )
121  			.Text ( "checkpoint file missing" );
122  	#ifdef PID_VIRTUALIZATION
123  	      _offset = _conToFd.loadFromFile(_path, _compGroup, _numPeers,
124  					      _virtualPidTable);
125  	      _virtualPidTable.erase(getpid());
126  	      _roots.clear();
127  	      _children.clear();
128  	      _smap.clear();
129  	      _used = 0;
130  	#else
131  	      _offset = _conToFd.loadFromFile(_path, _compGroup, _numPeers);
132  	#endif
133  	      JTRACE ( "restore target" ) ( _path ) (_numPeers ) (_compGroup)
134  		                          ( _conToFd.size() ) (_offset);
135  	    }
136  	
137  	    void dupAllSockets ( SlidingFdTable& slidingFd )
138  	    {
139  	      int lastfd = -1;
140  	      dmtcp::vector<int> fdlist;
141  	      for ( ConnectionToFds::const_iterator i = _conToFd.begin();
142  		    i!=_conToFd.end(); ++i )
143  	        {
144  	          Connection& con = ConnectionList::instance() [i->first];
145  	          if ( con.conType() == Connection::INVALID ){
146  	            JWARNING(false)(i->first).Text("Can't restore invalid Connection");
147  	            continue;
148  	          }
149  	
150  	          const dmtcp::vector<int>& fds = i->second;
151  	          for ( size_t x=0; x<fds.size(); ++x )
152  		    {
153  		      int fd = fds[x];
154  		      fdlist.push_back ( fd );
155  		      slidingFd.freeUpFd ( fd );
156  		      int oldFd = slidingFd.getFdFor ( i->first );
157  		      JTRACE ( "restoring fd" ) ( i->first ) ( oldFd ) ( fd );
158  		      //let connection do custom dup2 handling
159  		      con.restartDup2( oldFd, fd );
160  	
161  		      if ( fd > lastfd )
162  			{
163  			  lastfd = fd;
164  			}
165  		    }
166  	        }
167  	
168  	      size_t j;
169  	      for ( int i = 0 ; i < slidingFd.startFd() ; i++ )
170  	        {
171  	          for ( j = 0 ; j < fdlist.size() ; j++ )
172  		    {
173  		      if ( fdlist.at ( j ) == i )
174  			break;
175  		    }
176  	          if ( j == fdlist.size() )
177  		    {
178  		      _real_close ( i );
179  		    }
180  	        }
181  	
182  	      slidingFd.closeAll();
183  	    }
184  				
185  	    int find_stdin( SlidingFdTable& slidingFd )
186  	    {
187  	      for ( ConnectionToFds::const_iterator i = _conToFd.begin();
188  	          i!=_conToFd.end(); ++i )
189  	      {
190  	        const dmtcp::vector<int>& fds = i->second;
191  	        for ( size_t x=0; x<fds.size(); ++x )
192  	        {
193  	          if (fds[x] == STDIN_FILENO){
194  	            JTRACE("Found stdin: fds[x] <---> slidingFd.getFdFor()")
195  	              (x) (fds[x]) (slidingFd.getFdFor ( i->first ));
196  	            return slidingFd.getFdFor ( i->first );
197  	          }
198  	        }
199  	      }
200  	      return -1;
201  	    }
202  	
    // Hand this target's image path and offset to runMtcpRestore().
    // Callers in this file treat the call as not returning (they follow it
    // with JASSERT(false) "unreachable").
    void mtcpRestart()
    {
      runMtcpRestore ( _path.c_str(), _offset );
    }
207  	
    // UniquePid stored in the connection table loaded from the image.
    const UniquePid& pid() const { return _conToFd.pid(); }
    // Process name stored in the connection table loaded from the image.
    const dmtcp::string& procname() const { return _conToFd.procname(); }
210  	
211  	#ifdef PID_VIRTUALIZATION
    // Session id -> "this subtree contains the leader of that session".
    typedef map<pid_t,bool> sidMapping;
    typedef sidMapping::iterator s_iterator;
    typedef vector<RestoreTarget *>::iterator t_iterator;

    // Mutable access to the virtual pid table loaded from the image.
    VirtualPidTable& getVirtualPidTable() { return _virtualPidTable; }
    // Register t as a direct child of this target in the process tree.
    void addChild(RestoreTarget *t){ _children.push_back(t); }
218  	
219  	    bool isSessionLeader(){
220  	      JTRACE("")(_virtualPidTable.sid()) (pid().pid());
221  	      if( _virtualPidTable.sid() == pid().pid() )
222  		return true;
223  	      else
224  		return false;
225  	    }
226  	
227  	    bool isGroupLeader(){
228  	      JTRACE("")(_virtualPidTable.sid()) (pid().pid());
229  	      if( _virtualPidTable.gid() == pid().pid() )
230  		return true;
231  	      else
232  		return false;
233  	    }
234  	
235  	    bool isForegroundProcess() {
236  	      JTRACE("")(_virtualPidTable.sid()) (pid().pid());
237  	      if( _virtualPidTable.fgid() == _virtualPidTable.gid() )
238  		return true;
239  	      else
240  		return false;
241  	    }
242  	
243  	    bool isInitChild(){
244  	      JTRACE("")(_virtualPidTable.ppid());
245  	      if( _virtualPidTable.ppid() == 1 )
246  		return true;
247  	      else
248  		return false;
249  	    }
250  	
251  	    int addRoot(RestoreTarget *t, pid_t sid){
252  	      if( isSessionLeader() && _virtualPidTable.sid() == sid ){
253  		_roots.push_back(t);
254  		return 1;
255  	      }else{
256  		t_iterator it = _children.begin();
257  		for(; it != _children.end(); it++){
258  		  if( (*it)->addRoot(t, sid) )
259  		    return 1;
260  		}
261  	      }
262  	      return 0;
263  	    }
264  	
265  	    // Traverse this process subtree and setup information about sessions
266  	    //   and their leaders for all children.
267  	    sidMapping &setupSessions() {
268  	      pid_t sid = _virtualPidTable.sid();
269  	      if( !_children.size() ) {
270  		_smap[sid] = isSessionLeader();
271  		return _smap;
272  	      }
273  	      // We have at least one child
274  	      t_iterator it = _children.begin();
275  	      _smap = (*it)->setupSessions();
276  	      for(it++; it != _children.end();it++) {
277  		sidMapping tmp = (*it)->setupSessions();
278  		s_iterator it1 = tmp.begin();
279  		for(;it1 != tmp.end(); it1++) {
280  		  s_iterator it2 = _smap.find(it1->first);
281  		  if( it2 != _smap.end() ) {
282  		    // mapping already exist
283  		    if( it2->second != it1->second ) {
284  		      // Session was created after child creation.  So child from one
285  		      // thread cannot be member of session of child from other thread.
286  		      JASSERT(false). Text("One child contains session leader"
287  					   " and other contains session member!\n");
288  		      exit(0);
289  		    }
290  		  } else {
291  		    // add new mapping
292  		    _smap[it1->first] = it1->second;
293  		  }
294  		}
295  	      }
296  	
297  	      s_iterator sit = _smap.find(sid);
298  	      if( sit != _smap.end() ) {
299  		if( sit->second && !isSessionLeader() ) {
300  		  // child is leader and parent is slave - impossible
301  		  JASSERT(false)
302  		         .Text("child is leader and parent is slave - impossible\n");
303  		  exit(0);
304  		}
305  	      }
306  	      _smap[sid] = isSessionLeader();
307  	      return _smap;
308  	}
309  	
310  	    void printMapping(){
311  	      t_iterator it = _children.begin();
312  	      for(; it != _children.end(); it++){
313  		(*it)->printMapping();
314  	      }
315  	      JTRACE("")(pid());
316  	      s_iterator sit = _smap.begin();
317  	      for(; sit != _smap.end(); sit++){
318  		JTRACE("") (sit->first) (sit->second);
319  	      }
320  	    }
321  	
    // Session map of this subtree; populated by setupSessions().
    sidMapping &getSmap(){ return _smap; }
323  	
324  	    pid_t checkDependence(RestoreTarget *t){
325  	      sidMapping smap = t->getSmap();
326  	      s_iterator ext = smap.begin();
327  	      // Run through sessions --> has leader mapping
328  	      for(; ext != smap.end(); ext++){
329  		if( ext->second == false ){
330  		  // Session pointed by ext has no leader in target t process tree
331  		  s_iterator intern = _smap.find(ext->first);
332  		  if( intern != _smap.end() && intern->second == true ){
333  		    // internal target has session leader in its tree
334  		    // TODO: can process trees be connected through several sessions?
335  		    return ext->first;
336  		  }
337  		}
338  	      }
339  	      return -1;
340  	    }
341  	
342  	    void bringToForeground(SlidingFdTable& slidingFd)
343  	    {
344  	      char controllingTerm[L_ctermid];
345  	      pid_t pid;
346  	
347  	      int sin = find_stdin(slidingFd);
348  	
349  	      if( isSessionLeader() ){
350  	        // XXX: Where is the controlling terminal being set?
351  		char *ptr =  ttyname(sin);
352  		int fd = open(ptr,O_RDWR);
353  		if( ctermid(controllingTerm) ){
354  		  int tfd = open(ptr,O_RDONLY);
355  		  if( tfd >= 0 ){
356  		    JTRACE("Setting current controlling terminal") (controllingTerm);
357  		    close(tfd);
358  		  }else if (ptr == NULL){
359  	            JTRACE("Cannot restore controlling terminal") (ttyname(sin));
360  	          } else {
361  		    JWARNING(false) (ttyname(sin)) 
362  	                    .Text("Cannot restore controlling terminal");
363  		  }
364  		}
365  		if (fd >= 0) close(fd);
366  	      }
367  	
368  	      pid_t gid = getpgid(0);
369  	      pid_t fgid = tcgetpgrp(sin);
370  	
371  	      if( !isForegroundProcess() )
372  		return;
373  	      if( !isGroupLeader()  ){
374  		return;
375  	      }
376  	
377  	      if( gid != fgid ){
378  		if( !(pid = fork()) ){ // fork subversive process
379  		  // This process moves itself to current foreground group
380  		  // and then changes foreground group to what we need
381  		  // so it works as a spy, saboteur or wrecker :)
382  		  // -- Artem
383  		  JTRACE("Change current GID to foreground GID.");
384  	
385  		if( setpgid(0, fgid) ){
386  	          if (fgid == -1) {
387  	            JTRACE("CANNOT Change current GID to foreground GID")
388  	                  (getpid()) (fgid) (_virtualPidTable.fgid()) (gid) (JASSERT_ERRNO);
389  	          } else {
390  	            JWARNING(false) 
391  	                     (getpid()) (fgid) (_virtualPidTable.fgid()) (gid) (JASSERT_ERRNO)
392  	                    .Text("CANNOT Change current GID to foreground GID");
393  	          }
394  	 	  fflush(stdout);
395  	 	  exit(0);
396  		}
397  	
398  	        if( tcsetpgrp(sin, gid) ){
399  		  printf("CANNOT Move parent GID to foreground: %s\n",
400  			 strerror(errno));
401  	 	  printf("PID=%d, FGID=%d, GID=%d\n",getpid(),fgid,gid);
402  	 	  printf("PID=%d, FGID=%d, _FGID=%d, GID=%d\n",
403  			 getpid(),fgid,_virtualPidTable.fgid(), gid);
404  	 	  fflush(stdout);
405  	 	  exit(0);
406  	 	  }
407  	
408  		  JTRACE("Finish foregrounding.")(getpid())(getpgid(0))(tcgetpgrp(0));
409  		  exit(0);
410  		}else{
411  		  int status;
412  		  wait(&status);
413  		}
414  	      }
415  	    }
416  	
417  	    void restoreGroup( SlidingFdTable& slidingFd )
418  	    {
419  	      if( isGroupLeader() ){
420  		// create new group where this process becomes a leader
421  		JTRACE("Create new group.");
422  		setpgid(0, 0);
423  		bringToForeground(slidingFd);
424  	      }
425  	    }
426  	
427  	    void CreateProcess(DmtcpWorker& worker, SlidingFdTable& slidingFd)
428  	    {
429  	      dmtcp::ostringstream o;
430  	      o << dmtcpTmpDir << "/jassertlog." << pid();
431  	      JASSERT_INIT(o.str());
432  	
433  	      //change UniquePid
434  	      UniquePid::resetOnFork(pid());
435  	      VirtualPidTable &vt = _virtualPidTable;
436  	
437  	      JTRACE("")(_real_getpid())(_real_getppid())(_real_getsid(0));
438  	
439  	      vt.updateMapping(pid().pid(), _real_getpid());
440  	      pid_t psid = vt.sid();
441  	
442  	      if( !isSessionLeader() ){
443  	
444  		// Restore group information
445  		restoreGroup(slidingFd);
446  	
447  		// If process is not session leader, restore it and all children.
448  		t_iterator it = _children.begin();
449  		for(; it != _children.end(); it++){
450  		  JTRACE ( "Forking Child Process" ) ( (*it)->pid() );
451  		  pid_t cid = forkChild();
452  	
453  		  if ( cid == 0 )
454  	            {
455  	              (*it)->CreateProcess (worker, slidingFd);
456  	              JASSERT ( false ) . Text ( "Unreachable" );
457  	            }
458  		  JASSERT ( cid > 0 );
459  		  VirtualPidTable::iterator vit = vt.begin();
460  		  for(; vit != vt.end(); vit++){
461  		    if( (*it)->pid() == vit->second ){
462  		      vt.updateMapping ( vit->first, cid );
463  		      break;
464  		    }
465  		  }
466  	
467  		}
468  	      }else{
469  		// Process is session leader.
470  		// There may be not setsid-ed children.
471  		for(t_iterator it = _children.begin(); it != _children.end(); it++){
472  		  s_iterator sit = (*it)->getSmap().find(psid);
473  		  JTRACE("Restore processes that was created before their parent called setsid()");
474  		  if( sit == (*it)->getSmap().end() ){
475  		    JTRACE ( "Forking Child Process" ) ( (*it)->pid() );
476  		    pid_t cid = forkChild();
477  		    if ( cid == 0 )
478  		      {
479  			(*it)->CreateProcess (worker, slidingFd);
480  			JASSERT ( false ) . Text ( "Unreachable" );
481  		      }
482  		    JASSERT ( cid > 0 );
483  		    VirtualPidTable::iterator vit = _virtualPidTable.begin();
484  		    for(; vit != _virtualPidTable.end(); vit++){
485  		      if( (*it)->pid() == vit->second ){
486  			_virtualPidTable.updateMapping ( vit->first, cid );
487  		      }
488  		    }
489  		  }
490  		}
491  	
492  		pid_t nsid = setsid();
493  		JTRACE("change SID")(nsid);
494  		
495  		// Restore group information
496  		restoreGroup(slidingFd);
497  	
498  		for(t_iterator it = _children.begin(); it != _children.end(); it++) {
499  		  JTRACE("Restore processes that was created after their parent called setsid()");
500  		  s_iterator sit = (*it)->getSmap().find(psid);
501  		  if( sit != (*it)->getSmap().end() ) {
502  		    JTRACE ( "Forking Child Process" ) ( (*it)->pid() );
503  		    pid_t cid = forkChild();
504  		    if ( cid == 0 ){
505  		      (*it)->CreateProcess (worker, slidingFd );
506  		      JASSERT ( false ) . Text ( "Unreachable" );
507  		    }
508  		    JASSERT ( cid> 0 );
509  		    VirtualPidTable::iterator vit = _virtualPidTable.begin();
510  		    for(; vit != _virtualPidTable.end(); vit++) {
511  		      if( (*it)->pid() == vit->second ) {
512  			_virtualPidTable.updateMapping ( vit->first, cid );
513  		      }
514  		    }
515  		  }
516  		}
517  	
518  		for(t_iterator it = _roots.begin() ; it != _roots.end(); it++) {
519  		  JTRACE ( "Forking Dependent Root Process" ) ( (*it)->pid() );
520  		  pid_t cid;
521  		  if( (cid = fork()) ){
522  		    waitpid(cid, NULL, 0);
523  		  }else{
524  		    if( fork() )
525  		      exit(0);
526  		    (*it)->CreateProcess(worker, slidingFd );
527  		    JASSERT (false) . Text( "Unreachable" );
528  		  }
529  		}
530  	      }
531  	
532  	      JTRACE("Child and dependent root processes forked, restoring process")
533  		    (pid())(getpid())(isGroupLeader());
534  	      // Save PID mapping information
535  	      pid_t orig = pid().pid();
536  	      pid_t curr = _real_getpid();
537  	      dmtcp::VirtualPidTable::InsertIntoPidMapFile(orig, curr);
538  	
539  	      //Reconnect to dmtcp_coordinator
540  	      WorkerState::setCurrentState ( WorkerState::RESTARTING );
541  	      worker.connectToCoordinatorWithoutHandshake();
542  	      worker.sendCoordinatorHandshake(procname(), _compGroup);
543  	      dmtcp::string serialFile = dmtcp::UniquePid::pidTableFilename();
544  	
545  	      JTRACE ( "PidTableFile: ") ( serialFile ) ( dmtcp::UniquePid::ThisProcess() );
546  	      jalib::JBinarySerializeWriter tblwr ( serialFile );
547  	      _virtualPidTable.serialize ( tblwr );
548  	      tblwr.~JBinarySerializeWriter();
549  	
550  	      int stmpfd =  open( serialFile.c_str(), O_RDONLY);
551  	      JASSERT ( stmpfd >= 0 ) ( serialFile ) ( errno );
552  	
553  	      JASSERT ( dup2 ( stmpfd, PROTECTED_PIDTBL_FD) == PROTECTED_PIDTBL_FD )
554  		      ( serialFile ) ( stmpfd );
555  	
556  	      close (stmpfd);
557  	
558  	      //restart targets[i]
559  	      dupAllSockets ( slidingFd );
560  	
561  	      mtcpRestart();
562  	
563  	      JASSERT ( false ).Text ( "unreachable" );
564  	    }
565  	
566  	
567  	    static pid_t forkChild()
568  	    {
569  	      while ( 1 ) {
570  	
571  		pid_t childPid = fork();
572  	
573  		JASSERT ( childPid != -1 ) .Text ( "fork() failed" );
574  	
575  		if ( childPid == 0 ) { /* child process */
576  		  if ( originalPidTable.isConflictingChildPid ( getpid() ) )
577  		    _exit(1);
578  		  else
579  		    return 0;
580  		}
581  		else { /* Parent Process */
582  		  if ( originalPidTable.isConflictingChildPid ( childPid ) ) {
583  		    JTRACE( "PID Conflict, creating new child" ) (childPid);
584  		    waitpid ( childPid, NULL, 0 );
585  		  }
586  		  else
587  		    return childPid;
588  		}
589  	      }
590  	
591  	      return -1;
592  	    }
593  	#endif
594  	
    // Path of the checkpoint image this target restores.
    dmtcp::string _path;
    // Value returned by ConnectionToFds::loadFromFile(); passed on to
    // runMtcpRestore() by mtcpRestart().
    int _offset;
    // Connection table loaded from the image (connection id -> fds).
    ConnectionToFds _conToFd;
    // Computation group and peer count handed to loadFromFile() in the
    // constructor; main() requires them to match across all targets.
    UniquePid _compGroup;
    int _numPeers;
#ifdef PID_VIRTUALIZATION
    // Virtual pid table handed to loadFromFile() in the constructor.
    VirtualPidTable _virtualPidTable;
    // Links to children of this process
    vector<RestoreTarget *> _children;
    // Links to roots that depend on this target
    // i.e. have SID of this target in its tree.
    vector<RestoreTarget *> _roots;
    // Session map of this subtree, built by setupSessions().
    sidMapping _smap;
    // Cleared in the constructor; main() looks for targets with !_used.
    bool _used;
#endif
610  	  };
611  	
612  	
613  	} // end namespace
614  	
// Usage text printed by main() for --help, for invalid arguments and when no
// usable checkpoint image is given.
//
// gcc-4.3.4 -Wformat=2 issues false positives for warnings unless the format
// string has at least one format specifier with corresponding format argument.
// Ubuntu 9.01 uses -Wformat=2 by default.
static const char* theUsage =
  "USAGE:\n dmtcp_restart [OPTIONS] <ckpt1.dmtcp> [ckpt2.dmtcp...]\n\n"
  "OPTIONS:\n"
  "  --host, -h, (environment variable DMTCP_HOST):\n"
  "      Hostname where dmtcp_coordinator is run (default: localhost)\n"
  "  --port, -p, (environment variable DMTCP_PORT):\n"
  "      Port where dmtcp_coordinator is run (default: 7779)\n"
  "  --tmpdir, -t, (environment variable DMTCP_TMPDIR):\n"
  "      Directory to store temporary files \n"
  "        (default: $TMPDIR/dmtcp-$USER@$HOST or /tmp/dmtcp-$USER@$HOST)\n"
  "  --join, -j:\n"
  "      Join an existing coordinator, raise error if one doesn't already exist\n"
  "  --new, -n:\n"
  "      Create a new coordinator, raise error if one already exists\n"
  "  --new-coordinator:\n"
  "      Create a new coordinator even if one already exists\n"
  "  --batch, -b:\n"
  "      Enable batch mode i.e. start the coordinator on the same node on\n"
  "        a randomly assigned port (if no port is specified by --port)\n"
  "  --interval, -i, (environment variable DMTCP_CHECKPOINT_INTERVAL):\n"
  "      Time in seconds between automatic checkpoints.\n"
  "      Not allowed if --join is specified\n"
  "      --batch implies -i 3600, unless otherwise specified.\n"
  "  --no-check:\n"
  "      Skip check for valid coordinator and never start one automatically\n"
  "  --quiet, -q, (or set environment variable DMTCP_QUIET = 0, 1, or 2):\n"
  "      Skip banner and NOTE messages; if given twice, also skip WARNINGs\n\n"
  "See http://dmtcp.sf.net/ for more information.\n"
;
647  	
// Copyright/licence banner; main() writes it to JASSERT_STDERR when
// jassert_quiet is 0.
static const char* theBanner =
  "DMTCP/MTCP  Copyright (C) 2006-2010  Jason Ansel, Michael Rieker,\n"
  "                                       Kapil Arya, and Gene Cooperman\n"
  "This program comes with ABSOLUTELY NO WARRANTY.\n"
  "This is free software, and you are welcome to redistribute it\n"
  "under certain conditions; see COPYING file for details.\n"
  "(Use flag \"-q\" to hide this message.)\n\n"
;
656  	
//shift args
// Drops the first remaining command-line argument.  Expands to an expression
// on variables literally named argc and argv, so it only works where both
// are in scope.
#define shift argc--,argv++

// One entry per checkpoint image accepted on the command line; filled in by
// main().
dmtcp::vector<RestoreTarget> targets;

#ifdef PID_VIRTUALIZATION
// A root of the restored process forest.
typedef struct {
  RestoreTarget *t;
  // main() skips roots with indep == false: they are restored from one of
  // the independent roots instead.
  bool indep;
} RootTarget;
dmtcp::vector<RootTarget> roots;
// Called by main(), in this order, before any root process is forked.
void BuildProcessTree();
void ProcessGroupInfo();
void SetupSessions();

#endif
673  	
674  	int main ( int argc, char** argv )
675  	{
676  	  bool autoStartCoordinator=true;
677  	  bool isRestart = true;
678  	  int allowedModes = dmtcp::DmtcpWorker::COORD_ANY;
679  	
680  	  if (! getenv(ENV_VAR_QUIET))
681  	    setenv(ENV_VAR_QUIET, "0", 0);
682  	
683  	  //process args
684  	  shift;
685  	  while(true){
686  	    dmtcp::string s = argc>0 ? argv[0] : "--help";
687  	    if(s=="--help" || (s=="-h" && argc==1)){
688  	      JASSERT_STDERR << theUsage;
689  	      //fprintf(stderr, theUsage, "");
690  	      return 1;
691  	    }else if(s == "--no-check"){
692  	      autoStartCoordinator = false;
693  	      shift;
694  	    }else if(s == "-j" || s == "--join"){
695  	      allowedModes = dmtcp::DmtcpWorker::COORD_JOIN;
696  	      shift;
697  	    }else if(s == "-n" || s == "--new"){
698  	      allowedModes = dmtcp::DmtcpWorker::COORD_NEW;
699  	      shift;
700  	    }else if(s == "--new-coordinator"){
701  	      allowedModes = dmtcp::DmtcpWorker::COORD_FORCE_NEW;
702  	      shift;
703  	    }else if(s == "-b" || s == "--batch"){
704  	      allowedModes = dmtcp::DmtcpWorker::COORD_BATCH;
705  	      shift;
706  	    }else if(s == "-i" || s == "--interval"){
707  	      setenv(ENV_VAR_CKPT_INTR, argv[1], 1);
708  	      shift; shift;
709  	    }else if(argc>1 && (s == "-h" || s == "--host")){
710  	      setenv(ENV_VAR_NAME_ADDR, argv[1], 1);
711  	      shift; shift;
712  	    }else if(argc>1 && (s == "-p" || s == "--port")){
713  	      setenv(ENV_VAR_NAME_PORT, argv[1], 1);
714  	      shift; shift;
715  	    }else if(argc>1 && (s == "-t" || s == "--tmpdir")){
716  	      setenv(ENV_VAR_TMPDIR, argv[1], 1);
717  	      shift; shift;
718  	    }else if(s == "-q" || s == "--quiet"){
Event tainted_string_return_content: "getenv" returns tainted string content.
Also see events: [tainted_string]
719  	      *getenv(ENV_VAR_QUIET) = *getenv(ENV_VAR_QUIET) + 1;
720  	      // Just in case a non-standard version of setenv is being used:
Event tainted_string: Passing tainted string "getenv("DMTCP_QUIET")" to a function that cannot accept tainted data.
Also see events: [tainted_string_return_content]
721  	      setenv(ENV_VAR_QUIET, getenv(ENV_VAR_QUIET), 1);
722  	      shift;
723  	    }else if( (s.length()>2 && s.substr(0, 2)=="--") ||
724  	              (s.length()>1 && s.substr(0, 1)=="-" ) ) {
725  	      JASSERT_STDERR << "Invalid Argument\n";
726  	      JASSERT_STDERR << theUsage;
727  	      return 1;
728  	    }else if(argc>1 && s=="--"){
729  	      shift;
730  	      break;
731  	    }else{
732  	      break;
733  	    }
734  	  }
735  	
736  	  dmtcp::UniquePid::setTmpDir(getenv(ENV_VAR_TMPDIR));
737  	  dmtcpTmpDir = dmtcp::UniquePid::getTmpDir();
738  	
739  	  jassert_quiet = *getenv(ENV_VAR_QUIET) - '0';
740  	
741  	  if (jassert_quiet == 0)
742  	    JASSERT_STDERR << theBanner;
743  	
744  	  if (autoStartCoordinator)
745  	    dmtcp::DmtcpWorker::startCoordinatorIfNeeded(allowedModes, isRestart);
746  	
747  	  //make sure JASSERT initializes now, rather than during restart
748  	  dmtcp::ostringstream o;
749  	  o << dmtcpTmpDir << "/jassertlog." << dmtcp::UniquePid(getpid());
750  	  JASSERT_INIT(o.str());
751  	  JTRACE("New dmtcp_restart process; _argc_ ckpt images") (argc);
752  	
753  	  bool doAbort = false;
754  	  for(; argc>0; shift){
755  	    dmtcp::string restorename(argv[0]);
756  	    struct stat buf;
757  	    int rc = stat(restorename.c_str(), &buf);
758  	    if (dmtcp::Util::strStartsWith(restorename, "ckpt_") &&
759  	        dmtcp::Util::strEndsWith(restorename, "_files")) {
760  	      continue;
761  	#ifndef URDB
762  	    } else if (!dmtcp::Util::strEndsWith(restorename, ".dmtcp")) {
763  	      JNOTE("File doesn't have .dmtcp extension. Check Usage.")
764  	        (restorename);
765  	      JASSERT_STDERR << theUsage;
766  	      doAbort = true;
767  	#endif
768  	    } else if (rc == -1) {
769  	      char error_msg[1024];
770  	      sprintf(error_msg, "\ndmtcp_restart: ckpt image %s", restorename.c_str());
771  	      perror(error_msg);
772  	      doAbort = true;
773  	    } else if (buf.st_uid != getuid()) { /*Could also run if geteuid() matches*/
774  	      printf("\nProcess uid (%d) doesn't match uid (%d) of\n" \
775  	             "checkpoint image (%s).\n" \
776  		     "This is dangerous.  Aborting for security reasons.\n" \
777  	           "If you still want to do this (at your own risk),\n" \
778  	           "  then modify dmtcp/src/%s:%d and re-compile.\n",
779  	           getuid(), buf.st_uid, restorename.c_str(), __FILE__, __LINE__ - 6);
780  	      doAbort = true;
781  	    }
782  	    if (doAbort) {
783  	      exit(1);
784  	    }
785  	
786  	    JTRACE("Will restart ckpt image _argv[0]_") (argv[0]);
787  	    targets.push_back ( RestoreTarget ( argv[0] ) );
788  	  }
789  	
790  	  if (targets.size() <= 0) {
791  	    JNOTE("ERROR: No DMTCP checkpoint image(s) found. Check Usage.");
792  	    JASSERT_STDERR << theUsage;
793  	    exit(1);
794  	  }
795  	
796  	  SlidingFdTable slidingFd;
797  	  ConnectionToFds conToFd;
798  	
799  	  ConnectionList& connections = ConnectionList::instance();
800  	  for ( ConnectionList::iterator i = connections.begin()
801  	                                     ; i!= connections.end()
802  	          ; ++i )
803  	  {
804  	    conToFd[i->first].push_back ( slidingFd.getFdFor ( i->first ) );
805  	    JTRACE ( "will restore" ) ( i->first ) ( conToFd[i->first].back() );
806  	  }
807  	
808  	  // Check that all targets belongs to one computation group
809  	  // If not - abort
810  	  for(size_t i=0; i<targets.size(); i++){
811  	    JTRACE ( "Check targets: " )
812  	      ( targets[i]._path ) ( targets[i]._compGroup ) ( targets[i]._numPeers );
813  	  }
814  	
815  	  compGroup = targets[0]._compGroup;
816  	  numPeers = targets[0]._numPeers;
817  	  for(size_t i=0; i<targets.size(); i++){
818  	    if( compGroup != targets[i]._compGroup){
819  	      JASSERT(false)(compGroup)(targets[i]._compGroup)
820  		.Text("ERROR: Restored programs belongs to different computation IDs");
821  	    }else if( numPeers != targets[i]._numPeers ){
822  	      JASSERT(false)(numPeers)(targets[i]._numPeers)
823  		.Text("ERROR: Different numpber of processes saved in checkpoint images");
824  	    }
825  	  }
826  	
827  	  //------------------------
828  	  DmtcpWorker worker ( false );
829  	  WorkerState::setCurrentState ( WorkerState::RESTARTING );
830  	  ConnectionState ckptCoord ( conToFd );
831  	  worker.restoreSockets ( ckptCoord, compGroup, numPeers, coordTstamp );
832  	
833  	#ifndef PID_VIRTUALIZATION
834  	  int i = (int)targets.size();
835  	
836  	  //fork into targs.size() processes
837  	  while(--i > 0){
838  	    int cid = fork();
839  	    if(cid==0) break;
840  	    else JASSERT(cid>0);
841  	  }
842  	  RestoreTarget& targ = targets[i];
843  	
844  	  JTRACE("forked, restoring process")(i)(targets.size())(targ.pid())(getpid());
845  	
846  	  //change UniquePid
847  	  UniquePid::resetOnFork(targ.pid());
848  	
849  	  //Reconnect to dmtcp_coordinator
850  	  WorkerState::setCurrentState ( WorkerState::RESTARTING );
851  	  worker.connectToCoordinatorWithoutHandshake();
852  	  worker.sendCoordinatorHandshake(targ.procname());
853  	
854  	  //restart targets[i]
855  	  targets[i].dupAllSockets ( slidingFd );
856  	  targets[i].mtcpRestart();
857  	
858  	  JASSERT ( false ).Text ( "unreachable" );
859  	  return -1;
860  	#else
861  	  size_t i = targets.size();
862  	
863  	  // Create roots vector, assign children to their parents.
864  	  // Delete children that don't exist.
865  	  BuildProcessTree();
866  	
867  	  // Process all checkpoints to find one of them that can switch
868  	  // needed group to foreground.
869  	  ProcessGroupInfo();
870  	  // Create session meta-information in each node of the process tree.
871  	  // Node contains info about all sessions which exists at lower levels.
872  	  // Also node is aware of session leader existence at lower levels.
873  	  SetupSessions();
874  	
875  	  /* Create the file to hold the pid/tid maps. */
876  	  openOriginalToCurrentMappingFiles();
877  	
878  	  int pgrp_index=-1;
879  	  JTRACE ( "Creating ROOT Processes" )(roots.size());
880  	  for ( int j = 0 ; j < roots.size(); ++j )
881  	  {
882  	    if( roots[j].indep == false ){
883  	      // We will restore this process from one of the independent roots.
884  	      continue;
885  	    }
886  	    if (pgrp_index == -1 && !roots[j].t->isInitChild() ){
887  	      pgrp_index = j;
888  	      continue;
889  	    }
890  	
891  	    pid_t cid = fork();
892  	    if ( cid == 0 ){
893  	      JTRACE ( "Root of process tree" ) ( _real_getpid() ) ( _real_getppid() );
894  	      if( roots[j].t->isInitChild() ){
895  	        JTRACE ( "Create init-child process" ) ( _real_getpid() )
896  						       ( _real_getppid() );
897  	        if( fork() )
898  	          _exit(0);
899  	      }
900  	      roots[j].t->CreateProcess(worker, slidingFd);
901  	      JASSERT (false) . Text( "Unreachable" );
902  	    }
903  	    JASSERT ( cid > 0 );
904  	    if( roots[j].t->isInitChild() ){
905  	      waitpid(cid, NULL, 0);
906  	    }
907  	  }
908  	
909  	  JTRACE("Restore processes without corresponding Root Target");
910  	  int flat_index = -1;
911  	  int j = 0;
912  	  if( pgrp_index < 0 ){ // No root processes at all
913  	    // Find first flat process that can replace currently running
914  	    //   dmtcp_restart context.
915  	    for (j = 0; j < targets.size(); ++j){
916  	      if( !targets[j]._used ){
917  	            // Save first flat-like process to be restored after all others
918  	            flat_index = j;
919  	            j++;
920  	            break;
921  	      }
922  	    }
923  	  }
924  	  // Use j set to 0 (if at least one root non-init-child process exists),
925  	  // or else j set to some value if no such process found.
926  	  for(; j < targets.size(); ++j)
927  	  {
928  	    if( !targets[j]._used ){
929  	      if( pgrp_index < 0 ){
930  	          // Save first flat-like process to be restored after all others
931  	          pgrp_index = j;
932  	          continue;
933  	      }else{
934  	        targets[j].CreateProcess(worker, slidingFd);
935  	        JTRACE("Need in flat-like restore for process")(targets[j].pid());
936  	      }
937  	    }
938  	  }
939  	
940  	  if( pgrp_index >=0 ){
941  	    JTRACE("Restore first Root Target")(roots[pgrp_index].t->pid());
942  	    roots[pgrp_index].t->CreateProcess(worker, slidingFd);
943  	  }else if (flat_index >= 0){
944  	    JTRACE("Restore first Flat Target")(targets[flat_index].pid());
945  	    targets[flat_index].CreateProcess(worker, slidingFd );
946  	  }else{
947  	    // FIXME: Under what conditions will this path be exercised?
948  	    JNOTE ("unknown type of target?") (targets[flat_index]._path);
949  	  }
950  	#endif
951  	}
952  	
953  	#ifdef PID_VIRTUALIZATION
954  	void BuildProcessTree()
955  	{
956  	  for (size_t j = 0; j < targets.size(); ++j)
957  	  {
958  	    VirtualPidTable& virtualPidTable = targets[j].getVirtualPidTable();
959  	    originalPidTable.insertFromVirtualPidTable ( virtualPidTable );
960  	    if( virtualPidTable.isRootOfProcessTree() == true ){
961  	      // If this process is independent (root of process tree
962  	      RootTarget rt;
963  	      rt.t = &targets[j];
964  	      rt.indep = true;
965  	      roots.push_back(rt);
966  	      targets[j]._used = true;
967  	    }else if( !targets[j]._used ){
968  	      // We set used flag if we use target as somebodys child. If it is used - no need to check is it roor
969  	      // Iterate through all targets and try to find the one who has this process
970  	      // as child process
971  	      JTRACE("Process is not root of process tree: try to find if it has parent");
972  	      bool is_root = true;
973  	      for (size_t i = 0; i < targets.size(); i++) {
974  	        VirtualPidTable & virtualPidTable = targets[i].getVirtualPidTable();
975  	        VirtualPidTable::iterator it;
976  	        // Search inside the child list of target[j], make sure that i != j
977  	        for (it = virtualPidTable.begin(); (i != j) && (it != virtualPidTable.end()) ; it++) {
978  	          UniquePid& childUniquePid = it->second;
979  	          JTRACE("Check child")(childUniquePid)(" parent ")(targets[i].pid())("checked ")(targets[j].pid());
980  	          if (childUniquePid == targets[j].pid()){
981  	            is_root = false;
982  	            break;
983  	          }
984  	        }
985  	      }
986  	      JTRACE("Root detection:")(is_root)(targets[j].pid());
987  	      if( is_root ){
988  	        RootTarget rt;
989  	        rt.t = &targets[j];
990  	        rt.indep = true;
991  	        roots.push_back(rt);
992  	        targets[j]._used = true;
993  	      }
994  	    }
995  	
996  	    // Add all children
997  	    VirtualPidTable::iterator it;
998  	    for(it = virtualPidTable.begin(); it != virtualPidTable.end(); it++ ){
999  	      // find target
1000 	      bool found = false;
1001 	      pid_t childOriginalPid = it->first;
1002 	      UniquePid& childUniquePid = it->second;
1003 	
1004 	      for ( size_t i = 0; i < targets.size(); i++ )
1005 	      {
1006 	        if ( childUniquePid == targets[i].pid() )
1007 	        {
1008 	          found = 1;
1009 	          JTRACE ( "Add child to current target" ) ( targets[j].pid() ) ( childUniquePid );
1010 	          targets[i]._used = true;
1011 	          targets[j].addChild(&targets[i]);
1012 	        }
1013 	      }
1014 	      if ( !found ){
1015 	        JTRACE("Child not found")(childOriginalPid);
1016 	        virtualPidTable.erase( childOriginalPid );
1017 	      }
1018 	    }
1019 	  }
1020 	}
1021 	
/*
 * Group processing
 * 1. Divide all processes into sessions.
 * 2. Divide the processes of each session into groups.
 * 3. In each session, check that the stored foreground group IDs are equal.
 *    If not, something is wrong:  ABORT
 * 4. In each session, choose the process that can bring the appropriate
 *    group to the foreground.
 * 5. Serialize information about the chosen UniquePIDs in the following
 *    format: "COUNT:unique-pid1:unique-pid2:..."
 * 6. Deserialize the information from step 5 in the forked and restored
 *    processes.
 *
 */
1035 	
1036 	class group {
1037 	public:
1038 	  group(){
1039 	    gid = -2;
1040 	  }
1041 	  pid_t gid;
1042 	  vector<RestoreTarget*> targets;
1043 	};
1044 	
1045 	class session{
1046 	public:
1047 	  session(){
1048 	    sid = -2;
1049 	    fgid = -2;
1050 	  }
1051 	  pid_t sid;
1052 	  pid_t fgid;
1053 	  map<pid_t,group> groups;
1054 	  typedef map<pid_t,group>::iterator group_it;
1055 	  UniquePid upid;
1056 	};
1057 	
1058 	void ProcessGroupInfo()
1059 	{
1060 	  map<pid_t,session> smap;
1061 	  map<pid_t,session>::iterator it;
1062 	
1063 	  // 1. divide processes into sessions and groups
1064 	  for (size_t j = 0; j < targets.size(); j++)
1065 	  {
1066 	    VirtualPidTable& virtualPidTable = targets[j].getVirtualPidTable();
1067 	    JTRACE("Process ")
1068 	      (virtualPidTable.pid())(virtualPidTable.ppid())(virtualPidTable.sid())
1069 	      (virtualPidTable.gid())(virtualPidTable.fgid())
1070 	      (virtualPidTable.isRootOfProcessTree());
1071 	
1072 	    pid_t sid = virtualPidTable.sid();
1073 	    pid_t gid = virtualPidTable.gid();
1074 	    pid_t fgid = virtualPidTable.fgid();
1075 	
1076 	    /*
1077 	    // If group ID doesn't belong to known PIDs, indicate that fact
1078 	    //   using -1 value.
1079 	    if( !virtualPidTable.pidExists(gid) ){
1080 	    JTRACE("DROP gid")(gid);
1081 	    virtualPidTable.setgid(-1);
1082 	    gid = -1;
1083 	    }
1084 	    // If foreground group ID not belongs to known PIDs,
1085 	    //   indicate that fact using -1 value.
1086 	    if( !virtualPidTable.pidExists(fgid) ){
1087 	    JTRACE("DROP fgid")(fgid);
1088 	    virtualPidTable.setfgid(-1);
1089 	    fgid = -1;
1090 	    }
1091 	    */
1092 	
1093 	    session &s = smap[sid];
1094 	    // if this is first element of this session
1095 	    if( s.sid == -2 ){
1096 	      s.sid = sid;
1097 	    }
1098 	    group &g = smap[sid].groups[gid];
1099 	    // if this is first element of group gid
1100 	    if( g.gid == -2 ){
1101 	      g.gid = gid;
1102 	    }
1103 	    g.targets.push_back(&targets[j]);
1104 	  }
1105 	
1106 	  // 2. Check if foreground setting is correct
1107 	  it = smap.begin();
1108 	  for(;it != smap.end();it++){
1109 	    session &s = it->second;
1110 	    session::group_it g_it = s.groups.begin();
1111 	    pid_t fgid = -2;
1112 	    for(; g_it!=s.groups.end();g_it++){
1113 	      group &g = g_it->second;
1114 	      for(size_t k=0; k<g.targets.size(); k++){
1115 	        VirtualPidTable& virtualPidTable = g.targets[k]->getVirtualPidTable();
1116 	        pid_t cfgid = virtualPidTable.fgid();
1117 	        if( fgid == -2 ){
1118 	          fgid = cfgid;
1119 	        }else if( fgid != -1 && cfgid != -1 && fgid != cfgid ){
1120 	          printf("Error: process from same session stores different"
1121 	              " foreground group ID: %d, %d\n", fgid, cfgid);
1122 	          // DEBUG PRINTOUT:
1123 	          {
1124 	            session::group_it g_it1 = s.groups.begin();
1125 	            for(; g_it1!=s.groups.end();g_it1++){
1126 	              group &g1 = g_it1->second;
1127 	              for(size_t m=0; m<g1.targets.size() ;m++){
1128 	                VirtualPidTable& virtualPidTable = g1.targets[m]->getVirtualPidTable();
1129 	                pid_t pid = virtualPidTable.pid();
1130 	                pid_t cfgid = virtualPidTable.fgid();
1131 	                printf("PID=%d <--> FGID = %d\n",pid,cfgid);
1132 	              }
1133 	            }
1134 	          }
1135 	          abort();
1136 	        }
1137 	      }
1138 	      JTRACE("Checked ")(fgid);
1139 	    }
1140 	    s.fgid = fgid;
1141 	    if( s.groups.find(s.fgid) == s.groups.end() ){
1142 	      // foreground group is missing, don't need to change foreground groop
1143 	      s.fgid = -1;
1144 	    }
1145 	
1146 	    {
1147 	      session::group_it g_it1 = s.groups.begin();
1148 	      for(; g_it1!=s.groups.end();g_it1++){
1149 	        group &g1 = g_it1->second;
1150 	        for(size_t m=0; m<g1.targets.size(); m++){
1151 	          VirtualPidTable& virtualPidTable = g1.targets[m]->getVirtualPidTable();
1152 	          pid_t pid = virtualPidTable.pid();
1153 	          pid_t cfgid = virtualPidTable.fgid();
1154 	          JTRACE("PID=%d <--> FGID = %d")(pid)(cfgid);
1155 	        }
1156 	      }
1157 	    }
1158 	  }
1159 	
1160 	  // Print out session mapping.
1161 	  JTRACE("Session number:")(smap.size());
1162 	  it = smap.begin();
1163 	  for( ; it != smap.end(); it++ ){
1164 	    session &s = it->second;
1165 	    JTRACE("Session printout:")(s.sid)(s.fgid)(s.upid.toString().c_str());
1166 	    session::group_it g_it = s.groups.begin();
1167 	    for(; g_it!=s.groups.end();g_it++){
1168 	      group &g = g_it->second;
1169 	      JTRACE("\tGroup ID: ")(g.gid);
1170 	      /*
1171 	         for(k=0; k<g.targets.size() ;k++){
1172 	         printf("%d ", g.targets[k]->pid().pid());
1173 	         }
1174 	         printf("\n");
1175 	         */
1176 	    }
1177 	  }
1178 	}
1179 	
1180 	void SetupSessions()
1181 	{
1182 	  for(size_t j=0; j < roots.size(); j++){
1183 	    roots[j].t->setupSessions();
1184 	  }
1185 	
1186 	  for(size_t i = 0; i < roots.size(); i++){
1187 	    for(size_t j = 0; j < roots.size(); j++){
1188 	      if( i == j )
1189 	        continue;
1190 	      pid_t sid;
1191 	      if( (sid = (roots[i].t)->checkDependence(roots[j].t)) >= 0 ){
1192 	        // it2 depends on it1
1193 	        JTRACE("Root target j depends on Root target i")(i)(roots[i].t->pid())(j)(roots[j].t->pid());
1194 	        (roots[i].t)->addRoot(roots[j].t, sid);
1195 	        roots[j].indep = false;
1196 	      }
1197 	    }
1198 	  }
1199 	}
1200 	
1201 	int openSharedFile(dmtcp::string name, int flags)
1202 	{
1203 	  int fd;
1204 	  // try to create, truncate & open file
1205 	  if( (fd = open(name.c_str(), O_EXCL|O_CREAT|O_TRUNC | flags, 0600)) >= 0) {
1206 	    return fd;
1207 	  }
1208 	  if (fd < 0 && errno == EEXIST) {
1209 	    if ((fd = open(name.c_str(), flags, 0600)) > 0) {
1210 	      return fd;
1211 	    }
1212 	  }
1213 	  // unable to create & open OR open
1214 	  JASSERT( false )(name)(strerror(errno)).Text("Cannot open file");
1215 	  return -1;
1216 	}
1217 	
1218 	static void openOriginalToCurrentMappingFiles()
1219 	{
1220 	  dmtcp::ostringstream pidMapFile, pidMapCountFile;
1221 	  dmtcp::ostringstream shmidListFile, shmidMapFile;
1222 	  int fd;
1223 	
1224 	  shmidMapFile << dmtcpTmpDir << "/dmtcpShmidMap."
1225 	     << compGroup << "." << std::hex << coordTstamp;
1226 	  shmidListFile << dmtcpTmpDir << "/dmtcpShmidList."
1227 	     << compGroup << "." << std::hex << coordTstamp;
1228 	
1229 	  pidMapFile << dmtcpTmpDir << "/dmtcpPidMap."
1230 	     << compGroup << "." << std::hex << coordTstamp;
1231 	  pidMapCountFile << dmtcpTmpDir << "/dmtcpPidMapCount."
1232 	     << compGroup << "." << std::hex << coordTstamp;
1233 	
1234 	  // Open and create shmidListFile if it doesn't exist.
1235 	  JTRACE("Open dmtcpShmidMapFile")(shmidListFile.str());
1236 	  fd = openSharedFile(shmidListFile.str(), (O_WRONLY|O_APPEND));
1237 	  JASSERT ( fd != -1 );
1238 	  JASSERT ( dup2 ( fd, PROTECTED_SHMIDLIST_FD ) == PROTECTED_SHMIDLIST_FD )
1239 		  ( shmidListFile.str() );
1240 	  close (fd);
1241 	
1242 	  // Open and create shmidMapFile if it doesn't exist.
1243 	  JTRACE("Open dmtcpShmidMapFile")(shmidMapFile.str());
1244 	  fd = openSharedFile(shmidMapFile.str(), (O_WRONLY|O_APPEND));
1245 	  JASSERT ( fd != -1 );
1246 	  JASSERT ( dup2 ( fd, PROTECTED_SHMIDMAP_FD ) == PROTECTED_SHMIDMAP_FD )
1247 		  ( shmidMapFile.str() );
1248 	  close (fd);
1249 	
1250 	  // Open and create pidMapFile if it doesn't exist.
1251 	  JTRACE("Open dmtcpPidMapFile")(pidMapFile.str());
1252 	  fd = openSharedFile(pidMapFile.str(), (O_WRONLY|O_APPEND));
1253 	  JASSERT ( fd != -1 );
1254 	  JASSERT ( dup2 ( fd, PROTECTED_PIDMAP_FD ) == PROTECTED_PIDMAP_FD )
1255 		  ( pidMapFile.str() );
1256 	  close (fd);
1257 	
1258 	  // Open and create pidMapCountFile if it doesn't exist.
1259 	  JTRACE("Open dmtcpPidMapCount files for writing")(pidMapCountFile.str());
1260 	  fd = openSharedFile(pidMapCountFile.str(), O_RDWR);
1261 	  JASSERT ( fd != -1 );
1262 	  JASSERT ( dup2 ( fd, PROTECTED_PIDMAPCNT_FD ) == PROTECTED_PIDMAPCNT_FD )
1263 		  ( pidMapCountFile.str() );
1264 	  close(fd);
1265 	
1266 	  dmtcp::Util::lockFile(PROTECTED_PIDMAPCNT_FD);
1267 	
1268 	  // Initialize pidMapCountFile with zero value.
1269 	  static jalib::JBinarySerializeWriterRaw countwr(pidMapCountFile.str(),
1270 							  PROTECTED_PIDMAPCNT_FD);
1271 	  if( countwr.isempty() ){
1272 	    JTRACE("pidMapCountFile is empty.  Initialize it with count = 0")
1273 	      (pidMapCountFile.str());
1274 	    size_t numMaps = 0;
1275 	    dmtcp::VirtualPidTable::serializeEntryCount (countwr, numMaps);
1276 	    fsync(PROTECTED_PIDMAPCNT_FD);
1277 	  }else{
1278 	    JTRACE("pidMapCountFile is not empty - do nothing");
1279 	  }
1280 	
1281 	  dmtcp::Util::unlockFile(PROTECTED_PIDMAPCNT_FD);
1282 	}
1283 	#endif
1284 	
1285 	static void runMtcpRestore ( const char* path, int offset )
1286 	{
1287 	  static dmtcp::string mtcprestart = jalib::Filesystem::FindHelperUtility ( "mtcp_restart" );
1288 	
1289 	  // Tell mtcp_restart process to write its debugging information to
1290 	  // PROTECTED_STDERR_FD. This way we prevent it from spitting out garbage onto
1291 	  // FD_STDERR if it is being used by the user process in a special way.
1292 	  char protected_stderr_fd_str[16];
1293 	  sprintf(protected_stderr_fd_str, "%d", PROTECTED_STDERR_FD);
1294 	
1295 	#ifdef USE_MTCP_FD_CALLING
1296 	  int fd = ConnectionToFds::openMtcpCheckpointFile(path);
1297 	  char buf[64];
1298 	  sprintf(buf, "%d", fd);
1299 	  char buf2[64];
1300 	  // gzip_child_pid set by openMtcpCheckpointFile() above.
1301 	  sprintf(buf2, "%d", dmtcp::ConnectionToFds::gzip_child_pid);
1302 	
1303 	  char* newArgs[] = {
1304 	    ( char* ) mtcprestart.c_str(),
1305 	    ( char* ) "--stderr-fd",
1306 	    protected_stderr_fd_str,
1307 	    ( char* ) "--fd",
1308 	    buf,
1309 	    ( char* ) "--gzip-child-pid",
1310 	    buf2,
1311 	    NULL
1312 	  };
1313 	  if (dmtcp::ConnectionToFds::gzip_child_pid == -1) // If no gzip compression
1314 	    newArgs[3] = NULL;
1315 	
1316 	  JTRACE ( "launching mtcp_restart --fd" )(fd)(path);
1317 	#else
1318 	  char buf[64];
1319 	  sprintf(buf, "%d", offset);
1320 	
1321 	  char* newArgs[] = {
1322 	    ( char* ) mtcprestart.c_str(),
1323 	    ( char* ) "--stderr-fd",
1324 	    protected_stderr_fd_str,
1325 	    ( char* ) "--offset",
1326 	    buf,
1327 	    (char*) path,
1328 	    NULL
1329 	  };
1330 	
1331 	  JTRACE ( "launching mtcp_restart --offset" )(path)(offset);
1332 	
1333 	#endif
1334 	
1335 	  execvp ( newArgs[0], newArgs );
1336 	  JASSERT ( false ) ( newArgs[0] ) ( newArgs[1] ) ( JASSERT_ERRNO )
1337 	          .Text ( "exec() failed" );
1338 	}