1    	/****************************************************************************
2    	 *   Copyright (C) 2006-2010 by Jason Ansel, Kapil Arya, and Gene Cooperman *
3    	 *   jansel@csail.mit.edu, kapil@ccs.neu.edu, gene@ccs.neu.edu              *
4    	 *                                                                          *
5    	 *   This file is part of the dmtcp/src module of DMTCP (DMTCP:dmtcp/src).  *
6    	 *                                                                          *
7    	 *  DMTCP:dmtcp/src is free software: you can redistribute it and/or        *
8    	 *  modify it under the terms of the GNU Lesser General Public License as   *
9    	 *  published by the Free Software Foundation, either version 3 of the      *
10   	 *  License, or (at your option) any later version.                         *
11   	 *                                                                          *
12   	 *  DMTCP:dmtcp/src is distributed in the hope that it will be useful,      *
13   	 *  but WITHOUT ANY WARRANTY; without even the implied warranty of          *
14   	 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the           *
15   	 *  GNU Lesser General Public License for more details.                     *
16   	 *                                                                          *
17   	 *  You should have received a copy of the GNU Lesser General Public        *
18   	 *  License along with DMTCP:dmtcp/src.  If not, see                        *
19   	 *  <http://www.gnu.org/licenses/>.                                         *
20   	 ****************************************************************************/
21   	
22   	#include <unistd.h>
23   	
24   	#include <stdlib.h>
25   	#include <string>
26   	#include <stdio.h>
27   	#include  "../jalib/jassert.h"
28   	#include  "../jalib/jfilesystem.h"
29   	#include "connectionmanager.h"
30   	#include "dmtcpworker.h"
31   	#include "dmtcpmessagetypes.h"
32   	#include "connectionstate.h"
33   	#include "mtcpinterface.h"
34   	#include "syscallwrappers.h"
35   	#include "protectedfds.h"
36   	#include "util.h"
37   	#include <sys/types.h>
38   	#include <sys/stat.h>
39   	#include <fcntl.h>
40   	#include <errno.h>
41   	#include <vector>
42   	
// Some global definitions
// Computation group of the checkpoint set being restarted; main() copies it
// from the first restore target and checks every other target against it.
static dmtcp::UniquePid compGroup;
// Peer count recorded in the checkpoint images; main() copies it from the
// first restore target and checks every other target against it.
static int numPeers;
// Coordinator timestamp that main() passes to DmtcpWorker::restoreSockets().
static int coordTstamp = 0;

// Placeholder value; main() overwrites it with dmtcp::UniquePid::getTmpDir().
dmtcp::string dmtcpTmpDir = "/DMTCP/UnInitialized/Tmp/Dir";

using namespace dmtcp;

#ifdef PID_VIRTUALIZATION
// Called by main() before the root processes are forked ("Create the file to
// hold the pid/tid maps").  Definition is not in this part of the file.
static void openOriginalToCurrentMappingFiles();
void unlockPidMapFile();
#endif
// Invoked by RestoreTarget::mtcpRestart() with the checkpoint image path and
// the offset returned by ConnectionToFds::loadFromFile().
static void runMtcpRestore ( const char* path, int offset );
57   	
58   	namespace
59   	{
60   	
61   	#ifdef PID_VIRTUALIZATION
62   	
63   	  class OriginalPidTable {
64   	    public:
65   	      OriginalPidTable(){}
66   	
67   	      void insertFromVirtualPidTable ( dmtcp::VirtualPidTable& vt )
68   	      {
69   	        dmtcp::vector< pid_t > tmpVector;
70   	
71   	        _insert(vt.pid());
72   	
73   	        tmpVector = vt.getChildPidVector();
74   	        for ( size_t i = 0; i < tmpVector.size(); ++i )
75   	          _insert(tmpVector[i]);
76   	
77   	        tmpVector = vt.getTidVector();
78   	        for ( size_t i = 0; i < tmpVector.size(); ++i )
79   	          _insert(tmpVector[i]);
80   	      }
81   	
82   	      void _insert( pid_t pid )
83   	      {
84   	        if (!isConflictingChildPid (pid) /* && newVector[i] != getpid()*/) {
85   	          _vector.push_back ( pid );
86   	          JTRACE("New Pid Pushed to PidVector") (pid);
87   	        }
88   	      }
89   	
90   	      bool isConflictingChildPid ( pid_t pid )
91   	      {
92   	        //iterator i = _vector.find ( pid );
93   	        //if ( i == _vector.end() )
94   	        //  return false;
95   	        for ( size_t i = 0; i < _vector.size(); ++i )
96   	          if ( _vector[i] == pid )
97   	            return true;
98   	
99   	        return false;
100  	      }
101  	
102  	      size_t numPids () { return _vector.size(); }
103  	
104  	    private:
105  	      typedef dmtcp::vector< pid_t >::iterator iterator;
106  	      dmtcp::vector< pid_t > _vector;
107  	  };
108  	
109  	  OriginalPidTable originalPidTable;
110  	
111  	#endif
112  	
113  	
114  	  class RestoreTarget
115  	  {
116  	  public:
117  	    RestoreTarget ( const dmtcp::string& path )
118  	      : _path ( path )
119  	    {
120  	      JASSERT ( jalib::Filesystem::FileExists ( _path ) ) ( _path )
121  			.Text ( "checkpoint file missing" );
122  	#ifdef PID_VIRTUALIZATION
123  	      _offset = _conToFd.loadFromFile(_path, _compGroup, _numPeers,
124  					      _virtualPidTable);
125  	      _virtualPidTable.erase(getpid());
126  	      _roots.clear();
127  	      _children.clear();
128  	      _smap.clear();
129  	      _used = 0;
130  	#else
131  	      _offset = _conToFd.loadFromFile(_path, _compGroup, _numPeers);
132  	#endif
133  	      JTRACE ( "restore target" ) ( _path ) (_numPeers ) (_compGroup)
134  		                          ( _conToFd.size() ) (_offset);
135  	    }
136  	
137  	    void dupAllSockets ( SlidingFdTable& slidingFd )
138  	    {
139  	      int lastfd = -1;
140  	      dmtcp::vector<int> fdlist;
141  	      for ( ConnectionToFds::const_iterator i = _conToFd.begin();
142  		    i!=_conToFd.end(); ++i )
143  	        {
144  	          Connection& con = ConnectionList::instance() [i->first];
145  	          if ( con.conType() == Connection::INVALID ){
146  	            JWARNING(false)(i->first).Text("Can't restore invalid Connection");
147  	            continue;
148  	          }
149  	
150  	          const dmtcp::vector<int>& fds = i->second;
151  	          for ( size_t x=0; x<fds.size(); ++x )
152  		    {
153  		      int fd = fds[x];
154  		      fdlist.push_back ( fd );
155  		      slidingFd.freeUpFd ( fd );
156  		      int oldFd = slidingFd.getFdFor ( i->first );
157  		      JTRACE ( "restoring fd" ) ( i->first ) ( oldFd ) ( fd );
158  		      //let connection do custom dup2 handling
159  		      con.restartDup2( oldFd, fd );
160  	
161  		      if ( fd > lastfd )
162  			{
163  			  lastfd = fd;
164  			}
165  		    }
166  	        }
167  	
168  	      size_t j;
169  	      for ( int i = 0 ; i < slidingFd.startFd() ; i++ )
170  	        {
171  	          for ( j = 0 ; j < fdlist.size() ; j++ )
172  		    {
173  		      if ( fdlist.at ( j ) == i )
174  			break;
175  		    }
176  	          if ( j == fdlist.size() )
177  		    {
178  		      _real_close ( i );
179  		    }
180  	        }
181  	
182  	      slidingFd.closeAll();
183  	    }
184  				
185  	    int find_stdin( SlidingFdTable& slidingFd )
186  	    {
187  	      for ( ConnectionToFds::const_iterator i = _conToFd.begin();
188  	          i!=_conToFd.end(); ++i )
189  	      {
190  	        const dmtcp::vector<int>& fds = i->second;
191  	        for ( size_t x=0; x<fds.size(); ++x )
192  	        {
193  	          if (fds[x] == STDIN_FILENO){
194  	            JTRACE("Found stdin: fds[x] <---> slidingFd.getFdFor()")
195  	              (x) (fds[x]) (slidingFd.getFdFor ( i->first ));
196  	            return slidingFd.getFdFor ( i->first );
197  	          }
198  	        }
199  	      }
200  	      return -1;
201  	    }
202  	
    // Hand the checkpoint image path and the offset obtained from
    // loadFromFile() to runMtcpRestore().  Callers in this file treat the
    // call as non-returning (it is followed by JASSERT(false) "unreachable").
    void mtcpRestart()
    {
      runMtcpRestore ( _path.c_str(), _offset );
    }

    // UniquePid recorded in the checkpoint's connection table.
    const UniquePid& pid() const { return _conToFd.pid(); }
    // Process name recorded in the checkpoint's connection table.
    const dmtcp::string& procname() const { return _conToFd.procname(); }
210  	
211  	#ifdef PID_VIRTUALIZATION
    // Session id -> whether a leader of that session exists in the process
    // subtree the map was built for (filled in by setupSessions()).
    typedef map<pid_t,bool> sidMapping;
    typedef sidMapping::iterator s_iterator;
    typedef vector<RestoreTarget *>::iterator t_iterator;

    // Mutable access to the virtual pid table loaded from the checkpoint.
    VirtualPidTable& getVirtualPidTable() { return _virtualPidTable; }
    // Register t as a child of this target; only the pointer is stored.
    void addChild(RestoreTarget *t){ _children.push_back(t); }
218  	
219  	    bool isSessionLeader(){
220  	      JTRACE("")(_virtualPidTable.sid()) (pid().pid());
221  	      if( _virtualPidTable.sid() == pid().pid() )
222  		return true;
223  	      else
224  		return false;
225  	    }
226  	
227  	    bool isGroupLeader(){
228  	      JTRACE("")(_virtualPidTable.sid()) (pid().pid());
229  	      if( _virtualPidTable.gid() == pid().pid() )
230  		return true;
231  	      else
232  		return false;
233  	    }
234  	
235  	    bool isForegroundProcess() {
236  	      JTRACE("")(_virtualPidTable.sid()) (pid().pid());
237  	      if( _virtualPidTable.fgid() == _virtualPidTable.gid() )
238  		return true;
239  	      else
240  		return false;
241  	    }
242  	
243  	    bool isInitChild(){
244  	      JTRACE("")(_virtualPidTable.ppid());
245  	      if( _virtualPidTable.ppid() == 1 )
246  		return true;
247  	      else
248  		return false;
249  	    }
250  	
251  	    int addRoot(RestoreTarget *t, pid_t sid){
252  	      if( isSessionLeader() && _virtualPidTable.sid() == sid ){
253  		_roots.push_back(t);
254  		return 1;
255  	      }else{
256  		t_iterator it = _children.begin();
257  		for(; it != _children.end(); it++){
258  		  if( (*it)->addRoot(t, sid) )
259  		    return 1;
260  		}
261  	      }
262  	      return 0;
263  	    }
264  	
265  	    // Traverse this process subtree and setup information about sessions
266  	    //   and their leaders for all children.
267  	    sidMapping &setupSessions() {
268  	      pid_t sid = _virtualPidTable.sid();
269  	      if( !_children.size() ) {
270  		_smap[sid] = isSessionLeader();
271  		return _smap;
272  	      }
273  	      // We have at least one child
274  	      t_iterator it = _children.begin();
275  	      _smap = (*it)->setupSessions();
276  	      for(it++; it != _children.end();it++) {
277  		sidMapping tmp = (*it)->setupSessions();
278  		s_iterator it1 = tmp.begin();
279  		for(;it1 != tmp.end(); it1++) {
280  		  s_iterator it2 = _smap.find(it1->first);
281  		  if( it2 != _smap.end() ) {
282  		    // mapping already exist
283  		    if( it2->second != it1->second ) {
284  		      // Session was created after child creation.  So child from one
285  		      // thread cannot be member of session of child from other thread.
286  		      JASSERT(false). Text("One child contains session leader"
287  					   " and other contains session member!\n");
288  		      exit(0);
289  		    }
290  		  } else {
291  		    // add new mapping
292  		    _smap[it1->first] = it1->second;
293  		  }
294  		}
295  	      }
296  	
297  	      s_iterator sit = _smap.find(sid);
298  	      if( sit != _smap.end() ) {
299  		if( sit->second && !isSessionLeader() ) {
300  		  // child is leader and parent is slave - impossible
301  		  JASSERT(false)
302  		         .Text("child is leader and parent is slave - impossible\n");
303  		  exit(0);
304  		}
305  	      }
306  	      _smap[sid] = isSessionLeader();
307  	      return _smap;
308  	}
309  	
310  	    void printMapping(){
311  	      t_iterator it = _children.begin();
312  	      for(; it != _children.end(); it++){
313  		(*it)->printMapping();
314  	      }
315  	      JTRACE("")(pid());
316  	      s_iterator sit = _smap.begin();
317  	      for(; sit != _smap.end(); sit++){
318  		JTRACE("") (sit->first) (sit->second);
319  	      }
320  	    }
321  	
    // Mutable access to this node's session map (built by setupSessions()).
    sidMapping &getSmap(){ return _smap; }
323  	
324  	    pid_t checkDependence(RestoreTarget *t){
325  	      sidMapping smap = t->getSmap();
326  	      s_iterator ext = smap.begin();
327  	      // Run through sessions --> has leader mapping
328  	      for(; ext != smap.end(); ext++){
329  		if( ext->second == false ){
330  		  // Session pointed by ext has no leader in target t process tree
331  		  s_iterator intern = _smap.find(ext->first);
332  		  if( intern != _smap.end() && intern->second == true ){
333  		    // internal target has session leader in its tree
334  		    // TODO: can process trees be connected through several sessions?
335  		    return ext->first;
336  		  }
337  		}
338  	      }
339  	      return -1;
340  	    }
341  	
342  	    void bringToForeground(SlidingFdTable& slidingFd)
343  	    {
344  	      char controllingTerm[L_ctermid];
345  	      pid_t pid;
346  	
347  	      int sin = find_stdin(slidingFd);
348  	
349  	      if( isSessionLeader() ){
350  	        // XXX: Where is the controlling terminal being set?
351  		char *ptr =  ttyname(sin);
352  		int fd = open(ptr,O_RDWR);
353  		if( ctermid(controllingTerm) ){
354  		  int tfd = open(ptr,O_RDONLY);
355  		  if( tfd >= 0 ){
356  		    JTRACE("Setting current controlling terminal") (controllingTerm);
357  		    close(tfd);
358  		  }else if (ptr == NULL){
359  	            JTRACE("Cannot restore controlling terminal") (ttyname(sin));
360  	          } else {
361  		    JWARNING(false) (ttyname(sin)) 
362  	                    .Text("Cannot restore controlling terminal");
363  		  }
364  		}
365  		if (fd >= 0) close(fd);
366  	      }
367  	
368  	      pid_t gid = getpgid(0);
369  	      pid_t fgid = tcgetpgrp(sin);
370  	
371  	      if( !isForegroundProcess() )
372  		return;
373  	      if( !isGroupLeader()  ){
374  		return;
375  	      }
376  	
377  	      if( gid != fgid ){
378  		if( !(pid = fork()) ){ // fork subversive process
379  		  // This process moves itself to current foreground group
380  		  // and then changes foreground group to what we need
381  		  // so it works as a spy, saboteur or wrecker :)
382  		  // -- Artem
383  		  JTRACE("Change current GID to foreground GID.");
384  	
385  		if( setpgid(0, fgid) ){
386  	          if (fgid == -1) {
387  	            JTRACE("CANNOT Change current GID to foreground GID")
388  	                  (getpid()) (fgid) (_virtualPidTable.fgid()) (gid) (JASSERT_ERRNO);
389  	          } else {
390  	            JWARNING(false) 
391  	                     (getpid()) (fgid) (_virtualPidTable.fgid()) (gid) (JASSERT_ERRNO)
392  	                    .Text("CANNOT Change current GID to foreground GID");
393  	          }
394  	 	  fflush(stdout);
395  	 	  exit(0);
396  		}
397  	
398  	        if( tcsetpgrp(sin, gid) ){
399  		  printf("CANNOT Move parent GID to foreground: %s\n",
400  			 strerror(errno));
401  	 	  printf("PID=%d, FGID=%d, GID=%d\n",getpid(),fgid,gid);
402  	 	  printf("PID=%d, FGID=%d, _FGID=%d, GID=%d\n",
403  			 getpid(),fgid,_virtualPidTable.fgid(), gid);
404  	 	  fflush(stdout);
405  	 	  exit(0);
406  	 	  }
407  	
408  		  JTRACE("Finish foregrounding.")(getpid())(getpgid(0))(tcgetpgrp(0));
409  		  exit(0);
410  		}else{
411  		  int status;
412  		  wait(&status);
413  		}
414  	      }
415  	    }
416  	
417  	    void restoreGroup( SlidingFdTable& slidingFd )
418  	    {
419  	      if( isGroupLeader() ){
420  		// create new group where this process becomes a leader
421  		JTRACE("Create new group.");
422  		setpgid(0, 0);
423  		bringToForeground(slidingFd);
424  	      }
425  	    }
426  	
427  	    void CreateProcess(DmtcpWorker& worker, SlidingFdTable& slidingFd)
428  	    {
429  	      dmtcp::ostringstream o;
430  	      o << dmtcpTmpDir << "/jassertlog." << pid();
431  	      JASSERT_INIT(o.str());
432  	
433  	      //change UniquePid
434  	      UniquePid::resetOnFork(pid());
435  	      VirtualPidTable &vt = _virtualPidTable;
436  	
437  	      JTRACE("")(_real_getpid())(_real_getppid())(_real_getsid(0));
438  	
439  	      vt.updateMapping(pid().pid(), _real_getpid());
440  	      pid_t psid = vt.sid();
441  	
442  	      if( !isSessionLeader() ){
443  	
444  		// Restore group information
445  		restoreGroup(slidingFd);
446  	
447  		// If process is not session leader, restore it and all children.
448  		t_iterator it = _children.begin();
449  		for(; it != _children.end(); it++){
450  		  JTRACE ( "Forking Child Process" ) ( (*it)->pid() );
451  		  pid_t cid = forkChild();
452  	
453  		  if ( cid == 0 )
454  	            {
455  	              (*it)->CreateProcess (worker, slidingFd);
456  	              JASSERT ( false ) . Text ( "Unreachable" );
457  	            }
458  		  JASSERT ( cid > 0 );
459  		  VirtualPidTable::iterator vit = vt.begin();
460  		  for(; vit != vt.end(); vit++){
461  		    if( (*it)->pid() == vit->second ){
462  		      vt.updateMapping ( vit->first, cid );
463  		      break;
464  		    }
465  		  }
466  	
467  		}
468  	      }else{
469  		// Process is session leader.
470  		// There may be not setsid-ed children.
471  		for(t_iterator it = _children.begin(); it != _children.end(); it++){
472  		  s_iterator sit = (*it)->getSmap().find(psid);
473  		  JTRACE("Restore processes that was created before their parent called setsid()");
474  		  if( sit == (*it)->getSmap().end() ){
475  		    JTRACE ( "Forking Child Process" ) ( (*it)->pid() );
476  		    pid_t cid = forkChild();
477  		    if ( cid == 0 )
478  		      {
479  			(*it)->CreateProcess (worker, slidingFd);
480  			JASSERT ( false ) . Text ( "Unreachable" );
481  		      }
482  		    JASSERT ( cid > 0 );
483  		    VirtualPidTable::iterator vit = _virtualPidTable.begin();
484  		    for(; vit != _virtualPidTable.end(); vit++){
485  		      if( (*it)->pid() == vit->second ){
486  			_virtualPidTable.updateMapping ( vit->first, cid );
487  		      }
488  		    }
489  		  }
490  		}
491  	
492  		pid_t nsid = setsid();
493  		JTRACE("change SID")(nsid);
494  		
495  		// Restore group information
496  		restoreGroup(slidingFd);
497  	
498  		for(t_iterator it = _children.begin(); it != _children.end(); it++) {
499  		  JTRACE("Restore processes that was created after their parent called setsid()");
500  		  s_iterator sit = (*it)->getSmap().find(psid);
501  		  if( sit != (*it)->getSmap().end() ) {
502  		    JTRACE ( "Forking Child Process" ) ( (*it)->pid() );
503  		    pid_t cid = forkChild();
504  		    if ( cid == 0 ){
505  		      (*it)->CreateProcess (worker, slidingFd );
506  		      JASSERT ( false ) . Text ( "Unreachable" );
507  		    }
508  		    JASSERT ( cid> 0 );
509  		    VirtualPidTable::iterator vit = _virtualPidTable.begin();
510  		    for(; vit != _virtualPidTable.end(); vit++) {
511  		      if( (*it)->pid() == vit->second ) {
512  			_virtualPidTable.updateMapping ( vit->first, cid );
513  		      }
514  		    }
515  		  }
516  		}
517  	
518  		for(t_iterator it = _roots.begin() ; it != _roots.end(); it++) {
519  		  JTRACE ( "Forking Dependent Root Process" ) ( (*it)->pid() );
520  		  pid_t cid;
521  		  if( (cid = fork()) ){
522  		    waitpid(cid, NULL, 0);
523  		  }else{
524  		    if( fork() )
525  		      exit(0);
526  		    (*it)->CreateProcess(worker, slidingFd );
527  		    JASSERT (false) . Text( "Unreachable" );
528  		  }
529  		}
530  	      }
531  	
532  	      JTRACE("Child and dependent root processes forked, restoring process")
533  		    (pid())(getpid())(isGroupLeader());
534  	      // Save PID mapping information
535  	      pid_t orig = pid().pid();
536  	      pid_t curr = _real_getpid();
537  	      dmtcp::VirtualPidTable::InsertIntoPidMapFile(orig, curr);
538  	
539  	      //Reconnect to dmtcp_coordinator
540  	      WorkerState::setCurrentState ( WorkerState::RESTARTING );
541  	      worker.connectToCoordinatorWithoutHandshake();
542  	      worker.sendCoordinatorHandshake(procname(), _compGroup);
543  	      dmtcp::string serialFile = dmtcp::UniquePid::pidTableFilename();
544  	
545  	      JTRACE ( "PidTableFile: ") ( serialFile ) ( dmtcp::UniquePid::ThisProcess() );
546  	      jalib::JBinarySerializeWriter tblwr ( serialFile );
547  	      _virtualPidTable.serialize ( tblwr );
548  	      tblwr.~JBinarySerializeWriter();
549  	
550  	      int stmpfd =  open( serialFile.c_str(), O_RDONLY);
551  	      JASSERT ( stmpfd >= 0 ) ( serialFile ) ( errno );
552  	
553  	      JASSERT ( dup2 ( stmpfd, PROTECTED_PIDTBL_FD) == PROTECTED_PIDTBL_FD )
554  		      ( serialFile ) ( stmpfd );
555  	
556  	      close (stmpfd);
557  	
558  	      //restart targets[i]
559  	      dupAllSockets ( slidingFd );
560  	
561  	      mtcpRestart();
562  	
563  	      JASSERT ( false ).Text ( "unreachable" );
564  	    }
565  	
566  	
567  	    static pid_t forkChild()
568  	    {
569  	      while ( 1 ) {
570  	
571  		pid_t childPid = fork();
572  	
573  		JASSERT ( childPid != -1 ) .Text ( "fork() failed" );
574  	
575  		if ( childPid == 0 ) { /* child process */
576  		  if ( originalPidTable.isConflictingChildPid ( getpid() ) )
577  		    _exit(1);
578  		  else
579  		    return 0;
580  		}
581  		else { /* Parent Process */
582  		  if ( originalPidTable.isConflictingChildPid ( childPid ) ) {
583  		    JTRACE( "PID Conflict, creating new child" ) (childPid);
584  		    waitpid ( childPid, NULL, 0 );
585  		  }
586  		  else
587  		    return childPid;
588  		}
589  	      }
590  	
591  	      return -1;
592  	    }
593  	#endif
594  	
    // Path of the checkpoint image this target restores.
    dmtcp::string _path;
    // Value returned by ConnectionToFds::loadFromFile(); passed on to
    // runMtcpRestore() together with _path.
    int _offset;
    // Connection-to-fd table loaded from the checkpoint image; also supplies
    // the UniquePid and process name of the checkpointed process.
    ConnectionToFds _conToFd;
    // Computation group read from the image (main() requires all targets to
    // agree on it).
    UniquePid _compGroup;
    // Peer count read from the image (main() requires all targets to agree).
    int _numPeers;
#ifdef PID_VIRTUALIZATION
    // Virtual pid table read from the image.
    VirtualPidTable _virtualPidTable;
    // Links to children of this process
    vector<RestoreTarget *> _children;
    // Links to roots that depend on this target
    // i.e. have SID of this target in its tree.
    vector<RestoreTarget *> _roots;
    // Session id -> leader-present flag for this subtree (see setupSessions()).
    sidMapping _smap;
    // Cleared in the constructor; main() restores targets whose flag is still
    // unset as processes "without corresponding Root Target".
    bool _used;
#endif
610  	  };
611  	
612  	
613  	} // end namespace
614  	
// Help text that main() writes to JASSERT_STDERR for --help, for an invalid
// option, and when the checkpoint image arguments are missing or malformed.
//
// gcc-4.3.4 -Wformat=2 issues false positives for warnings unless the format
// string has at least one format specifier with corresponding format argument.
// Ubuntu 9.01 uses -Wformat=2 by default.
static const char* theUsage =
  "USAGE:\n dmtcp_restart [OPTIONS] <ckpt1.dmtcp> [ckpt2.dmtcp...]\n\n"
  "OPTIONS:\n"
  "  --host, -h, (environment variable DMTCP_HOST):\n"
  "      Hostname where dmtcp_coordinator is run (default: localhost)\n"
  "  --port, -p, (environment variable DMTCP_PORT):\n"
  "      Port where dmtcp_coordinator is run (default: 7779)\n"
  "  --tmpdir, -t, (environment variable DMTCP_TMPDIR):\n"
  "      Directory to store temporary files \n"
  "        (default: $TMPDIR/dmtcp-$USER@$HOST or /tmp/dmtcp-$USER@$HOST)\n"
  "  --join, -j:\n"
  "      Join an existing coordinator, raise error if one doesn't already exist\n"
  "  --new, -n:\n"
  "      Create a new coordinator, raise error if one already exists\n"
  "  --new-coordinator:\n"
  "      Create a new coordinator even if one already exists\n"
  "  --batch, -b:\n"
  "      Enable batch mode i.e. start the coordinator on the same node on\n"
  "        a randomly assigned port (if no port is specified by --port)\n"
  "  --interval, -i, (environment variable DMTCP_CHECKPOINT_INTERVAL):\n"
  "      Time in seconds between automatic checkpoints.\n"
  "      Not allowed if --join is specified\n"
  "      --batch implies -i 3600, unless otherwise specified.\n"
  "  --no-check:\n"
  "      Skip check for valid coordinator and never start one automatically\n"
  "  --quiet, -q, (or set environment variable DMTCP_QUIET = 0, 1, or 2):\n"
  "      Skip banner and NOTE messages; if given twice, also skip WARNINGs\n\n"
  "See http://dmtcp.sf.net/ for more information.\n"
;
647  	
// Copyright / no-warranty banner.  main() writes it to JASSERT_STDERR when
// jassert_quiet is 0.
static const char* theBanner =
  "DMTCP/MTCP  Copyright (C) 2006-2010  "
    "Jason Ansel, Michael Rieker,\n"
  "                                       Kapil Arya, and Gene Cooperman\n"
  "This program comes with ABSOLUTELY NO WARRANTY.\n"
  "This is free software, and you are welcome to redistribute it\n"
    "under certain conditions; see COPYING file for details.\n"
  "(Use flag \"-q\" to hide this message.)\n"
  "\n"
;
656  	
//shift args
// Drops the first remaining command-line argument.  Expands to expressions
// on variables literally named argc and argv, so it only works where those
// are in scope (main() below).
#define shift argc--,argv++

// One RestoreTarget per checkpoint image accepted on the command line;
// filled by main().
dmtcp::vector<RestoreTarget> targets;

#ifdef PID_VIRTUALIZATION
// A root of the process tree.  main() skips roots whose indep flag is false:
// those are restored from one of the independent roots instead.
typedef struct {
  RestoreTarget *t;
  bool indep;
} RootTarget;
dmtcp::vector<RootTarget> roots;
// Called by main(), in this order, before any root process is forked.
// Their definitions are not in this part of the file.
void BuildProcessTree();
void ProcessGroupInfo();
void SetupSessions();

#endif
673  	
674  	int main ( int argc, char** argv )
675  	{
676  	  bool autoStartCoordinator=true;
677  	  bool isRestart = true;
678  	  int allowedModes = dmtcp::DmtcpWorker::COORD_ANY;
679  	
680  	  if (! getenv(ENV_VAR_QUIET))
681  	    setenv(ENV_VAR_QUIET, "0", 0);
682  	
683  	  //process args
684  	  shift;
685  	  while(true){
686  	    dmtcp::string s = argc>0 ? argv[0] : "--help";
687  	    if(s=="--help" || (s=="-h" && argc==1)){
688  	      JASSERT_STDERR << theUsage;
689  	      //fprintf(stderr, theUsage, "");
690  	      return 1;
691  	    }else if(s == "--no-check"){
692  	      autoStartCoordinator = false;
693  	      shift;
694  	    }else if(s == "-j" || s == "--join"){
695  	      allowedModes = dmtcp::DmtcpWorker::COORD_JOIN;
696  	      shift;
697  	    }else if(s == "-n" || s == "--new"){
698  	      allowedModes = dmtcp::DmtcpWorker::COORD_NEW;
699  	      shift;
700  	    }else if(s == "--new-coordinator"){
701  	      allowedModes = dmtcp::DmtcpWorker::COORD_FORCE_NEW;
702  	      shift;
703  	    }else if(s == "-b" || s == "--batch"){
704  	      allowedModes = dmtcp::DmtcpWorker::COORD_BATCH;
705  	      shift;
706  	    }else if(s == "-i" || s == "--interval"){
707  	      setenv(ENV_VAR_CKPT_INTR, argv[1], 1);
708  	      shift; shift;
709  	    }else if(argc>1 && (s == "-h" || s == "--host")){
710  	      setenv(ENV_VAR_NAME_ADDR, argv[1], 1);
711  	      shift; shift;
712  	    }else if(argc>1 && (s == "-p" || s == "--port")){
713  	      setenv(ENV_VAR_NAME_PORT, argv[1], 1);
714  	      shift; shift;
715  	    }else if(argc>1 && (s == "-t" || s == "--tmpdir")){
716  	      setenv(ENV_VAR_TMPDIR, argv[1], 1);
717  	      shift; shift;
718  	    }else if(s == "-q" || s == "--quiet"){
719  	      *getenv(ENV_VAR_QUIET) = *getenv(ENV_VAR_QUIET) + 1;
720  	      // Just in case a non-standard version of setenv is being used:
721  	      setenv(ENV_VAR_QUIET, getenv(ENV_VAR_QUIET), 1);
722  	      shift;
723  	    }else if( (s.length()>2 && s.substr(0, 2)=="--") ||
724  	              (s.length()>1 && s.substr(0, 1)=="-" ) ) {
725  	      JASSERT_STDERR << "Invalid Argument\n";
726  	      JASSERT_STDERR << theUsage;
727  	      return 1;
728  	    }else if(argc>1 && s=="--"){
729  	      shift;
730  	      break;
731  	    }else{
732  	      break;
733  	    }
734  	  }
735  	
736  	  dmtcp::UniquePid::setTmpDir(getenv(ENV_VAR_TMPDIR));
737  	  dmtcpTmpDir = dmtcp::UniquePid::getTmpDir();
738  	
739  	  jassert_quiet = *getenv(ENV_VAR_QUIET) - '0';
740  	
741  	  if (jassert_quiet == 0)
742  	    JASSERT_STDERR << theBanner;
743  	
744  	  if (autoStartCoordinator)
745  	    dmtcp::DmtcpWorker::startCoordinatorIfNeeded(allowedModes, isRestart);
746  	
747  	  //make sure JASSERT initializes now, rather than during restart
748  	  dmtcp::ostringstream o;
749  	  o << dmtcpTmpDir << "/jassertlog." << dmtcp::UniquePid(getpid());
750  	  JASSERT_INIT(o.str());
751  	  JTRACE("New dmtcp_restart process; _argc_ ckpt images") (argc);
752  	
753  	  bool doAbort = false;
754  	  for(; argc>0; shift){
755  	    dmtcp::string restorename(argv[0]);
756  	    struct stat buf;
757  	    int rc = stat(restorename.c_str(), &buf);
758  	    if (dmtcp::Util::strStartsWith(restorename, "ckpt_") &&
759  	        dmtcp::Util::strEndsWith(restorename, "_files")) {
760  	      continue;
761  	#ifndef URDB
762  	    } else if (!dmtcp::Util::strEndsWith(restorename, ".dmtcp")) {
763  	      JNOTE("File doesn't have .dmtcp extension. Check Usage.")
764  	        (restorename);
765  	      JASSERT_STDERR << theUsage;
766  	      doAbort = true;
767  	#endif
768  	    } else if (rc == -1) {
769  	      char error_msg[1024];
770  	      sprintf(error_msg, "\ndmtcp_restart: ckpt image %s", restorename.c_str());
771  	      perror(error_msg);
772  	      doAbort = true;
773  	    } else if (buf.st_uid != getuid()) { /*Could also run if geteuid() matches*/
774  	      printf("\nProcess uid (%d) doesn't match uid (%d) of\n" \
775  	             "checkpoint image (%s).\n" \
776  		     "This is dangerous.  Aborting for security reasons.\n" \
777  	           "If you still want to do this (at your own risk),\n" \
778  	           "  then modify dmtcp/src/%s:%d and re-compile.\n",
779  	           getuid(), buf.st_uid, restorename.c_str(), __FILE__, __LINE__ - 6);
780  	      doAbort = true;
781  	    }
782  	    if (doAbort) {
783  	      exit(1);
784  	    }
785  	
786  	    JTRACE("Will restart ckpt image _argv[0]_") (argv[0]);
787  	    targets.push_back ( RestoreTarget ( argv[0] ) );
788  	  }
789  	
790  	  if (targets.size() <= 0) {
791  	    JNOTE("ERROR: No DMTCP checkpoint image(s) found. Check Usage.");
792  	    JASSERT_STDERR << theUsage;
793  	    exit(1);
794  	  }
795  	
796  	  SlidingFdTable slidingFd;
797  	  ConnectionToFds conToFd;
798  	
799  	  ConnectionList& connections = ConnectionList::instance();
800  	  for ( ConnectionList::iterator i = connections.begin()
801  	                                     ; i!= connections.end()
802  	          ; ++i )
803  	  {
804  	    conToFd[i->first].push_back ( slidingFd.getFdFor ( i->first ) );
805  	    JTRACE ( "will restore" ) ( i->first ) ( conToFd[i->first].back() );
806  	  }
807  	
808  	  // Check that all targets belongs to one computation group
809  	  // If not - abort
810  	  for(size_t i=0; i<targets.size(); i++){
811  	    JTRACE ( "Check targets: " )
812  	      ( targets[i]._path ) ( targets[i]._compGroup ) ( targets[i]._numPeers );
813  	  }
814  	
815  	  compGroup = targets[0]._compGroup;
816  	  numPeers = targets[0]._numPeers;
817  	  for(size_t i=0; i<targets.size(); i++){
818  	    if( compGroup != targets[i]._compGroup){
819  	      JASSERT(false)(compGroup)(targets[i]._compGroup)
820  		.Text("ERROR: Restored programs belongs to different computation IDs");
821  	    }else if( numPeers != targets[i]._numPeers ){
822  	      JASSERT(false)(numPeers)(targets[i]._numPeers)
823  		.Text("ERROR: Different numpber of processes saved in checkpoint images");
824  	    }
825  	  }
826  	
827  	  //------------------------
828  	  DmtcpWorker worker ( false );
829  	  WorkerState::setCurrentState ( WorkerState::RESTARTING );
830  	  ConnectionState ckptCoord ( conToFd );
831  	  worker.restoreSockets ( ckptCoord, compGroup, numPeers, coordTstamp );
832  	
833  	#ifndef PID_VIRTUALIZATION
834  	  int i = (int)targets.size();
835  	
836  	  //fork into targs.size() processes
837  	  while(--i > 0){
838  	    int cid = fork();
839  	    if(cid==0) break;
840  	    else JASSERT(cid>0);
841  	  }
842  	  RestoreTarget& targ = targets[i];
843  	
844  	  JTRACE("forked, restoring process")(i)(targets.size())(targ.pid())(getpid());
845  	
846  	  //change UniquePid
847  	  UniquePid::resetOnFork(targ.pid());
848  	
849  	  //Reconnect to dmtcp_coordinator
850  	  WorkerState::setCurrentState ( WorkerState::RESTARTING );
851  	  worker.connectToCoordinatorWithoutHandshake();
852  	  worker.sendCoordinatorHandshake(targ.procname());
853  	
854  	  //restart targets[i]
855  	  targets[i].dupAllSockets ( slidingFd );
856  	  targets[i].mtcpRestart();
857  	
858  	  JASSERT ( false ).Text ( "unreachable" );
859  	  return -1;
860  	#else
861  	  size_t i = targets.size();
862  	
863  	  // Create roots vector, assign children to their parents.
864  	  // Delete children that don't exist.
865  	  BuildProcessTree();
866  	
867  	  // Process all checkpoints to find one of them that can switch
868  	  // needed group to foreground.
869  	  ProcessGroupInfo();
870  	  // Create session meta-information in each node of the process tree.
871  	  // Node contains info about all sessions which exists at lower levels.
872  	  // Also node is aware of session leader existence at lower levels.
873  	  SetupSessions();
874  	
875  	  /* Create the file to hold the pid/tid maps. */
876  	  openOriginalToCurrentMappingFiles();
877  	
878  	  int pgrp_index=-1;
879  	  JTRACE ( "Creating ROOT Processes" )(roots.size());
880  	  for ( int j = 0 ; j < roots.size(); ++j )
881  	  {
882  	    if( roots[j].indep == false ){
883  	      // We will restore this process from one of the independent roots.
884  	      continue;
885  	    }
886  	    if (pgrp_index == -1 && !roots[j].t->isInitChild() ){
887  	      pgrp_index = j;
888  	      continue;
889  	    }
890  	
891  	    pid_t cid = fork();
892  	    if ( cid == 0 ){
893  	      JTRACE ( "Root of process tree" ) ( _real_getpid() ) ( _real_getppid() );
894  	      if( roots[j].t->isInitChild() ){
895  	        JTRACE ( "Create init-child process" ) ( _real_getpid() )
896  						       ( _real_getppid() );
897  	        if( fork() )
898  	          _exit(0);
899  	      }
900  	      roots[j].t->CreateProcess(worker, slidingFd);
901  	      JASSERT (false) . Text( "Unreachable" );
902  	    }
903  	    JASSERT ( cid > 0 );
904  	    if( roots[j].t->isInitChild() ){
905  	      waitpid(cid, NULL, 0);
906  	    }
907  	  }
908  	
909  	  JTRACE("Restore processes without corresponding Root Target");
910  	  int flat_index = -1;
911  	  int j = 0;
912  	  if( pgrp_index < 0 ){ // No root processes at all
913  	    // Find first flat process that can replace currently running
914  	    //   dmtcp_restart context.
915  	    for (j = 0; j < targets.size(); ++j){
916  	      if( !targets[j]._used ){
917  	            // Save first flat-like process to be restored after all others
918  	            flat_index = j;
919  	            j++;
920  	            break;
921  	      }
922  	    }
923  	  }
924  	  // Use j set to 0 (if at least one root non-init-child process exists),
925  	  // or else j set to some value if no such process found.
926  	  for(; j < targets.size(); ++j)
927  	  {
928  	    if( !targets[j]._used ){
929  	      if( pgrp_index < 0 ){
930  	          // Save first flat-like process to be restored after all others
931  	          pgrp_index = j;
932  	          continue;
933  	      }else{
934  	        targets[j].CreateProcess(worker, slidingFd);
935  	        JTRACE("Need in flat-like restore for process")(targets[j].pid());
936  	      }
937  	    }
938  	  }
939  	
940  	  if( pgrp_index >=0 ){
941  	    JTRACE("Restore first Root Target")(roots[pgrp_index].t->pid());
942  	    roots[pgrp_index].t->CreateProcess(worker, slidingFd);
943  	  }else if (flat_index >= 0){
944  	    JTRACE("Restore first Flat Target")(targets[flat_index].pid());
945  	    targets[flat_index].CreateProcess(worker, slidingFd );
946  	  }else{
947  	    // FIXME: Under what conditions will this path be exercised?
948  	    JNOTE ("unknown type of target?") (targets[flat_index]._path);
949  	  }
950  	#endif
951  	}
952  	
953  	#ifdef PID_VIRTUALIZATION
954  	void BuildProcessTree()
955  	{
956  	  for (size_t j = 0; j < targets.size(); ++j)
957  	  {
958  	    VirtualPidTable& virtualPidTable = targets[j].getVirtualPidTable();
959  	    originalPidTable.insertFromVirtualPidTable ( virtualPidTable );
960  	    if( virtualPidTable.isRootOfProcessTree() == true ){
961  	      // If this process is independent (root of process tree
962  	      RootTarget rt;
963  	      rt.t = &targets[j];
964  	      rt.indep = true;
965  	      roots.push_back(rt);
966  	      targets[j]._used = true;
967  	    }else if( !targets[j]._used ){
968  	      // We set used flag if we use target as somebodys child. If it is used - no need to check is it roor
969  	      // Iterate through all targets and try to find the one who has this process
970  	      // as child process
971  	      JTRACE("Process is not root of process tree: try to find if it has parent");
972  	      bool is_root = true;
973  	      for (size_t i = 0; i < targets.size(); i++) {
974  	        VirtualPidTable & virtualPidTable = targets[i].getVirtualPidTable();
975  	        VirtualPidTable::iterator it;
976  	        // Search inside the child list of target[j], make sure that i != j
977  	        for (it = virtualPidTable.begin(); (i != j) && (it != virtualPidTable.end()) ; it++) {
978  	          UniquePid& childUniquePid = it->second;
979  	          JTRACE("Check child")(childUniquePid)(" parent ")(targets[i].pid())("checked ")(targets[j].pid());
980  	          if (childUniquePid == targets[j].pid()){
981  	            is_root = false;
982  	            break;
983  	          }
984  	        }
985  	      }
986  	      JTRACE("Root detection:")(is_root)(targets[j].pid());
987  	      if( is_root ){
988  	        RootTarget rt;
989  	        rt.t = &targets[j];
990  	        rt.indep = true;
991  	        roots.push_back(rt);
992  	        targets[j]._used = true;
993  	      }
994  	    }
995  	
996  	    // Add all children
997  	    VirtualPidTable::iterator it;
998  	    for(it = virtualPidTable.begin(); it != virtualPidTable.end(); it++ ){
999  	      // find target
1000 	      bool found = false;
1001 	      pid_t childOriginalPid = it->first;
1002 	      UniquePid& childUniquePid = it->second;
1003 	
1004 	      for ( size_t i = 0; i < targets.size(); i++ )
1005 	      {
1006 	        if ( childUniquePid == targets[i].pid() )
1007 	        {
1008 	          found = 1;
1009 	          JTRACE ( "Add child to current target" ) ( targets[j].pid() ) ( childUniquePid );
1010 	          targets[i]._used = true;
1011 	          targets[j].addChild(&targets[i]);
1012 	        }
1013 	      }
1014 	      if ( !found ){
1015 	        JTRACE("Child not found")(childOriginalPid);
1016 	        virtualPidTable.erase( childOriginalPid );
1017 	      }
1018 	    }
1019 	  }
1020 	}
1021 	
1022 	/*
1023 	 * Group processing
1024 	 * 1. Divide all processes into sessions
1025 	 * 2. Divide processes in each session into groups
 * 3. In each session check that all processes stored the same foreground
 *    group ID (a value of -1 is ignored).  If not, something's wrong:  ABORT
1028 	 * 4. In each session choose the process that can bring appropriate group
1029 	 *    to foreground
1030 	 * 5. Serialize information about chosen UniquePIDs in following
1031 	 *    format: "COUNT:unique-pid1:unique-pid2:..."
1032 	 * 6. Deserialize information from step 5 in forked and restored processes.
1033 	 *
1034 	 */
1035 	
1036 	class group {
1037 	public:
1038 	  group(){
1039 	    gid = -2;
1040 	  }
1041 	  pid_t gid;
1042 	  vector<RestoreTarget*> targets;
1043 	};
1044 	
1045 	class session{
1046 	public:
1047 	  session(){
1048 	    sid = -2;
1049 	    fgid = -2;
1050 	  }
1051 	  pid_t sid;
1052 	  pid_t fgid;
1053 	  map<pid_t,group> groups;
1054 	  typedef map<pid_t,group>::iterator group_it;
1055 	  UniquePid upid;
1056 	};
1057 	
1058 	void ProcessGroupInfo()
1059 	{
1060 	  map<pid_t,session> smap;
1061 	  map<pid_t,session>::iterator it;
1062 	
1063 	  // 1. divide processes into sessions and groups
1064 	  for (size_t j = 0; j < targets.size(); j++)
1065 	  {
1066 	    VirtualPidTable& virtualPidTable = targets[j].getVirtualPidTable();
1067 	    JTRACE("Process ")
1068 	      (virtualPidTable.pid())(virtualPidTable.ppid())(virtualPidTable.sid())
1069 	      (virtualPidTable.gid())(virtualPidTable.fgid())
1070 	      (virtualPidTable.isRootOfProcessTree());
1071 	
1072 	    pid_t sid = virtualPidTable.sid();
1073 	    pid_t gid = virtualPidTable.gid();
1074 	    pid_t fgid = virtualPidTable.fgid();
1075 	
1076 	    /*
1077 	    // If group ID doesn't belong to known PIDs, indicate that fact
1078 	    //   using -1 value.
1079 	    if( !virtualPidTable.pidExists(gid) ){
1080 	    JTRACE("DROP gid")(gid);
1081 	    virtualPidTable.setgid(-1);
1082 	    gid = -1;
1083 	    }
1084 	    // If foreground group ID not belongs to known PIDs,
1085 	    //   indicate that fact using -1 value.
1086 	    if( !virtualPidTable.pidExists(fgid) ){
1087 	    JTRACE("DROP fgid")(fgid);
1088 	    virtualPidTable.setfgid(-1);
1089 	    fgid = -1;
1090 	    }
1091 	    */
1092 	
1093 	    session &s = smap[sid];
1094 	    // if this is first element of this session
1095 	    if( s.sid == -2 ){
1096 	      s.sid = sid;
1097 	    }
1098 	    group &g = smap[sid].groups[gid];
1099 	    // if this is first element of group gid
1100 	    if( g.gid == -2 ){
1101 	      g.gid = gid;
1102 	    }
1103 	    g.targets.push_back(&targets[j]);
1104 	  }
1105 	
1106 	  // 2. Check if foreground setting is correct
1107 	  it = smap.begin();
1108 	  for(;it != smap.end();it++){
1109 	    session &s = it->second;
1110 	    session::group_it g_it = s.groups.begin();
1111 	    pid_t fgid = -2;
1112 	    for(; g_it!=s.groups.end();g_it++){
1113 	      group &g = g_it->second;
1114 	      for(size_t k=0; k<g.targets.size(); k++){
1115 	        VirtualPidTable& virtualPidTable = g.targets[k]->getVirtualPidTable();
1116 	        pid_t cfgid = virtualPidTable.fgid();
1117 	        if( fgid == -2 ){
1118 	          fgid = cfgid;
1119 	        }else if( fgid != -1 && cfgid != -1 && fgid != cfgid ){
1120 	          printf("Error: process from same session stores different"
1121 	              " foreground group ID: %d, %d\n", fgid, cfgid);
1122 	          // DEBUG PRINTOUT:
1123 	          {
1124 	            session::group_it g_it1 = s.groups.begin();
1125 	            for(; g_it1!=s.groups.end();g_it1++){
1126 	              group &g1 = g_it1->second;
1127 	              for(size_t m=0; m<g1.targets.size() ;m++){
1128 	                VirtualPidTable& virtualPidTable = g1.targets[m]->getVirtualPidTable();
1129 	                pid_t pid = virtualPidTable.pid();
1130 	                pid_t cfgid = virtualPidTable.fgid();
1131 	                printf("PID=%d <--> FGID = %d\n",pid,cfgid);
1132 	              }
1133 	            }
1134 	          }
1135 	          abort();
1136 	        }
1137 	      }
1138 	      JTRACE("Checked ")(fgid);
1139 	    }
1140 	    s.fgid = fgid;
1141 	    if( s.groups.find(s.fgid) == s.groups.end() ){
1142 	      // foreground group is missing, don't need to change foreground groop
1143 	      s.fgid = -1;
1144 	    }
1145 	
1146 	    {
1147 	      session::group_it g_it1 = s.groups.begin();
1148 	      for(; g_it1!=s.groups.end();g_it1++){
1149 	        group &g1 = g_it1->second;
1150 	        for(size_t m=0; m<g1.targets.size(); m++){
1151 	          VirtualPidTable& virtualPidTable = g1.targets[m]->getVirtualPidTable();
1152 	          pid_t pid = virtualPidTable.pid();
1153 	          pid_t cfgid = virtualPidTable.fgid();
1154 	          JTRACE("PID=%d <--> FGID = %d")(pid)(cfgid);
1155 	        }
1156 	      }
1157 	    }
1158 	  }
1159 	
1160 	  // Print out session mapping.
1161 	  JTRACE("Session number:")(smap.size());
1162 	  it = smap.begin();
1163 	  for( ; it != smap.end(); it++ ){
1164 	    session &s = it->second;
1165 	    JTRACE("Session printout:")(s.sid)(s.fgid)(s.upid.toString().c_str());
1166 	    session::group_it g_it = s.groups.begin();
1167 	    for(; g_it!=s.groups.end();g_it++){
1168 	      group &g = g_it->second;
1169 	      JTRACE("\tGroup ID: ")(g.gid);
1170 	      /*
1171 	         for(k=0; k<g.targets.size() ;k++){
1172 	         printf("%d ", g.targets[k]->pid().pid());
1173 	         }
1174 	         printf("\n");
1175 	         */
1176 	    }
1177 	  }
1178 	}
1179 	
1180 	void SetupSessions()
1181 	{
1182 	  for(size_t j=0; j < roots.size(); j++){
1183 	    roots[j].t->setupSessions();
1184 	  }
1185 	
1186 	  for(size_t i = 0; i < roots.size(); i++){
1187 	    for(size_t j = 0; j < roots.size(); j++){
1188 	      if( i == j )
1189 	        continue;
1190 	      pid_t sid;
1191 	      if( (sid = (roots[i].t)->checkDependence(roots[j].t)) >= 0 ){
1192 	        // it2 depends on it1
1193 	        JTRACE("Root target j depends on Root target i")(i)(roots[i].t->pid())(j)(roots[j].t->pid());
1194 	        (roots[i].t)->addRoot(roots[j].t, sid);
1195 	        roots[j].indep = false;
1196 	      }
1197 	    }
1198 	  }
1199 	}
1200 	
1201 	int openSharedFile(dmtcp::string name, int flags)
1202 	{
1203 	  int fd;
1204 	  // try to create, truncate & open file
1205 	  if( (fd = open(name.c_str(), O_EXCL|O_CREAT|O_TRUNC | flags, 0600)) >= 0) {
1206 	    return fd;
1207 	  }
1208 	  if (fd < 0 && errno == EEXIST) {
1209 	    if ((fd = open(name.c_str(), flags, 0600)) > 0) {
1210 	      return fd;
1211 	    }
1212 	  }
1213 	  // unable to create & open OR open
1214 	  JASSERT( false )(name)(strerror(errno)).Text("Cannot open file");
1215 	  return -1;
1216 	}
1217 	
1218 	static void openOriginalToCurrentMappingFiles()
1219 	{
1220 	  dmtcp::ostringstream pidMapFile, pidMapCountFile;
1221 	  dmtcp::ostringstream shmidListFile, shmidMapFile;
1222 	  int fd;
1223 	
1224 	  shmidMapFile << dmtcpTmpDir << "/dmtcpShmidMap."
1225 	     << compGroup << "." << std::hex << coordTstamp;
1226 	  shmidListFile << dmtcpTmpDir << "/dmtcpShmidList."
1227 	     << compGroup << "." << std::hex << coordTstamp;
1228 	
1229 	  pidMapFile << dmtcpTmpDir << "/dmtcpPidMap."
1230 	     << compGroup << "." << std::hex << coordTstamp;
1231 	  pidMapCountFile << dmtcpTmpDir << "/dmtcpPidMapCount."
1232 	     << compGroup << "." << std::hex << coordTstamp;
1233 	
1234 	  // Open and create shmidListFile if it doesn't exist.
1235 	  JTRACE("Open dmtcpShmidMapFile")(shmidListFile.str());
1236 	  fd = openSharedFile(shmidListFile.str(), (O_WRONLY|O_APPEND));
1237 	  JASSERT ( fd != -1 );
1238 	  JASSERT ( dup2 ( fd, PROTECTED_SHMIDLIST_FD ) == PROTECTED_SHMIDLIST_FD )
1239 		  ( shmidListFile.str() );
1240 	  close (fd);
1241 	
1242 	  // Open and create shmidMapFile if it doesn't exist.
1243 	  JTRACE("Open dmtcpShmidMapFile")(shmidMapFile.str());
1244 	  fd = openSharedFile(shmidMapFile.str(), (O_WRONLY|O_APPEND));
1245 	  JASSERT ( fd != -1 );
1246 	  JASSERT ( dup2 ( fd, PROTECTED_SHMIDMAP_FD ) == PROTECTED_SHMIDMAP_FD )
1247 		  ( shmidMapFile.str() );
1248 	  close (fd);
1249 	
1250 	  // Open and create pidMapFile if it doesn't exist.
1251 	  JTRACE("Open dmtcpPidMapFile")(pidMapFile.str());
1252 	  fd = openSharedFile(pidMapFile.str(), (O_WRONLY|O_APPEND));
1253 	  JASSERT ( fd != -1 );
1254 	  JASSERT ( dup2 ( fd, PROTECTED_PIDMAP_FD ) == PROTECTED_PIDMAP_FD )
1255 		  ( pidMapFile.str() );
1256 	  close (fd);
1257 	
1258 	  // Open and create pidMapCountFile if it doesn't exist.
1259 	  JTRACE("Open dmtcpPidMapCount files for writing")(pidMapCountFile.str());
1260 	  fd = openSharedFile(pidMapCountFile.str(), O_RDWR);
1261 	  JASSERT ( fd != -1 );
1262 	  JASSERT ( dup2 ( fd, PROTECTED_PIDMAPCNT_FD ) == PROTECTED_PIDMAPCNT_FD )
1263 		  ( pidMapCountFile.str() );
1264 	  close(fd);
1265 	
1266 	  dmtcp::Util::lockFile(PROTECTED_PIDMAPCNT_FD);
1267 	
1268 	  // Initialize pidMapCountFile with zero value.
1269 	  static jalib::JBinarySerializeWriterRaw countwr(pidMapCountFile.str(),
1270 							  PROTECTED_PIDMAPCNT_FD);
1271 	  if( countwr.isempty() ){
1272 	    JTRACE("pidMapCountFile is empty.  Initialize it with count = 0")
1273 	      (pidMapCountFile.str());
1274 	    size_t numMaps = 0;
1275 	    dmtcp::VirtualPidTable::serializeEntryCount (countwr, numMaps);
1276 	    fsync(PROTECTED_PIDMAPCNT_FD);
1277 	  }else{
1278 	    JTRACE("pidMapCountFile is not empty - do nothing");
1279 	  }
1280 	
1281 	  dmtcp::Util::unlockFile(PROTECTED_PIDMAPCNT_FD);
1282 	}
1283 	#endif
1284 	
1285 	static void runMtcpRestore ( const char* path, int offset )
1286 	{
1287 	  static dmtcp::string mtcprestart = jalib::Filesystem::FindHelperUtility ( "mtcp_restart" );
1288 	
1289 	  // Tell mtcp_restart process to write its debugging information to
1290 	  // PROTECTED_STDERR_FD. This way we prevent it from spitting out garbage onto
1291 	  // FD_STDERR if it is being used by the user process in a special way.
1292 	  char protected_stderr_fd_str[16];
Event secure_coding: [VERY RISKY]. Using "sprintf" can cause a buffer overflow when done incorrectly. Because sprintf() assumes an arbitrarily long string, callers must be careful not to overflow the actual space of the destination. Use snprintf() instead, or correct precision specifiers.
Also see events: [secure_coding]
1293 	  sprintf(protected_stderr_fd_str, "%d", PROTECTED_STDERR_FD);
1294 	
1295 	#ifdef USE_MTCP_FD_CALLING
1296 	  int fd = ConnectionToFds::openMtcpCheckpointFile(path);
1297 	  char buf[64];
1298 	  sprintf(buf, "%d", fd);
1299 	  char buf2[64];
1300 	  // gzip_child_pid set by openMtcpCheckpointFile() above.
1301 	  sprintf(buf2, "%d", dmtcp::ConnectionToFds::gzip_child_pid);
1302 	
1303 	  char* newArgs[] = {
1304 	    ( char* ) mtcprestart.c_str(),
1305 	    ( char* ) "--stderr-fd",
1306 	    protected_stderr_fd_str,
1307 	    ( char* ) "--fd",
1308 	    buf,
1309 	    ( char* ) "--gzip-child-pid",
1310 	    buf2,
1311 	    NULL
1312 	  };
1313 	  if (dmtcp::ConnectionToFds::gzip_child_pid == -1) // If no gzip compression
1314 	    newArgs[3] = NULL;
1315 	
1316 	  JTRACE ( "launching mtcp_restart --fd" )(fd)(path);
1317 	#else
1318 	  char buf[64];
Event secure_coding: [VERY RISKY]. Using "sprintf" can cause a buffer overflow when done incorrectly. Because sprintf() assumes an arbitrarily long string, callers must be careful not to overflow the actual space of the destination. Use snprintf() instead, or correct precision specifiers.
Also see events: [secure_coding]
1319 	  sprintf(buf, "%d", offset);
1320 	
1321 	  char* newArgs[] = {
1322 	    ( char* ) mtcprestart.c_str(),
1323 	    ( char* ) "--stderr-fd",
1324 	    protected_stderr_fd_str,
1325 	    ( char* ) "--offset",
1326 	    buf,
1327 	    (char*) path,
1328 	    NULL
1329 	  };
1330 	
1331 	  JTRACE ( "launching mtcp_restart --offset" )(path)(offset);
1332 	
1333 	#endif
1334 	
1335 	  execvp ( newArgs[0], newArgs );
1336 	  JASSERT ( false ) ( newArgs[0] ) ( newArgs[1] ) ( JASSERT_ERRNO )
1337 	          .Text ( "exec() failed" );
1338 	}