1 /****************************************************************************
2 * Copyright (C) 2006-2010 by Jason Ansel, Kapil Arya, and Gene Cooperman *
3 * jansel@csail.mit.edu, kapil@ccs.neu.edu, gene@ccs.neu.edu *
4 * *
5 * This file is part of the dmtcp/src module of DMTCP (DMTCP:dmtcp/src). *
6 * *
7 * DMTCP:dmtcp/src is free software: you can redistribute it and/or *
8 * modify it under the terms of the GNU Lesser General Public License as *
9 * published by the Free Software Foundation, either version 3 of the *
10 * License, or (at your option) any later version. *
11 * *
12 * DMTCP:dmtcp/src is distributed in the hope that it will be useful, *
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
15 * GNU Lesser General Public License for more details. *
16 * *
17 * You should have received a copy of the GNU Lesser General Public *
18 * License along with DMTCP:dmtcp/src. If not, see *
19 * <http://www.gnu.org/licenses/>. *
20 ****************************************************************************/
21
22 #include <unistd.h>
23
24 #include <stdlib.h>
25 #include <string>
26 #include <stdio.h>
27 #include "../jalib/jassert.h"
28 #include "../jalib/jfilesystem.h"
29 #include "connectionmanager.h"
30 #include "dmtcpworker.h"
31 #include "dmtcpmessagetypes.h"
32 #include "connectionstate.h"
33 #include "mtcpinterface.h"
34 #include "syscallwrappers.h"
35 #include "protectedfds.h"
36 #include "util.h"
37 #include <sys/types.h>
38 #include <sys/stat.h>
39 #include <fcntl.h>
40 #include <errno.h>
41 #include <vector>
42
43 // Some global definitions
44 static dmtcp::UniquePid compGroup;
45 static int numPeers;
46 static int coordTstamp = 0;
47
48 dmtcp::string dmtcpTmpDir = "/DMTCP/UnInitialized/Tmp/Dir";
49
50 using namespace dmtcp;
51
52 #ifdef PID_VIRTUALIZATION
53 static void openOriginalToCurrentMappingFiles();
54 void unlockPidMapFile();
55 #endif
56 static void runMtcpRestore ( const char* path, int offset );
57
58 namespace
59 {
60
61 #ifdef PID_VIRTUALIZATION
62
63 class OriginalPidTable {
64 public:
65 OriginalPidTable(){}
66
67 void insertFromVirtualPidTable ( dmtcp::VirtualPidTable& vt )
68 {
69 dmtcp::vector< pid_t > tmpVector;
70
71 _insert(vt.pid());
72
73 tmpVector = vt.getChildPidVector();
74 for ( size_t i = 0; i < tmpVector.size(); ++i )
75 _insert(tmpVector[i]);
76
77 tmpVector = vt.getTidVector();
78 for ( size_t i = 0; i < tmpVector.size(); ++i )
79 _insert(tmpVector[i]);
80 }
81
82 void _insert( pid_t pid )
83 {
84 if (!isConflictingChildPid (pid) /* && newVector[i] != getpid()*/) {
85 _vector.push_back ( pid );
86 JTRACE("New Pid Pushed to PidVector") (pid);
87 }
88 }
89
90 bool isConflictingChildPid ( pid_t pid )
91 {
92 //iterator i = _vector.find ( pid );
93 //if ( i == _vector.end() )
94 // return false;
95 for ( size_t i = 0; i < _vector.size(); ++i )
96 if ( _vector[i] == pid )
97 return true;
98
99 return false;
100 }
101
102 size_t numPids () { return _vector.size(); }
103
104 private:
105 typedef dmtcp::vector< pid_t >::iterator iterator;
106 dmtcp::vector< pid_t > _vector;
107 };
108
109 OriginalPidTable originalPidTable;
110
111 #endif
112
113
114 class RestoreTarget
115 {
116 public:
117 RestoreTarget ( const dmtcp::string& path )
118 : _path ( path )
119 {
120 JASSERT ( jalib::Filesystem::FileExists ( _path ) ) ( _path )
121 .Text ( "checkpoint file missing" );
122 #ifdef PID_VIRTUALIZATION
123 _offset = _conToFd.loadFromFile(_path, _compGroup, _numPeers,
124 _virtualPidTable);
125 _virtualPidTable.erase(getpid());
126 _roots.clear();
127 _children.clear();
128 _smap.clear();
129 _used = 0;
130 #else
131 _offset = _conToFd.loadFromFile(_path, _compGroup, _numPeers);
132 #endif
133 JTRACE ( "restore target" ) ( _path ) (_numPeers ) (_compGroup)
134 ( _conToFd.size() ) (_offset);
135 }
136
137 void dupAllSockets ( SlidingFdTable& slidingFd )
138 {
139 int lastfd = -1;
140 dmtcp::vector<int> fdlist;
141 for ( ConnectionToFds::const_iterator i = _conToFd.begin();
142 i!=_conToFd.end(); ++i )
143 {
144 Connection& con = ConnectionList::instance() [i->first];
145 if ( con.conType() == Connection::INVALID ){
146 JWARNING(false)(i->first).Text("Can't restore invalid Connection");
147 continue;
148 }
149
150 const dmtcp::vector<int>& fds = i->second;
151 for ( size_t x=0; x<fds.size(); ++x )
152 {
153 int fd = fds[x];
154 fdlist.push_back ( fd );
155 slidingFd.freeUpFd ( fd );
156 int oldFd = slidingFd.getFdFor ( i->first );
157 JTRACE ( "restoring fd" ) ( i->first ) ( oldFd ) ( fd );
158 //let connection do custom dup2 handling
159 con.restartDup2( oldFd, fd );
160
161 if ( fd > lastfd )
162 {
163 lastfd = fd;
164 }
165 }
166 }
167
168 size_t j;
169 for ( int i = 0 ; i < slidingFd.startFd() ; i++ )
170 {
171 for ( j = 0 ; j < fdlist.size() ; j++ )
172 {
173 if ( fdlist.at ( j ) == i )
174 break;
175 }
176 if ( j == fdlist.size() )
177 {
178 _real_close ( i );
179 }
180 }
181
182 slidingFd.closeAll();
183 }
184
185 int find_stdin( SlidingFdTable& slidingFd )
186 {
|
At conditional (1): "i.operator !=(std::_Rb_tree_const_iterator<std::pair<dmtcp::ConnectionIdentifier const, dmtcp::vector<int> > >::_Self const(this->_conToFd.end()))": Taking true branch.
|
|
At conditional (7): "i.operator !=(std::_Rb_tree_const_iterator<std::pair<dmtcp::ConnectionIdentifier const, dmtcp::vector<int> > >::_Self const(this->_conToFd.end()))": Taking false branch.
|
187 for ( ConnectionToFds::const_iterator i = _conToFd.begin();
188 i!=_conToFd.end(); ++i )
189 {
190 const dmtcp::vector<int>& fds = i->second;
|
At conditional (2): "x < fds->size()": Taking true branch.
|
|
At conditional (4): "x < fds->size()": Taking true branch.
|
|
At conditional (6): "x < fds->size()": Taking false branch.
|
191 for ( size_t x=0; x<fds.size(); ++x )
192 {
|
At conditional (3): "fds->operator [](x) == 0": Taking false branch.
|
|
At conditional (5): "fds->operator [](x) == 0": Taking false branch.
|
193 if (fds[x] == STDIN_FILENO){
194 JTRACE("Found stdin: fds[x] <---> slidingFd.getFdFor()")
195 (x) (fds[x]) (slidingFd.getFdFor ( i->first ));
196 return slidingFd.getFdFor ( i->first );
197 }
198 }
199 }
|
Event return_negative_constant: |
Explicitly returning negative value "-1". |
200 return -1;
201 }
202
203 void mtcpRestart()
204 {
205 runMtcpRestore ( _path.c_str(), _offset );
206 }
207
208 const UniquePid& pid() const { return _conToFd.pid(); }
209 const dmtcp::string& procname() const { return _conToFd.procname(); }
210
211 #ifdef PID_VIRTUALIZATION
212 typedef map<pid_t,bool> sidMapping;
213 typedef sidMapping::iterator s_iterator;
214 typedef vector<RestoreTarget *>::iterator t_iterator;
215
216 VirtualPidTable& getVirtualPidTable() { return _virtualPidTable; }
217 void addChild(RestoreTarget *t){ _children.push_back(t); }
218
219 bool isSessionLeader(){
220 JTRACE("")(_virtualPidTable.sid()) (pid().pid());
221 if( _virtualPidTable.sid() == pid().pid() )
222 return true;
223 else
224 return false;
225 }
226
227 bool isGroupLeader(){
228 JTRACE("")(_virtualPidTable.sid()) (pid().pid());
229 if( _virtualPidTable.gid() == pid().pid() )
230 return true;
231 else
232 return false;
233 }
234
235 bool isForegroundProcess() {
236 JTRACE("")(_virtualPidTable.sid()) (pid().pid());
237 if( _virtualPidTable.fgid() == _virtualPidTable.gid() )
238 return true;
239 else
240 return false;
241 }
242
243 bool isInitChild(){
244 JTRACE("")(_virtualPidTable.ppid());
245 if( _virtualPidTable.ppid() == 1 )
246 return true;
247 else
248 return false;
249 }
250
251 int addRoot(RestoreTarget *t, pid_t sid){
252 if( isSessionLeader() && _virtualPidTable.sid() == sid ){
253 _roots.push_back(t);
254 return 1;
255 }else{
256 t_iterator it = _children.begin();
257 for(; it != _children.end(); it++){
258 if( (*it)->addRoot(t, sid) )
259 return 1;
260 }
261 }
262 return 0;
263 }
264
265 // Traverse this process subtree and setup information about sessions
266 // and their leaders for all children.
267 sidMapping &setupSessions() {
268 pid_t sid = _virtualPidTable.sid();
269 if( !_children.size() ) {
270 _smap[sid] = isSessionLeader();
271 return _smap;
272 }
273 // We have at least one child
274 t_iterator it = _children.begin();
275 _smap = (*it)->setupSessions();
276 for(it++; it != _children.end();it++) {
277 sidMapping tmp = (*it)->setupSessions();
278 s_iterator it1 = tmp.begin();
279 for(;it1 != tmp.end(); it1++) {
280 s_iterator it2 = _smap.find(it1->first);
281 if( it2 != _smap.end() ) {
282 // mapping already exist
283 if( it2->second != it1->second ) {
284 // Session was created after child creation. So child from one
285 // thread cannot be member of session of child from other thread.
286 JASSERT(false). Text("One child contains session leader"
287 " and other contains session member!\n");
288 exit(0);
289 }
290 } else {
291 // add new mapping
292 _smap[it1->first] = it1->second;
293 }
294 }
295 }
296
297 s_iterator sit = _smap.find(sid);
298 if( sit != _smap.end() ) {
299 if( sit->second && !isSessionLeader() ) {
300 // child is leader and parent is slave - impossible
301 JASSERT(false)
302 .Text("child is leader and parent is slave - impossible\n");
303 exit(0);
304 }
305 }
306 _smap[sid] = isSessionLeader();
307 return _smap;
308 }
309
310 void printMapping(){
311 t_iterator it = _children.begin();
312 for(; it != _children.end(); it++){
313 (*it)->printMapping();
314 }
315 JTRACE("")(pid());
316 s_iterator sit = _smap.begin();
317 for(; sit != _smap.end(); sit++){
318 JTRACE("") (sit->first) (sit->second);
319 }
320 }
321
322 sidMapping &getSmap(){ return _smap; }
323
324 pid_t checkDependence(RestoreTarget *t){
325 sidMapping smap = t->getSmap();
326 s_iterator ext = smap.begin();
327 // Run through sessions --> has leader mapping
328 for(; ext != smap.end(); ext++){
329 if( ext->second == false ){
330 // Session pointed by ext has no leader in target t process tree
331 s_iterator intern = _smap.find(ext->first);
332 if( intern != _smap.end() && intern->second == true ){
333 // internal target has session leader in its tree
334 // TODO: can process trees be connected through several sessions?
335 return ext->first;
336 }
337 }
338 }
339 return -1;
340 }
341
342 void bringToForeground(SlidingFdTable& slidingFd)
343 {
344 char controllingTerm[L_ctermid];
345 pid_t pid;
346
|
Event negative_return_fn: |
Function "this->find_stdin(slidingFd)" returns a negative number. [details] |
|
Event var_assign: |
Assigning: signed variable "sin" = "<unnamed>::RestoreTarget::find_stdin(dmtcp::SlidingFdTable &)". |
| Also see events: |
[negative_returns][negative_returns] |
347 int sin = find_stdin(slidingFd);
348
|
At conditional (1): "this->isSessionLeader()": Taking true branch.
|
349 if( isSessionLeader() ){
350 // XXX: Where is the controlling terminal being set?
351 char *ptr = ttyname(sin);
352 int fd = open(ptr,O_RDWR);
353 if( ctermid(controllingTerm) ){
354 int tfd = open(ptr,O_RDONLY);
355 if( tfd >= 0 ){
356 JTRACE("Setting current controlling terminal") (controllingTerm);
357 close(tfd);
358 }else if (ptr == NULL){
359 JTRACE("Cannot restore controlling terminal") (ttyname(sin));
360 } else {
361 JWARNING(false) (ttyname(sin))
362 .Text("Cannot restore controlling terminal");
363 }
364 }
365 if (fd >= 0) close(fd);
366 }
367
368 pid_t gid = getpgid(0);
369 pid_t fgid = tcgetpgrp(sin);
370
371 if( !isForegroundProcess() )
372 return;
373 if( !isGroupLeader() ){
374 return;
375 }
376
377 if( gid != fgid ){
378 if( !(pid = fork()) ){ // fork subversive process
379 // This process moves itself to current foreground group
380 // and then changes foreground group to what we need
381 // so it works as a spy, saboteur or wrecker :)
382 // -- Artem
383 JTRACE("Change current GID to foreground GID.");
384
385 if( setpgid(0, fgid) ){
386 if (fgid == -1) {
387 JTRACE("CANNOT Change current GID to foreground GID")
388 (getpid()) (fgid) (_virtualPidTable.fgid()) (gid) (JASSERT_ERRNO);
389 } else {
390 JWARNING(false)
391 (getpid()) (fgid) (_virtualPidTable.fgid()) (gid) (JASSERT_ERRNO)
392 .Text("CANNOT Change current GID to foreground GID");
393 }
394 fflush(stdout);
395 exit(0);
396 }
397
398 if( tcsetpgrp(sin, gid) ){
399 printf("CANNOT Move parent GID to foreground: %s\n",
400 strerror(errno));
401 printf("PID=%d, FGID=%d, GID=%d\n",getpid(),fgid,gid);
402 printf("PID=%d, FGID=%d, _FGID=%d, GID=%d\n",
403 getpid(),fgid,_virtualPidTable.fgid(), gid);
404 fflush(stdout);
405 exit(0);
406 }
407
408 JTRACE("Finish foregrounding.")(getpid())(getpgid(0))(tcgetpgrp(0));
409 exit(0);
410 }else{
411 int status;
412 wait(&status);
413 }
414 }
415 }
416
417 void restoreGroup( SlidingFdTable& slidingFd )
418 {
419 if( isGroupLeader() ){
420 // create new group where this process becomes a leader
421 JTRACE("Create new group.");
422 setpgid(0, 0);
423 bringToForeground(slidingFd);
424 }
425 }
426
427 void CreateProcess(DmtcpWorker& worker, SlidingFdTable& slidingFd)
428 {
429 dmtcp::ostringstream o;
430 o << dmtcpTmpDir << "/jassertlog." << pid();
431 JASSERT_INIT(o.str());
432
433 //change UniquePid
434 UniquePid::resetOnFork(pid());
435 VirtualPidTable &vt = _virtualPidTable;
436
437 JTRACE("")(_real_getpid())(_real_getppid())(_real_getsid(0));
438
439 vt.updateMapping(pid().pid(), _real_getpid());
440 pid_t psid = vt.sid();
441
442 if( !isSessionLeader() ){
443
444 // Restore group information
445 restoreGroup(slidingFd);
446
447 // If process is not session leader, restore it and all children.
448 t_iterator it = _children.begin();
449 for(; it != _children.end(); it++){
450 JTRACE ( "Forking Child Process" ) ( (*it)->pid() );
451 pid_t cid = forkChild();
452
453 if ( cid == 0 )
454 {
455 (*it)->CreateProcess (worker, slidingFd);
456 JASSERT ( false ) . Text ( "Unreachable" );
457 }
458 JASSERT ( cid > 0 );
459 VirtualPidTable::iterator vit = vt.begin();
460 for(; vit != vt.end(); vit++){
461 if( (*it)->pid() == vit->second ){
462 vt.updateMapping ( vit->first, cid );
463 break;
464 }
465 }
466
467 }
468 }else{
469 // Process is session leader.
470 // There may be not setsid-ed children.
471 for(t_iterator it = _children.begin(); it != _children.end(); it++){
472 s_iterator sit = (*it)->getSmap().find(psid);
473 JTRACE("Restore processes that was created before their parent called setsid()");
474 if( sit == (*it)->getSmap().end() ){
475 JTRACE ( "Forking Child Process" ) ( (*it)->pid() );
476 pid_t cid = forkChild();
477 if ( cid == 0 )
478 {
479 (*it)->CreateProcess (worker, slidingFd);
480 JASSERT ( false ) . Text ( "Unreachable" );
481 }
482 JASSERT ( cid > 0 );
483 VirtualPidTable::iterator vit = _virtualPidTable.begin();
484 for(; vit != _virtualPidTable.end(); vit++){
485 if( (*it)->pid() == vit->second ){
486 _virtualPidTable.updateMapping ( vit->first, cid );
487 }
488 }
489 }
490 }
491
492 pid_t nsid = setsid();
493 JTRACE("change SID")(nsid);
494
495 // Restore group information
496 restoreGroup(slidingFd);
497
498 for(t_iterator it = _children.begin(); it != _children.end(); it++) {
499 JTRACE("Restore processes that was created after their parent called setsid()");
500 s_iterator sit = (*it)->getSmap().find(psid);
501 if( sit != (*it)->getSmap().end() ) {
502 JTRACE ( "Forking Child Process" ) ( (*it)->pid() );
503 pid_t cid = forkChild();
504 if ( cid == 0 ){
505 (*it)->CreateProcess (worker, slidingFd );
506 JASSERT ( false ) . Text ( "Unreachable" );
507 }
508 JASSERT ( cid> 0 );
509 VirtualPidTable::iterator vit = _virtualPidTable.begin();
510 for(; vit != _virtualPidTable.end(); vit++) {
511 if( (*it)->pid() == vit->second ) {
512 _virtualPidTable.updateMapping ( vit->first, cid );
513 }
514 }
515 }
516 }
517
518 for(t_iterator it = _roots.begin() ; it != _roots.end(); it++) {
519 JTRACE ( "Forking Dependent Root Process" ) ( (*it)->pid() );
520 pid_t cid;
521 if( (cid = fork()) ){
522 waitpid(cid, NULL, 0);
523 }else{
524 if( fork() )
525 exit(0);
526 (*it)->CreateProcess(worker, slidingFd );
527 JASSERT (false) . Text( "Unreachable" );
528 }
529 }
530 }
531
532 JTRACE("Child and dependent root processes forked, restoring process")
533 (pid())(getpid())(isGroupLeader());
534 // Save PID mapping information
535 pid_t orig = pid().pid();
536 pid_t curr = _real_getpid();
537 dmtcp::VirtualPidTable::InsertIntoPidMapFile(orig, curr);
538
539 //Reconnect to dmtcp_coordinator
540 WorkerState::setCurrentState ( WorkerState::RESTARTING );
541 worker.connectToCoordinatorWithoutHandshake();
542 worker.sendCoordinatorHandshake(procname(), _compGroup);
543 dmtcp::string serialFile = dmtcp::UniquePid::pidTableFilename();
544
545 JTRACE ( "PidTableFile: ") ( serialFile ) ( dmtcp::UniquePid::ThisProcess() );
546 jalib::JBinarySerializeWriter tblwr ( serialFile );
547 _virtualPidTable.serialize ( tblwr );
548 tblwr.~JBinarySerializeWriter();
549
550 int stmpfd = open( serialFile.c_str(), O_RDONLY);
551 JASSERT ( stmpfd >= 0 ) ( serialFile ) ( errno );
552
553 JASSERT ( dup2 ( stmpfd, PROTECTED_PIDTBL_FD) == PROTECTED_PIDTBL_FD )
554 ( serialFile ) ( stmpfd );
555
556 close (stmpfd);
557
558 //restart targets[i]
559 dupAllSockets ( slidingFd );
560
561 mtcpRestart();
562
563 JASSERT ( false ).Text ( "unreachable" );
564 }
565
566
567 static pid_t forkChild()
568 {
569 while ( 1 ) {
570
571 pid_t childPid = fork();
572
573 JASSERT ( childPid != -1 ) .Text ( "fork() failed" );
574
575 if ( childPid == 0 ) { /* child process */
576 if ( originalPidTable.isConflictingChildPid ( getpid() ) )
577 _exit(1);
578 else
579 return 0;
580 }
581 else { /* Parent Process */
582 if ( originalPidTable.isConflictingChildPid ( childPid ) ) {
583 JTRACE( "PID Conflict, creating new child" ) (childPid);
584 waitpid ( childPid, NULL, 0 );
585 }
586 else
587 return childPid;
588 }
589 }
590
591 return -1;
592 }
593 #endif
594
595 dmtcp::string _path;
596 int _offset;
597 ConnectionToFds _conToFd;
598 UniquePid _compGroup;
599 int _numPeers;
600 #ifdef PID_VIRTUALIZATION
601 VirtualPidTable _virtualPidTable;
602 // Links to children of this process
603 vector<RestoreTarget *> _children;
604 // Links to roots that depend on this target
605 // i.e. have SID of this target in its tree.
606 vector<RestoreTarget *> _roots;
607 sidMapping _smap;
608 bool _used;
609 #endif
610 };
611
612
613 } // end namespace
614
615 // gcc-4.3.4 -Wformat=2 issues false positives for warnings unless the format
616 // string has at least one format specifier with corresponding format argument.
617 // Ubuntu 9.01 uses -Wformat=2 by default.
// Command-line help printed for --help and on argument errors.
static const char* theUsage =
  "USAGE:\n dmtcp_restart [OPTIONS] <ckpt1.dmtcp> [ckpt2.dmtcp...]\n\n"
  "OPTIONS:\n"
  " --host, -h, (environment variable DMTCP_HOST):\n"
  " Hostname where dmtcp_coordinator is run (default: localhost)\n"
  " --port, -p, (environment variable DMTCP_PORT):\n"
  " Port where dmtcp_coordinator is run (default: 7779)\n"
  " --tmpdir, -t, (environment variable DMTCP_TMPDIR):\n"
  " Directory to store temporary files \n"
  " (default: $TMPDIR/dmtcp-$USER@$HOST or /tmp/dmtcp-$USER@$HOST)\n"
  " --join, -j:\n"
  " Join an existing coordinator, raise error if one doesn't already exist\n"
  " --new, -n:\n"
  " Create a new coordinator, raise error if one already exists\n"
  " --new-coordinator:\n"
  " Create a new coordinator even if one already exists\n"
  " --batch, -b:\n"
  " Enable batch mode i.e. start the coordinator on the same node on\n"
  " a randomly assigned port (if no port is specified by --port)\n"
  " --interval, -i, (environment variable DMTCP_CHECKPOINT_INTERVAL):\n"
  " Time in seconds between automatic checkpoints.\n"
  " Not allowed if --join is specified\n"
  " --batch implies -i 3600, unless otherwise specified.\n"
  " --no-check:\n"
  " Skip check for valid coordinator and never start one automatically\n"
  " --quiet, -q, (or set environment variable DMTCP_QUIET = 0, 1, or 2):\n"
  " Skip banner and NOTE messages; if given twice, also skip WARNINGs\n\n"
  "See http://dmtcp.sf.net/ for more information.\n"
;
647
648 static const char* theBanner =
649 "DMTCP/MTCP Copyright (C) 2006-2010 Jason Ansel, Michael Rieker,\n"
650 " Kapil Arya, and Gene Cooperman\n"
651 "This program comes with ABSOLUTELY NO WARRANTY.\n"
652 "This is free software, and you are welcome to redistribute it\n"
653 "under certain conditions; see COPYING file for details.\n"
654 "(Use flag \"-q\" to hide this message.)\n\n"
655 ;
656
657 //shift args
658 #define shift argc--,argv++
659
660 dmtcp::vector<RestoreTarget> targets;
661
662 #ifdef PID_VIRTUALIZATION
663 typedef struct {
664 RestoreTarget *t;
665 bool indep;
666 } RootTarget;
667 dmtcp::vector<RootTarget> roots;
668 void BuildProcessTree();
669 void ProcessGroupInfo();
670 void SetupSessions();
671
672 #endif
673
674 int main ( int argc, char** argv )
675 {
676 bool autoStartCoordinator=true;
677 bool isRestart = true;
678 int allowedModes = dmtcp::DmtcpWorker::COORD_ANY;
679
680 if (! getenv(ENV_VAR_QUIET))
681 setenv(ENV_VAR_QUIET, "0", 0);
682
683 //process args
684 shift;
685 while(true){
686 dmtcp::string s = argc>0 ? argv[0] : "--help";
687 if(s=="--help" || (s=="-h" && argc==1)){
688 JASSERT_STDERR << theUsage;
689 //fprintf(stderr, theUsage, "");
690 return 1;
691 }else if(s == "--no-check"){
692 autoStartCoordinator = false;
693 shift;
694 }else if(s == "-j" || s == "--join"){
695 allowedModes = dmtcp::DmtcpWorker::COORD_JOIN;
696 shift;
697 }else if(s == "-n" || s == "--new"){
698 allowedModes = dmtcp::DmtcpWorker::COORD_NEW;
699 shift;
700 }else if(s == "--new-coordinator"){
701 allowedModes = dmtcp::DmtcpWorker::COORD_FORCE_NEW;
702 shift;
703 }else if(s == "-b" || s == "--batch"){
704 allowedModes = dmtcp::DmtcpWorker::COORD_BATCH;
705 shift;
706 }else if(s == "-i" || s == "--interval"){
707 setenv(ENV_VAR_CKPT_INTR, argv[1], 1);
708 shift; shift;
709 }else if(argc>1 && (s == "-h" || s == "--host")){
710 setenv(ENV_VAR_NAME_ADDR, argv[1], 1);
711 shift; shift;
712 }else if(argc>1 && (s == "-p" || s == "--port")){
713 setenv(ENV_VAR_NAME_PORT, argv[1], 1);
714 shift; shift;
715 }else if(argc>1 && (s == "-t" || s == "--tmpdir")){
716 setenv(ENV_VAR_TMPDIR, argv[1], 1);
717 shift; shift;
718 }else if(s == "-q" || s == "--quiet"){
719 *getenv(ENV_VAR_QUIET) = *getenv(ENV_VAR_QUIET) + 1;
720 // Just in case a non-standard version of setenv is being used:
721 setenv(ENV_VAR_QUIET, getenv(ENV_VAR_QUIET), 1);
722 shift;
723 }else if( (s.length()>2 && s.substr(0, 2)=="--") ||
724 (s.length()>1 && s.substr(0, 1)=="-" ) ) {
725 JASSERT_STDERR << "Invalid Argument\n";
726 JASSERT_STDERR << theUsage;
727 return 1;
728 }else if(argc>1 && s=="--"){
729 shift;
730 break;
731 }else{
732 break;
733 }
734 }
735
736 dmtcp::UniquePid::setTmpDir(getenv(ENV_VAR_TMPDIR));
737 dmtcpTmpDir = dmtcp::UniquePid::getTmpDir();
738
739 jassert_quiet = *getenv(ENV_VAR_QUIET) - '0';
740
741 if (jassert_quiet == 0)
742 JASSERT_STDERR << theBanner;
743
744 if (autoStartCoordinator)
745 dmtcp::DmtcpWorker::startCoordinatorIfNeeded(allowedModes, isRestart);
746
747 //make sure JASSERT initializes now, rather than during restart
748 dmtcp::ostringstream o;
749 o << dmtcpTmpDir << "/jassertlog." << dmtcp::UniquePid(getpid());
750 JASSERT_INIT(o.str());
751 JTRACE("New dmtcp_restart process; _argc_ ckpt images") (argc);
752
753 bool doAbort = false;
754 for(; argc>0; shift){
755 dmtcp::string restorename(argv[0]);
756 struct stat buf;
757 int rc = stat(restorename.c_str(), &buf);
758 if (dmtcp::Util::strStartsWith(restorename, "ckpt_") &&
759 dmtcp::Util::strEndsWith(restorename, "_files")) {
760 continue;
761 #ifndef URDB
762 } else if (!dmtcp::Util::strEndsWith(restorename, ".dmtcp")) {
763 JNOTE("File doesn't have .dmtcp extension. Check Usage.")
764 (restorename);
765 JASSERT_STDERR << theUsage;
766 doAbort = true;
767 #endif
768 } else if (rc == -1) {
769 char error_msg[1024];
770 sprintf(error_msg, "\ndmtcp_restart: ckpt image %s", restorename.c_str());
771 perror(error_msg);
772 doAbort = true;
773 } else if (buf.st_uid != getuid()) { /*Could also run if geteuid() matches*/
774 printf("\nProcess uid (%d) doesn't match uid (%d) of\n" \
775 "checkpoint image (%s).\n" \
776 "This is dangerous. Aborting for security reasons.\n" \
777 "If you still want to do this (at your own risk),\n" \
778 " then modify dmtcp/src/%s:%d and re-compile.\n",
779 getuid(), buf.st_uid, restorename.c_str(), __FILE__, __LINE__ - 6);
780 doAbort = true;
781 }
782 if (doAbort) {
783 exit(1);
784 }
785
786 JTRACE("Will restart ckpt image _argv[0]_") (argv[0]);
787 targets.push_back ( RestoreTarget ( argv[0] ) );
788 }
789
790 if (targets.size() <= 0) {
791 JNOTE("ERROR: No DMTCP checkpoint image(s) found. Check Usage.");
792 JASSERT_STDERR << theUsage;
793 exit(1);
794 }
795
796 SlidingFdTable slidingFd;
797 ConnectionToFds conToFd;
798
799 ConnectionList& connections = ConnectionList::instance();
800 for ( ConnectionList::iterator i = connections.begin()
801 ; i!= connections.end()
802 ; ++i )
803 {
804 conToFd[i->first].push_back ( slidingFd.getFdFor ( i->first ) );
805 JTRACE ( "will restore" ) ( i->first ) ( conToFd[i->first].back() );
806 }
807
808 // Check that all targets belongs to one computation group
809 // If not - abort
810 for(size_t i=0; i<targets.size(); i++){
811 JTRACE ( "Check targets: " )
812 ( targets[i]._path ) ( targets[i]._compGroup ) ( targets[i]._numPeers );
813 }
814
815 compGroup = targets[0]._compGroup;
816 numPeers = targets[0]._numPeers;
817 for(size_t i=0; i<targets.size(); i++){
818 if( compGroup != targets[i]._compGroup){
819 JASSERT(false)(compGroup)(targets[i]._compGroup)
820 .Text("ERROR: Restored programs belongs to different computation IDs");
821 }else if( numPeers != targets[i]._numPeers ){
822 JASSERT(false)(numPeers)(targets[i]._numPeers)
823 .Text("ERROR: Different numpber of processes saved in checkpoint images");
824 }
825 }
826
827 //------------------------
828 DmtcpWorker worker ( false );
829 WorkerState::setCurrentState ( WorkerState::RESTARTING );
830 ConnectionState ckptCoord ( conToFd );
831 worker.restoreSockets ( ckptCoord, compGroup, numPeers, coordTstamp );
832
833 #ifndef PID_VIRTUALIZATION
834 int i = (int)targets.size();
835
836 //fork into targs.size() processes
837 while(--i > 0){
838 int cid = fork();
839 if(cid==0) break;
840 else JASSERT(cid>0);
841 }
842 RestoreTarget& targ = targets[i];
843
844 JTRACE("forked, restoring process")(i)(targets.size())(targ.pid())(getpid());
845
846 //change UniquePid
847 UniquePid::resetOnFork(targ.pid());
848
849 //Reconnect to dmtcp_coordinator
850 WorkerState::setCurrentState ( WorkerState::RESTARTING );
851 worker.connectToCoordinatorWithoutHandshake();
852 worker.sendCoordinatorHandshake(targ.procname());
853
854 //restart targets[i]
855 targets[i].dupAllSockets ( slidingFd );
856 targets[i].mtcpRestart();
857
858 JASSERT ( false ).Text ( "unreachable" );
859 return -1;
860 #else
861 size_t i = targets.size();
862
863 // Create roots vector, assign children to their parents.
864 // Delete children that don't exist.
865 BuildProcessTree();
866
867 // Process all checkpoints to find one of them that can switch
868 // needed group to foreground.
869 ProcessGroupInfo();
870 // Create session meta-information in each node of the process tree.
871 // Node contains info about all sessions which exists at lower levels.
872 // Also node is aware of session leader existence at lower levels.
873 SetupSessions();
874
875 /* Create the file to hold the pid/tid maps. */
876 openOriginalToCurrentMappingFiles();
877
878 int pgrp_index=-1;
879 JTRACE ( "Creating ROOT Processes" )(roots.size());
880 for ( int j = 0 ; j < roots.size(); ++j )
881 {
882 if( roots[j].indep == false ){
883 // We will restore this process from one of the independent roots.
884 continue;
885 }
886 if (pgrp_index == -1 && !roots[j].t->isInitChild() ){
887 pgrp_index = j;
888 continue;
889 }
890
891 pid_t cid = fork();
892 if ( cid == 0 ){
893 JTRACE ( "Root of process tree" ) ( _real_getpid() ) ( _real_getppid() );
894 if( roots[j].t->isInitChild() ){
895 JTRACE ( "Create init-child process" ) ( _real_getpid() )
896 ( _real_getppid() );
897 if( fork() )
898 _exit(0);
899 }
900 roots[j].t->CreateProcess(worker, slidingFd);
901 JASSERT (false) . Text( "Unreachable" );
902 }
903 JASSERT ( cid > 0 );
904 if( roots[j].t->isInitChild() ){
905 waitpid(cid, NULL, 0);
906 }
907 }
908
909 JTRACE("Restore processes without corresponding Root Target");
910 int flat_index = -1;
911 int j = 0;
912 if( pgrp_index < 0 ){ // No root processes at all
913 // Find first flat process that can replace currently running
914 // dmtcp_restart context.
915 for (j = 0; j < targets.size(); ++j){
916 if( !targets[j]._used ){
917 // Save first flat-like process to be restored after all others
918 flat_index = j;
919 j++;
920 break;
921 }
922 }
923 }
924 // Use j set to 0 (if at least one root non-init-child process exists),
925 // or else j set to some value if no such process found.
926 for(; j < targets.size(); ++j)
927 {
928 if( !targets[j]._used ){
929 if( pgrp_index < 0 ){
930 // Save first flat-like process to be restored after all others
931 pgrp_index = j;
932 continue;
933 }else{
934 targets[j].CreateProcess(worker, slidingFd);
935 JTRACE("Need in flat-like restore for process")(targets[j].pid());
936 }
937 }
938 }
939
940 if( pgrp_index >=0 ){
941 JTRACE("Restore first Root Target")(roots[pgrp_index].t->pid());
942 roots[pgrp_index].t->CreateProcess(worker, slidingFd);
943 }else if (flat_index >= 0){
944 JTRACE("Restore first Flat Target")(targets[flat_index].pid());
945 targets[flat_index].CreateProcess(worker, slidingFd );
946 }else{
947 // FIXME: Under what conditions will this path be exercised?
948 JNOTE ("unknown type of target?") (targets[flat_index]._path);
949 }
950 #endif
951 }
952
953 #ifdef PID_VIRTUALIZATION
954 void BuildProcessTree()
955 {
956 for (size_t j = 0; j < targets.size(); ++j)
957 {
958 VirtualPidTable& virtualPidTable = targets[j].getVirtualPidTable();
959 originalPidTable.insertFromVirtualPidTable ( virtualPidTable );
960 if( virtualPidTable.isRootOfProcessTree() == true ){
961 // If this process is independent (root of process tree
962 RootTarget rt;
963 rt.t = &targets[j];
964 rt.indep = true;
965 roots.push_back(rt);
966 targets[j]._used = true;
967 }else if( !targets[j]._used ){
968 // We set used flag if we use target as somebodys child. If it is used - no need to check is it roor
969 // Iterate through all targets and try to find the one who has this process
970 // as child process
971 JTRACE("Process is not root of process tree: try to find if it has parent");
972 bool is_root = true;
973 for (size_t i = 0; i < targets.size(); i++) {
974 VirtualPidTable & virtualPidTable = targets[i].getVirtualPidTable();
975 VirtualPidTable::iterator it;
976 // Search inside the child list of target[j], make sure that i != j
977 for (it = virtualPidTable.begin(); (i != j) && (it != virtualPidTable.end()) ; it++) {
978 UniquePid& childUniquePid = it->second;
979 JTRACE("Check child")(childUniquePid)(" parent ")(targets[i].pid())("checked ")(targets[j].pid());
980 if (childUniquePid == targets[j].pid()){
981 is_root = false;
982 break;
983 }
984 }
985 }
986 JTRACE("Root detection:")(is_root)(targets[j].pid());
987 if( is_root ){
988 RootTarget rt;
989 rt.t = &targets[j];
990 rt.indep = true;
991 roots.push_back(rt);
992 targets[j]._used = true;
993 }
994 }
995
996 // Add all children
997 VirtualPidTable::iterator it;
998 for(it = virtualPidTable.begin(); it != virtualPidTable.end(); it++ ){
999 // find target
1000 bool found = false;
1001 pid_t childOriginalPid = it->first;
1002 UniquePid& childUniquePid = it->second;
1003
1004 for ( size_t i = 0; i < targets.size(); i++ )
1005 {
1006 if ( childUniquePid == targets[i].pid() )
1007 {
1008 found = 1;
1009 JTRACE ( "Add child to current target" ) ( targets[j].pid() ) ( childUniquePid );
1010 targets[i]._used = true;
1011 targets[j].addChild(&targets[i]);
1012 }
1013 }
1014 if ( !found ){
1015 JTRACE("Child not found")(childOriginalPid);
1016 virtualPidTable.erase( childOriginalPid );
1017 }
1018 }
1019 }
1020 }
1021
1022 /*
1023 * Group processing
1024 * 1. Divide all processes into sessions
1025 * 2. Divide processes in each session into groups
1026 * 3. In each group check that stored foreground values are equal.
1027 * If not, something's wrong: ABORT
1028 * 4. In each session choose the process that can bring appropriate group
1029 * to foreground
1030 * 5. Serialize information about chosen UniquePIDs in following
1031 * format: "COUNT:unique-pid1:unique-pid2:..."
1032 * 6. Deserialize information from step 5 in forked and restored processes.
1033 *
1034 */
1035
1036 class group {
1037 public:
1038 group(){
1039 gid = -2;
1040 }
1041 pid_t gid;
1042 vector<RestoreTarget*> targets;
1043 };
1044
1045 class session{
1046 public:
1047 session(){
1048 sid = -2;
1049 fgid = -2;
1050 }
1051 pid_t sid;
1052 pid_t fgid;
1053 map<pid_t,group> groups;
1054 typedef map<pid_t,group>::iterator group_it;
1055 UniquePid upid;
1056 };
1057
1058 void ProcessGroupInfo()
1059 {
1060 map<pid_t,session> smap;
1061 map<pid_t,session>::iterator it;
1062
1063 // 1. divide processes into sessions and groups
1064 for (size_t j = 0; j < targets.size(); j++)
1065 {
1066 VirtualPidTable& virtualPidTable = targets[j].getVirtualPidTable();
1067 JTRACE("Process ")
1068 (virtualPidTable.pid())(virtualPidTable.ppid())(virtualPidTable.sid())
1069 (virtualPidTable.gid())(virtualPidTable.fgid())
1070 (virtualPidTable.isRootOfProcessTree());
1071
1072 pid_t sid = virtualPidTable.sid();
1073 pid_t gid = virtualPidTable.gid();
1074 pid_t fgid = virtualPidTable.fgid();
1075
1076 /*
1077 // If group ID doesn't belong to known PIDs, indicate that fact
1078 // using -1 value.
1079 if( !virtualPidTable.pidExists(gid) ){
1080 JTRACE("DROP gid")(gid);
1081 virtualPidTable.setgid(-1);
1082 gid = -1;
1083 }
1084 // If foreground group ID not belongs to known PIDs,
1085 // indicate that fact using -1 value.
1086 if( !virtualPidTable.pidExists(fgid) ){
1087 JTRACE("DROP fgid")(fgid);
1088 virtualPidTable.setfgid(-1);
1089 fgid = -1;
1090 }
1091 */
1092
1093 session &s = smap[sid];
1094 // if this is first element of this session
1095 if( s.sid == -2 ){
1096 s.sid = sid;
1097 }
1098 group &g = smap[sid].groups[gid];
1099 // if this is first element of group gid
1100 if( g.gid == -2 ){
1101 g.gid = gid;
1102 }
1103 g.targets.push_back(&targets[j]);
1104 }
1105
1106 // 2. Check if foreground setting is correct
1107 it = smap.begin();
1108 for(;it != smap.end();it++){
1109 session &s = it->second;
1110 session::group_it g_it = s.groups.begin();
1111 pid_t fgid = -2;
1112 for(; g_it!=s.groups.end();g_it++){
1113 group &g = g_it->second;
1114 for(size_t k=0; k<g.targets.size(); k++){
1115 VirtualPidTable& virtualPidTable = g.targets[k]->getVirtualPidTable();
1116 pid_t cfgid = virtualPidTable.fgid();
1117 if( fgid == -2 ){
1118 fgid = cfgid;
1119 }else if( fgid != -1 && cfgid != -1 && fgid != cfgid ){
1120 printf("Error: process from same session stores different"
1121 " foreground group ID: %d, %d\n", fgid, cfgid);
1122 // DEBUG PRINTOUT:
1123 {
1124 session::group_it g_it1 = s.groups.begin();
1125 for(; g_it1!=s.groups.end();g_it1++){
1126 group &g1 = g_it1->second;
1127 for(size_t m=0; m<g1.targets.size() ;m++){
1128 VirtualPidTable& virtualPidTable = g1.targets[m]->getVirtualPidTable();
1129 pid_t pid = virtualPidTable.pid();
1130 pid_t cfgid = virtualPidTable.fgid();
1131 printf("PID=%d <--> FGID = %d\n",pid,cfgid);
1132 }
1133 }
1134 }
1135 abort();
1136 }
1137 }
1138 JTRACE("Checked ")(fgid);
1139 }
1140 s.fgid = fgid;
1141 if( s.groups.find(s.fgid) == s.groups.end() ){
1142 // foreground group is missing, don't need to change foreground groop
1143 s.fgid = -1;
1144 }
1145
1146 {
1147 session::group_it g_it1 = s.groups.begin();
1148 for(; g_it1!=s.groups.end();g_it1++){
1149 group &g1 = g_it1->second;
1150 for(size_t m=0; m<g1.targets.size(); m++){
1151 VirtualPidTable& virtualPidTable = g1.targets[m]->getVirtualPidTable();
1152 pid_t pid = virtualPidTable.pid();
1153 pid_t cfgid = virtualPidTable.fgid();
1154 JTRACE("PID=%d <--> FGID = %d")(pid)(cfgid);
1155 }
1156 }
1157 }
1158 }
1159
1160 // Print out session mapping.
1161 JTRACE("Session number:")(smap.size());
1162 it = smap.begin();
1163 for( ; it != smap.end(); it++ ){
1164 session &s = it->second;
1165 JTRACE("Session printout:")(s.sid)(s.fgid)(s.upid.toString().c_str());
1166 session::group_it g_it = s.groups.begin();
1167 for(; g_it!=s.groups.end();g_it++){
1168 group &g = g_it->second;
1169 JTRACE("\tGroup ID: ")(g.gid);
1170 /*
1171 for(k=0; k<g.targets.size() ;k++){
1172 printf("%d ", g.targets[k]->pid().pid());
1173 }
1174 printf("\n");
1175 */
1176 }
1177 }
1178 }
1179
1180 void SetupSessions()
1181 {
1182 for(size_t j=0; j < roots.size(); j++){
1183 roots[j].t->setupSessions();
1184 }
1185
1186 for(size_t i = 0; i < roots.size(); i++){
1187 for(size_t j = 0; j < roots.size(); j++){
1188 if( i == j )
1189 continue;
1190 pid_t sid;
1191 if( (sid = (roots[i].t)->checkDependence(roots[j].t)) >= 0 ){
1192 // it2 depends on it1
1193 JTRACE("Root target j depends on Root target i")(i)(roots[i].t->pid())(j)(roots[j].t->pid());
1194 (roots[i].t)->addRoot(roots[j].t, sid);
1195 roots[j].indep = false;
1196 }
1197 }
1198 }
1199 }
1200
1201 int openSharedFile(dmtcp::string name, int flags)
1202 {
1203 int fd;
1204 // try to create, truncate & open file
1205 if( (fd = open(name.c_str(), O_EXCL|O_CREAT|O_TRUNC | flags, 0600)) >= 0) {
1206 return fd;
1207 }
1208 if (fd < 0 && errno == EEXIST) {
1209 if ((fd = open(name.c_str(), flags, 0600)) > 0) {
1210 return fd;
1211 }
1212 }
1213 // unable to create & open OR open
1214 JASSERT( false )(name)(strerror(errno)).Text("Cannot open file");
1215 return -1;
1216 }
1217
1218 static void openOriginalToCurrentMappingFiles()
1219 {
1220 dmtcp::ostringstream pidMapFile, pidMapCountFile;
1221 dmtcp::ostringstream shmidListFile, shmidMapFile;
1222 int fd;
1223
1224 shmidMapFile << dmtcpTmpDir << "/dmtcpShmidMap."
1225 << compGroup << "." << std::hex << coordTstamp;
1226 shmidListFile << dmtcpTmpDir << "/dmtcpShmidList."
1227 << compGroup << "." << std::hex << coordTstamp;
1228
1229 pidMapFile << dmtcpTmpDir << "/dmtcpPidMap."
1230 << compGroup << "." << std::hex << coordTstamp;
1231 pidMapCountFile << dmtcpTmpDir << "/dmtcpPidMapCount."
1232 << compGroup << "." << std::hex << coordTstamp;
1233
1234 // Open and create shmidListFile if it doesn't exist.
1235 JTRACE("Open dmtcpShmidMapFile")(shmidListFile.str());
1236 fd = openSharedFile(shmidListFile.str(), (O_WRONLY|O_APPEND));
1237 JASSERT ( fd != -1 );
1238 JASSERT ( dup2 ( fd, PROTECTED_SHMIDLIST_FD ) == PROTECTED_SHMIDLIST_FD )
1239 ( shmidListFile.str() );
1240 close (fd);
1241
1242 // Open and create shmidMapFile if it doesn't exist.
1243 JTRACE("Open dmtcpShmidMapFile")(shmidMapFile.str());
1244 fd = openSharedFile(shmidMapFile.str(), (O_WRONLY|O_APPEND));
1245 JASSERT ( fd != -1 );
1246 JASSERT ( dup2 ( fd, PROTECTED_SHMIDMAP_FD ) == PROTECTED_SHMIDMAP_FD )
1247 ( shmidMapFile.str() );
1248 close (fd);
1249
1250 // Open and create pidMapFile if it doesn't exist.
1251 JTRACE("Open dmtcpPidMapFile")(pidMapFile.str());
1252 fd = openSharedFile(pidMapFile.str(), (O_WRONLY|O_APPEND));
1253 JASSERT ( fd != -1 );
1254 JASSERT ( dup2 ( fd, PROTECTED_PIDMAP_FD ) == PROTECTED_PIDMAP_FD )
1255 ( pidMapFile.str() );
1256 close (fd);
1257
1258 // Open and create pidMapCountFile if it doesn't exist.
1259 JTRACE("Open dmtcpPidMapCount files for writing")(pidMapCountFile.str());
1260 fd = openSharedFile(pidMapCountFile.str(), O_RDWR);
1261 JASSERT ( fd != -1 );
1262 JASSERT ( dup2 ( fd, PROTECTED_PIDMAPCNT_FD ) == PROTECTED_PIDMAPCNT_FD )
1263 ( pidMapCountFile.str() );
1264 close(fd);
1265
1266 dmtcp::Util::lockFile(PROTECTED_PIDMAPCNT_FD);
1267
1268 // Initialize pidMapCountFile with zero value.
1269 static jalib::JBinarySerializeWriterRaw countwr(pidMapCountFile.str(),
1270 PROTECTED_PIDMAPCNT_FD);
1271 if( countwr.isempty() ){
1272 JTRACE("pidMapCountFile is empty. Initialize it with count = 0")
1273 (pidMapCountFile.str());
1274 size_t numMaps = 0;
1275 dmtcp::VirtualPidTable::serializeEntryCount (countwr, numMaps);
1276 fsync(PROTECTED_PIDMAPCNT_FD);
1277 }else{
1278 JTRACE("pidMapCountFile is not empty - do nothing");
1279 }
1280
1281 dmtcp::Util::unlockFile(PROTECTED_PIDMAPCNT_FD);
1282 }
1283 #endif
1284
1285 static void runMtcpRestore ( const char* path, int offset )
1286 {
1287 static dmtcp::string mtcprestart = jalib::Filesystem::FindHelperUtility ( "mtcp_restart" );
1288
1289 // Tell mtcp_restart process to write its debugging information to
1290 // PROTECTED_STDERR_FD. This way we prevent it from spitting out garbage onto
1291 // FD_STDERR if it is being used by the user process in a special way.
1292 char protected_stderr_fd_str[16];
1293 sprintf(protected_stderr_fd_str, "%d", PROTECTED_STDERR_FD);
1294
1295 #ifdef USE_MTCP_FD_CALLING
1296 int fd = ConnectionToFds::openMtcpCheckpointFile(path);
1297 char buf[64];
1298 sprintf(buf, "%d", fd);
1299 char buf2[64];
1300 // gzip_child_pid set by openMtcpCheckpointFile() above.
1301 sprintf(buf2, "%d", dmtcp::ConnectionToFds::gzip_child_pid);
1302
1303 char* newArgs[] = {
1304 ( char* ) mtcprestart.c_str(),
1305 ( char* ) "--stderr-fd",
1306 protected_stderr_fd_str,
1307 ( char* ) "--fd",
1308 buf,
1309 ( char* ) "--gzip-child-pid",
1310 buf2,
1311 NULL
1312 };
1313 if (dmtcp::ConnectionToFds::gzip_child_pid == -1) // If no gzip compression
1314 newArgs[3] = NULL;
1315
1316 JTRACE ( "launching mtcp_restart --fd" )(fd)(path);
1317 #else
1318 char buf[64];
1319 sprintf(buf, "%d", offset);
1320
1321 char* newArgs[] = {
1322 ( char* ) mtcprestart.c_str(),
1323 ( char* ) "--stderr-fd",
1324 protected_stderr_fd_str,
1325 ( char* ) "--offset",
1326 buf,
1327 (char*) path,
1328 NULL
1329 };
1330
1331 JTRACE ( "launching mtcp_restart --offset" )(path)(offset);
1332
1333 #endif
1334
1335 execvp ( newArgs[0], newArgs );
1336 JASSERT ( false ) ( newArgs[0] ) ( newArgs[1] ) ( JASSERT_ERRNO )
1337 .Text ( "exec() failed" );
1338 }