1    	/****************************************************************************
2    	 *   Copyright (C) 2006-2010 by Jason Ansel, Kapil Arya, and Gene Cooperman *
3    	 *   jansel@csail.mit.edu, kapil@ccs.neu.edu, gene@ccs.neu.edu              *
4    	 *                                                                          *
5    	 *   This file is part of the dmtcp/src module of DMTCP (DMTCP:dmtcp/src).  *
6    	 *                                                                          *
7    	 *  DMTCP:dmtcp/src is free software: you can redistribute it and/or        *
8    	 *  modify it under the terms of the GNU Lesser General Public License as   *
9    	 *  published by the Free Software Foundation, either version 3 of the      *
10   	 *  License, or (at your option) any later version.                         *
11   	 *                                                                          *
12   	 *  DMTCP:dmtcp/src is distributed in the hope that it will be useful,      *
13   	 *  but WITHOUT ANY WARRANTY; without even the implied warranty of          *
14   	 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the           *
15   	 *  GNU Lesser General Public License for more details.                     *
16   	 *                                                                          *
17   	 *  You should have received a copy of the GNU Lesser General Public        *
18   	 *  License along with DMTCP:dmtcp/src.  If not, see                        *
19   	 *  <http://www.gnu.org/licenses/>.                                         *
20   	 ****************************************************************************/
21   	
22   	#include "connection.h"
23   	#include  "../jalib/jassert.h"
24   	#include  "../jalib/jfilesystem.h"
25   	#include  "../jalib/jconvert.h"
26   	#include "kernelbufferdrainer.h"
27   	#include "syscallwrappers.h"
28   	#include "connectionrewirer.h"
29   	#include "connectionmanager.h"
30   	#include "dmtcpmessagetypes.h"
31   	#include "dmtcpworker.h"
32   	#include "virtualpidtable.h"
33   	#include "util.h"
34   	#include  "../jalib/jsocket.h"
35   	#include <sys/ioctl.h>
36   	#include <sys/select.h>
37   	#include <sys/un.h>
38   	#include <unistd.h>
39   	#include <fcntl.h>
40   	#include <sys/file.h>
41   	#include <termios.h>
42   	#include <iostream>
43   	#include <ios>
44   	#include <fstream>
45   	#include <linux/limits.h>
46   	#include <arpa/inet.h>
47   	
// Forward declarations of file-local helpers operating on a pty master fd.
// They are called from dmtcp::PtyConnection::preCheckpoint() below; their
// definitions are not in this part of the file.
static bool ptmxTestPacketMode(int masterFd);
static ssize_t ptmxReadAll(int fd, const void *origBuf, size_t maxCount);
static ssize_t ptmxWriteAll(int fd, const void *buf, bool isPacketMode);
51   	
52   	static dmtcp::string _procFDPath ( int fd )
53   	{
54   	  return "/proc/self/fd/" + jalib::XToString ( fd );
55   	}
56   	
57   	static bool hasLock ( const dmtcp::vector<int>& fds )
58   	{
59   	  JASSERT ( fds.size() > 0 );
60   	  int owner = fcntl ( fds[0], F_GETOWN );
61   	  JASSERT ( owner != 0 );
62   	  int self = _real_getpid();
63   	  JASSERT ( self >= 0 );
64   	  return owner == self;
65   	}
66   	
// Creates a socket that is in an error state and returns its descriptor.
// The caller receives ownership of the returned fd.
static int _makeDeadSocket()
{
  // It does so by creating a connected AF_UNIX stream socket pair and then
  // closing one side, leaving the other end with no peer.
  int sp[2] = {-1,-1};
  JASSERT ( _real_socketpair ( AF_UNIX, SOCK_STREAM, 0, sp ) == 0 ) ( JASSERT_ERRNO )
    .Text ( "socketpair() failed" );
  // Double-check that both descriptors were actually filled in.
  JASSERT ( sp[0]>=0 && sp[1]>=0 ) ( sp[0] ) ( sp[1] )
    .Text ( "socketpair() failed" );
  // Close the peer end; sp[0] is what gets handed back.
  _real_close ( sp[1] );
  JTRACE ( "Created dead socket." ) ( sp[0] );
  return sp[0];
}
80   	
81   	static bool _isVimApp ( )
82   	{
83   	  static int isVimApp = -1;
84   	
85   	  if (isVimApp == -1) {
86   	    dmtcp::string progName = jalib::Filesystem::GetProgramName();
87   	
88   	    if (progName == "vi" || progName == "vim" || progName == "vim-normal" || 
89   	        progName == "vim.basic"  || progName == "vim.tiny" ||
90   	        progName == "vim.gtk" || progName == "vim.gnome" ) {
91   	      isVimApp = 1;
92   	    } else {
93   	      isVimApp = 0;
94   	    }
95   	  }
96   	  return isVimApp;
97   	}
98   	
99   	static bool _isBlacklistedFile ( dmtcp::string& path )
100  	{
101  	  if ((dmtcp::Util::strStartsWith(path, "/dev/") &&
102  	       !dmtcp::Util::strStartsWith(path, "/dev/shm/"))) {
103  	    return true;
104  	  }
105  	  return false;
106  	}
107  	/*
108  	 * Calculate X11 listener port using the env var "DISPLAY". It is computed in
109  	 * the following manner:
110  	 *   hostname:D.S means screen S on display D of host hostname; 
111  	 *     the X server for this display is listening at TCP port 6000+D.
112  	 */
113  	static short int _X11ListenerPort() {
114  	  short int port = -1;
115  	  const char *str = getenv("DISPLAY");
116  	  if (str != NULL) {
117  	    dmtcp::string display = str;
118  	    int idx = display.find_last_of(':');
119  	    char *dummy;
120  	    port = X11_LISTENER_PORT_START 
121  	         + strtol(display.c_str() + idx + 1, &dummy, 10);
122  	    JTRACE("X11 Listener Port found") (port);
123  	  }
124  	  return port;
125  	}
126  	
127  	static bool _isBlacklistedTcp ( int sockfd, const sockaddr* saddr, socklen_t len )
128  	{
129  	  JASSERT( saddr != NULL );
130  	
131  	  if ( saddr->sa_family == AF_FILE ) {
132  	    const char* un_path = ( ( sockaddr_un* ) saddr )->sun_path;
133  	    if (un_path[0] == '\0') {
134  	      /* The first byte is null, which indicates abstract socket name */
135  	      un_path++;
136  	    }
137  	    dmtcp::string path = jalib::Filesystem::DirBaseName( un_path );
138  	
139  	    if (path == "/tmp/.ICE-unix" || path == "/tmp/.X11-unix" ||
140  	        path == "/var/run/nscd") { 
141  	      JTRACE("connect() to external process (X-server). Will not be drained")
142  	        (sockfd) (path);
143  	      return true;
144  	    }
145  	  } else if ( saddr->sa_family == AF_INET ) {
146  	    struct sockaddr_in* addr = ( sockaddr_in* ) saddr;
147  	    int port = ntohs(addr->sin_port);
148  	    char inet_addr[32];
149  	    inet_ntop(AF_INET, &(addr->sin_addr), inet_addr, sizeof(inet_addr));
150  	    if (strcmp(inet_addr, "127.0.0.1") == 0 && port == _X11ListenerPort()) {
151  	      JTRACE("connect() to external process. Will not be drained") 
152  	        (sockfd) (inet_addr) (port);
153  	      return true;
154  	    }
155  	  }
156  	  return false;
157  	}
158  	
// Base constructor: assigns a freshly created connection identifier and
// records the connection type `t`. The saved fcntl state starts at -1 until
// saveOptions() fills it in.
dmtcp::Connection::Connection ( int t )
  : _id ( ConnectionIdentifier::Create() )
  , _type ( ( ConnectionType ) t )
  , _fcntlFlags ( -1 )   // filled in by saveOptions() (F_GETFL)
  , _fcntlOwner ( -1 )   // filled in by saveOptions() (F_GETOWN)
  , _fcntlSignal ( -1 )  // filled in by saveOptions() (F_GETSIG)
  , _restoreInSecondIteration ( false )
{}
167  	
168  	dmtcp::TcpConnection& dmtcp::Connection::asTcp()
169  	{
170  	  JASSERT ( false ) ( _id ) ( _type ).Text ( "Invalid conversion." );
171  	  return * ( ( TcpConnection* ) 0 );
172  	}
173  	
// Duplicates `oldFd` onto `fd` with the real dup2(). A failure only produces
// a warning (with errno); it is not fatal.
void dmtcp::Connection::restartDup2(int oldFd, int fd){
  errno = 0; // clear stale errno so the warning reports this call's error
  JWARNING ( _real_dup2 ( oldFd, fd ) == fd ) ( oldFd ) ( fd ) ( JASSERT_ERRNO );
}
178  	
// Records the fcntl state of fds[0] (file status flags, owner, and signal)
// into _fcntlFlags, _fcntlOwner and _fcntlSignal so that restoreOptions()
// can reapply it later. Each query is asserted to succeed.
// Only fds[0] is queried; `fds` is not checked for emptiness here.
void dmtcp::Connection::saveOptions ( const dmtcp::vector<int>& fds )
{
  errno = 0;
  _fcntlFlags = fcntl ( fds[0],F_GETFL );
  JASSERT ( _fcntlFlags >= 0 ) ( _fcntlFlags ) ( JASSERT_ERRNO );
  errno = 0;
  _fcntlOwner = fcntl ( fds[0],F_GETOWN );
  // Only -1 is treated as failure here (other negative values are accepted).
  JASSERT ( _fcntlOwner != -1 ) ( _fcntlOwner ) ( JASSERT_ERRNO );
  errno = 0;
  _fcntlSignal = fcntl ( fds[0],F_GETSIG );
  JASSERT ( _fcntlSignal >= 0 ) ( _fcntlSignal ) ( JASSERT_ERRNO );
}
191  	
// Reapplies to fds[0] the fcntl state captured by saveOptions(): the file
// status flags (F_SETFL), the owner (F_SETOWN) and the signal (F_SETSIG).
// Asserts that the saved values are valid and that every fcntl() succeeds.
void dmtcp::Connection::restoreOptions ( const dmtcp::vector<int>& fds )
{
  // Sanity-check that saveOptions() populated all three values.
  JASSERT ( _fcntlFlags >= 0 ) ( _fcntlFlags );
  JASSERT ( _fcntlOwner != -1 ) ( _fcntlOwner );
  JASSERT ( _fcntlSignal >= 0 ) ( _fcntlSignal );
  errno = 0;
  JASSERT ( fcntl ( fds[0], F_SETFL, _fcntlFlags ) == 0 ) ( fds[0] ) ( _fcntlFlags ) ( JASSERT_ERRNO );
  errno = 0;
  // The saved owner is passed through ORIGINAL_TO_CURRENT_PID before use;
  // presumably this maps the checkpoint-time pid to the current one
  // (macro defined elsewhere -- confirm).
  JASSERT ( fcntl ( fds[0], F_SETOWN, ORIGINAL_TO_CURRENT_PID(_fcntlOwner) ) == 0 ) 
    ( fds[0] ) ( _fcntlOwner ) ( JASSERT_ERRNO );
  errno = 0;
  JASSERT ( fcntl ( fds[0], F_SETSIG,_fcntlSignal ) == 0 ) ( fds[0] ) ( _fcntlSignal ) ( JASSERT_ERRNO );
}
206  	
// Marks this process as the owner of fds[0] by setting F_SETOWN to our real
// pid. hasLock() later compares F_GETOWN against the same pid to decide
// which process handles a shared descriptor.
void dmtcp::Connection::doLocking ( const dmtcp::vector<int>& fds )
{
  errno = 0;
  JASSERT ( fcntl ( fds[0], F_SETOWN, _real_getpid() ) == 0 ) 
    ( fds[0] ) ( JASSERT_ERRNO );
}
213  	
214  	/////////////////////////
215  	////// TCP UPDATE COMMANDS
216  	
217  	/*onSocket*/
218  	dmtcp::TcpConnection::TcpConnection ( int domain, int type, int protocol )
219  	  : Connection ( TCP_CREATED )
220  	#ifdef EXTERNAL_SOCKET_HANDLING
221  	  , _peerType ( PEER_UNKNOWN )
222  	#endif
223  	  , _sockDomain ( domain )
224  	  , _sockType ( type )
225  	  , _sockProtocol ( protocol )
226  	  , _listenBacklog ( -1 )
227  	  , _bindAddrlen ( 0 )
228  	  , _acceptRemoteId ( ConnectionIdentifier::Null() )
229  	{
230  	  if (domain != -1)
231  	    JTRACE ("Creating TcpConnection.") ( id() ) ( domain ) ( type ) ( protocol );
232  	  memset ( &_bindAddr, 0, sizeof _bindAddr );
233  	}
234  	
// A TcpConnection is already a TCP connection, so the conversion simply
// returns this object (unlike Connection::asTcp(), which asserts).
dmtcp::TcpConnection& dmtcp::TcpConnection::asTcp()
{
  return *this;
}
239  	
240  	void dmtcp::TcpConnection::onBind ( const struct sockaddr* addr, socklen_t len )
241  	{
242  	  JTRACE ("Binding.") ( id() ) ( len );
243  	
244  	  JASSERT ( tcpType() == TCP_CREATED ) ( tcpType() ) ( id() )
245  	    .Text ( "Binding a socket in use????" );
246  	  JASSERT ( len <= sizeof _bindAddr ) ( len ) ( sizeof _bindAddr )
247  	    .Text ( "That is one huge sockaddr buddy." );
248  	
249  	  _type = TCP_BIND;
250  	  _bindAddrlen = len;
251  	  memcpy ( &_bindAddr, addr, len );
252  	}
// Records a listen() on this socket: moves the state from TCP_BIND to
// TCP_LISTEN and remembers `backlog` so restore() can call listen() again.
void dmtcp::TcpConnection::onListen ( int backlog )
{
  JTRACE ( "Listening." ) ( id() ) ( backlog );

  JASSERT ( tcpType() == TCP_BIND ) ( tcpType() ) ( id() )
    .Text ( "Listening on a non-bind()ed socket????" );
  // NOTE(review): this rejects a backlog of 0, which listen(2) itself
  // accepts -- confirm that aborting in that case is intended.
  JASSERT ( backlog > 0 ) ( backlog )
    .Text ( "That is an odd backlog????" );

  _type = TCP_LISTEN;
  _listenBacklog = backlog;
}
265  	void dmtcp::TcpConnection::onConnect( int sockfd, 
266  	                                      const  struct sockaddr *addr, 
267  	                                      socklen_t len )
268  	{
269  	  JTRACE ( "Connecting." ) ( id() );
270  	
271  	  JASSERT ( tcpType() == TCP_CREATED ) ( tcpType() ) ( id() )
272  	    .Text ( "Connecting with an in-use socket????" );
273  	
274  	  if (addr != NULL && _isBlacklistedTcp(sockfd, addr, len) ) {
275  	    _type = TCP_EXTERNAL_CONNECT;
276  	    _connectAddrlen = len;
277  	    memcpy ( &_connectAddr, addr, len );
278  	  } else {
279  	    _type = TCP_CONNECT;
280  	  }
281  	}
282  	
283  	/*onAccept*/
284  	dmtcp::TcpConnection::TcpConnection ( const TcpConnection& parent, const ConnectionIdentifier& remote )
285  	  : Connection ( TCP_ACCEPT )
286  	#ifdef EXTERNAL_SOCKET_HANDLING
287  	  , _peerType ( PEER_UNKNOWN )
288  	#endif
289  	  , _sockDomain ( parent._sockDomain )
290  	  , _sockType ( parent._sockType )
291  	  , _sockProtocol ( parent._sockProtocol )
292  	  , _listenBacklog ( -1 )
293  	  , _bindAddrlen ( 0 )
294  	  , _acceptRemoteId ( remote )
295  	{
296  	  JTRACE ( "Accepting." ) ( id() ) ( parent.id() ) ( remote );
297  	
298  	  //     JASSERT(parent.tcpType() == TCP_LISTEN)(parent.tcpType())(parent.id())
299  	  //             .Text("Accepting from a non listening socket????");
300  	  memset ( &_bindAddr, 0, sizeof _bindAddr );
301  	}
// Marks this connection as TCP_ERROR. restore() recreates sockets in this
// state as dead sockets.
void dmtcp::TcpConnection::onError()
{
  JTRACE ( "Error." ) ( id() );
  _type = TCP_ERROR;
}
307  	
308  	void dmtcp::TcpConnection::addSetsockopt ( int level, int option, const char* value, int len )
309  	{
310  	  _sockOptions[level][option] = jalib::JBuffer ( value, len );
311  	}
312  	
313  	
314  	////////////
315  	///// TCP CHECKPOINTING
316  	
317  	#ifdef EXTERNAL_SOCKET_HANDLING
// Only built with EXTERNAL_SOCKET_HANDLING.
// For a connected socket (TCP_CONNECT / TCP_ACCEPT) that this process holds
// the lock on and whose peer type is still PEER_UNKNOWN, looks up the local
// and remote addresses of fds[0] and appends a TcpConnectionInfo entry to
// `conInfoTable`. For TCP_LISTEN / TCP_BIND it only asserts that the peer
// type is still unknown. Other states are ignored.
void dmtcp::TcpConnection::preCheckpointPeerLookup ( const dmtcp::vector<int>& fds,
                                                     dmtcp::vector<TcpConnectionInfo>& conInfoTable)
{
  JASSERT ( fds.size() > 0 ) ( id() );

  switch ( tcpType() )
  {
    case TCP_CONNECT:
    case TCP_ACCEPT:
      if ( hasLock ( fds ) && peerType() == PEER_UNKNOWN )
      {
        socklen_t addrlen_local = sizeof(struct sockaddr_storage);
        socklen_t addrlen_remote = sizeof(struct sockaddr_storage);
        struct sockaddr_storage local, remote;

        JASSERT ( 0 == getsockname ( fds[0], (sockaddr*)&local, &addrlen_local ) ) (JASSERT_ERRNO);
        JASSERT ( 0 == getpeername ( fds[0], (sockaddr*)&remote, &addrlen_remote ) ) (JASSERT_ERRNO);
        // Both ends are required to report the same address length and
        // address family.
        JASSERT ( addrlen_local == addrlen_remote ) ( addrlen_local ) ( addrlen_remote );
        JASSERT ( local.ss_family == remote.ss_family ) ( local.ss_family ) ( remote.ss_family );
        TcpConnectionInfo conInfo(id(), addrlen_local, local, remote );
        conInfoTable.push_back ( conInfo );
      }
      else
      {
        // Also reached when the lock is held but the peer type is already
        // known.
        JTRACE ( "Did not get lock.  Won't lookup." ) ( fds[0] ) ( id() );
      }
      break;
    case TCP_LISTEN:
    case TCP_BIND:
      JASSERT ( peerType() == PEER_UNKNOWN );
      break;
  }
}
351  	#endif
352  	
// Prepares this socket for a checkpoint.
//   1. If the saved fcntl flags contain O_ASYNC, the flag is cleared on
//      fds[0] for the duration of the checkpoint (postCheckpoint() puts the
//      saved options back).
//   2. Depending on the socket state:
//        TCP_CONNECT / TCP_ACCEPT  - if this process holds the lock, register
//                                    fds[0] with `drain` for draining;
//        TCP_LISTEN                - register fds[0] as a listen socket;
//        TCP_BIND                  - warn that pending connections are lost;
//        TCP_EXTERNAL_CONNECT      - nothing is drained.
//      Other states are left alone.
// `fds` must be non-empty; only fds[0] is used.
void dmtcp::TcpConnection::preCheckpoint ( const dmtcp::vector<int>& fds
    , KernelBufferDrainer& drain )
{
  JASSERT ( fds.size() > 0 ) ( id() );

  // Uses the flags previously stored in _fcntlFlags (see saveOptions()).
  if ( ( _fcntlFlags & O_ASYNC ) != 0 )
  {
    JTRACE ( "Removing O_ASYNC flag during checkpoint." ) ( fds[0] ) ( id() );
    errno = 0;
    JASSERT ( fcntl ( fds[0],F_SETFL,_fcntlFlags & ~O_ASYNC ) == 0 ) ( JASSERT_ERRNO ) ( fds[0] ) ( id() );
  }

  switch ( tcpType() )
  {
    case TCP_CONNECT:
    case TCP_ACCEPT:
      // Only the process that owns the (possibly shared) descriptor drains it.
      if ( hasLock ( fds ) )
      {
        const ConnectionIdentifier& toDrainId = id();
        JTRACE ( "Will drain socket" ) ( fds[0] ) ( toDrainId )
          ( _acceptRemoteId );
        drain.beginDrainOf ( fds[0], toDrainId );
      }
      else
      {
        JTRACE ( "Did not get lock.  Won't drain" ) ( fds[0] ) ( id() );
      }
      break;
    case TCP_LISTEN:
      drain.addListenSocket ( fds[0] );
      break;
    case TCP_BIND:
      // The condition is always false inside this case, so the warning is
      // emitted unconditionally for bound-but-not-listening sockets.
      JWARNING ( tcpType() != TCP_BIND ) ( fds[0] )
        .Text ( "If there are pending connections on this socket,\n"
		" they won't be checkpointed because"
		" it is not yet in a listen state." );
      break;
    case TCP_EXTERNAL_CONNECT:
      JTRACE ( "Socket to External Process, won't be drained" ) ( fds[0] );
      break;
  }
}
395  	
396  	void dmtcp::TcpConnection::doSendHandshakes( const dmtcp::vector<int>& fds, const dmtcp::UniquePid& coordinator ){
397  	    switch ( tcpType() )
398  	    {
399  	      case TCP_CONNECT:
400  	      case TCP_ACCEPT:
401  	        if ( hasLock ( fds ) )
402  	        {
403  	          JTRACE ("Sending handshake ...") (id()) (fds[0]);
404  	          jalib::JSocket sock(fds[0]);
405  	          sendHandshake( sock, coordinator );
406  	        }
407  	        else
408  	        {
409  	          JTRACE("Skipping handshake send (shared socket, not owner).")
410  			(id()) (fds[0]);
411  	        }
412  	        break;
413  	      case TCP_EXTERNAL_CONNECT:
414  	        JTRACE ( "Socket to External Process, skipping handshake send" ) ( fds[0] );
415  	        break;
416  	    }
417  	  }
418  	  void dmtcp::TcpConnection::doRecvHandshakes( const dmtcp::vector<int>& fds, const dmtcp::UniquePid& coordinator ){
419  	    switch ( tcpType() )
420  	    {
421  	      case TCP_CONNECT:
422  	      case TCP_ACCEPT:
423  	        if ( hasLock ( fds ) )
424  	        {
425  	          JTRACE ("Receiving handshake ...") (id()) (fds[0]);
426  	          jalib::JSocket sock(fds[0]);
427  	          recvHandshake( sock, coordinator );
428  	          JTRACE ("Received handshake.") (getRemoteId()) (fds[0]);
429  	        }
430  	        else
431  	        {
432  	          JTRACE ("Skipping handshake recv (shared socket, not owner).")
433  			(id()) (fds[0]);
434  	        }
435  	        break;
436  	      case TCP_EXTERNAL_CONNECT:
437  	        JTRACE ( "Socket to External Process, skipping handshake recv" ) ( fds[0] );
438  	        break;
439  	    }
440  	  }
441  	
442  	void dmtcp::TcpConnection::postCheckpoint ( const dmtcp::vector<int>& fds, bool isRestart )
443  	{
444  	  if ( ( _fcntlFlags & O_ASYNC ) != 0 )
445  	  {
446  	    JTRACE ( "Re-adding O_ASYNC flag." ) ( fds[0] ) ( id() );
447  	    restoreOptions ( fds );
448  	  }
449  	}
450  	void dmtcp::TcpConnection::restore ( const dmtcp::vector<int>& fds, ConnectionRewirer& rewirer )
451  	{
452  	  JASSERT ( fds.size() > 0 );
453  	  switch ( tcpType() )
454  	  {
455  	    case TCP_PREEXISTING:
456  	    case TCP_ERROR: //not a valid socket
457  	    case TCP_INVALID:
458  	    case TCP_EXTERNAL_CONNECT:
459  	    {
460  	      JTRACE("Creating dead socket.") (fds[0]) (fds.size());
461  	      jalib::JSocket deadSock ( _makeDeadSocket() );
462  	      deadSock.changeFd ( fds[0] );
463  	      for ( size_t i=1; i<fds.size(); ++i )
464  	      {
465  	        JASSERT ( _real_dup2 ( fds[0], fds[i] ) == fds[i] ) ( fds[0] ) ( fds[i] )
466  	          .Text ( "dup2() failed" );
467  	      }
468  	      break;
469  	     }
470  	    case TCP_CREATED:
471  	    case TCP_BIND:
472  	    case TCP_LISTEN:
473  	    {
474  	      JWARNING (  (_sockDomain == AF_INET || _sockDomain == AF_UNIX || _sockDomain == AF_INET6) && _sockType == SOCK_STREAM )
475  	        ( id() )
476  	        ( _sockDomain )
477  	        ( _sockType )
478  	        ( _sockProtocol )
479  	        .Text ( "Socket type not yet [fully] supported." );
480  	
481  	      JTRACE ( "Restoring socket." ) ( id() ) ( fds[0] );
482  	
483  	      jalib::JSocket sock ( _real_socket ( _sockDomain,_sockType,_sockProtocol ) );
484  	      JASSERT ( sock.isValid() );
485  	      sock.changeFd ( fds[0] );
486  	
487  	      for ( size_t i=1; i<fds.size(); ++i )
488  	      {
489  	        JASSERT ( _real_dup2 ( fds[0], fds[i] ) == fds[i] ) ( fds[0] ) ( fds[i] )
490  	          .Text ( "dup2() failed" );
491  	      }
492  	
493  	      if ( tcpType() == TCP_CREATED ) break;
494  	
495  	      if ( _sockDomain == AF_UNIX )
496  	      {
497  	        const char* un_path = ( ( sockaddr_un* ) &_bindAddr )->sun_path;
498  	        JTRACE ( "Unlinking stale unix domain socket." ) ( un_path );
499  	        JWARNING ( unlink ( un_path ) == 0 ) ( un_path );
500  	      }
501  	      /*
502  	       * During restart, some socket options must be restored (using
503  	       * setsockopt) before the socket is used (bind etc.), otherwise we might
504  	       * not be able to restore them at all. One such option is set in the
505  	       * following way for IPV6 family:
506  	       * setsockopt (sd, IPPROTO_IPV6, IPV6_V6ONLY,...)
507  	       * This fix works for now. A better approach would be to restore the
508  	       * socket options in the order in which they are set by the user program.
509  	       * This fix solves a bug that caused OpenMPI to fail to restart under
510  	       * DMTCP.
511  	       *                               --Kapil
512  	       */
513  	
514  	      if (_sockDomain == AF_INET6) {
515  	        JTRACE("Restoring some socket options before binding.");
516  	        typedef dmtcp::map< int, dmtcp::map< int, jalib::JBuffer > >::iterator levelIterator;
517  	        typedef dmtcp::map< int, jalib::JBuffer >::iterator optionIterator;
518  	
519  	        for ( levelIterator lvl = _sockOptions.begin();
520  		      lvl!=_sockOptions.end(); ++lvl ) {
521  	          if (lvl->first == IPPROTO_IPV6) {
522  	            for ( optionIterator opt = lvl->second.begin();
523  			  opt!=lvl->second.end(); ++opt ) {
524  	              if (opt->first == IPV6_V6ONLY) {
525  	              JTRACE ( "Restoring socket option." )
526  			     ( fds[0] ) ( opt->first ) ( opt->second.size() );
527  	              int ret = _real_setsockopt ( fds[0], lvl->first, opt->first,
528  						   opt->second.buffer(),
529  						   opt->second.size() );
530  	              JASSERT ( ret == 0 ) ( JASSERT_ERRNO ) ( fds[0] ) (lvl->first)
531  			      ( opt->first ) (opt->second.buffer()) ( opt->second.size() )
532  	                  .Text ( "Restoring setsockopt failed." );
533  	              }
534  	            }
535  	          }
536  	        }
537  	      }
538  	
539  	      JTRACE ( "Binding socket." ) ( id() );
540  	      errno = 0;
541  	      JWARNING ( sock.bind ( ( sockaddr* ) &_bindAddr,_bindAddrlen ) )
542  	        ( JASSERT_ERRNO ) ( id() )
543  	        .Text ( "Bind failed." );
544  	      if ( tcpType() == TCP_BIND ) break;
545  	
546  	      JTRACE ( "Listening socket." ) ( id() );
547  	      errno = 0;
548  	      JWARNING ( sock.listen ( _listenBacklog ) )
549  	        ( JASSERT_ERRNO ) ( id() ) ( _listenBacklog )
550  	        .Text ( "Bind failed." );
551  	      if ( tcpType() == TCP_LISTEN ) break;
552  	
553  	    }
554  	    break;
555  	    case TCP_ACCEPT:
556  	      JASSERT(!_acceptRemoteId.isNull())( id() ) ( _acceptRemoteId ) ( fds[0] )
557  	        .Text("Can't restore a TCP_ACCEPT socket with null acceptRemoteId.\n"
558  		      "  Perhaps handshake went wrong?");
559  	      JTRACE ( "registerOutgoing" ) ( id() ) ( _acceptRemoteId ) ( fds[0] );
560  	      rewirer.registerOutgoing ( _acceptRemoteId, fds );
561  	      break;
562  	    case TCP_CONNECT:
563  	      JTRACE ( "registerIncoming" ) ( id() ) ( _acceptRemoteId ) ( fds[0] );
564  	      rewirer.registerIncoming ( id(), fds );
565  	      break;
566  	//    case TCP_EXTERNAL_CONNECT:
567  	//      int sockFd = _real_socket ( _sockDomain, _sockType, _sockProtocol );
568  	//      JASSERT ( sockFd >= 0);
569  	//      JASSERT ( _real_dup2 ( sockFd, fds[0] ) == fds[0] );
570  	//      JWARNING(0 == _real_connect(sockFd, (sockaddr*) &_connectAddr, _connectAddrlen))
571  	//        (fds[0]) (JASSERT_ERRNO) .Text("Unable to connect to external process");
572  	//      break;
573  	  }
574  	}
575  	
576  	
577  	void dmtcp::TcpConnection::restoreOptions ( const dmtcp::vector<int>& fds )
578  	{
579  	
580  	  typedef dmtcp::map< int, dmtcp::map< int, jalib::JBuffer > >::iterator levelIterator;
581  	  typedef dmtcp::map< int, jalib::JBuffer >::iterator optionIterator;
582  	
583  	  if (_sockDomain != AF_INET6 && tcpType() != TCP_EXTERNAL_CONNECT ) {
584  	    for ( levelIterator lvl = _sockOptions.begin();
585  		  lvl!=_sockOptions.end(); ++lvl ) {
586  	      for ( optionIterator opt = lvl->second.begin();
587  		    opt!=lvl->second.end(); ++opt ) {
588  	        JTRACE ( "Restoring socket option." )
589  		       ( fds[0] ) ( opt->first ) ( opt->second.size() );
590  	        int ret = _real_setsockopt ( fds[0], lvl->first, opt->first,
591  					     opt->second.buffer(), opt->second.size() );
592  	        JASSERT ( ret == 0 ) ( JASSERT_ERRNO ) ( fds[0] )
593  		        (lvl->first) ( opt->first ) ( opt->second.size() )
594  	          .Text ( "Restoring setsockopt failed." );
595  	      }
596  	    }
597  	  }
598  	
599  	
600  	  //call base version (F_GETFL etc)
601  	  Connection::restoreOptions ( fds );
602  	
603  	}
604  	
605  	void dmtcp::TcpConnection::sendHandshake(jalib::JSocket& remote, const dmtcp::UniquePid& coordinator){
606  	  dmtcp::DmtcpMessage hello_local;
607  	  hello_local.type = dmtcp::DMT_HELLO_PEER;
608  	  hello_local.from = id();
609  	  hello_local.coordinator = coordinator;
610  	  remote << hello_local;
611  	}
612  	
613  	void dmtcp::TcpConnection::recvHandshake(jalib::JSocket& remote, const dmtcp::UniquePid& coordinator){
614  	  dmtcp::DmtcpMessage hello_remote;
615  	  hello_remote.poison();
616  	  remote >> hello_remote;
617  	  hello_remote.assertValid();
618  	  JASSERT ( hello_remote.type == dmtcp::DMT_HELLO_PEER );
619  	  JASSERT ( hello_remote.coordinator == coordinator )( hello_remote.coordinator ) ( coordinator )
620  	    .Text ( "Peer has a different dmtcp_coordinator than us!\n"
621  		    "  It must be the same." );
622  	
623  	  if(_acceptRemoteId.isNull()){
624  	    //first time
625  	    _acceptRemoteId = hello_remote.from;
626  	    JASSERT (!_acceptRemoteId.isNull())
627  		    .Text("Read handshake with invalid 'from' field.");
628  	  }else{
629  	    //next time
630  	    JASSERT (_acceptRemoteId == hello_remote.from)
631  		    (_acceptRemoteId)(hello_remote.from)
632  	            .Text("Read handshake with a different 'from' field"
633  			  " than a previous handshake.");
634  	  }
635  	}
636  	
637  	////////////
638  	///// PTY CHECKPOINTING
639  	
640  	void dmtcp::PtyConnection::preCheckpoint ( const dmtcp::vector<int>& fds
641  	    , KernelBufferDrainer& drain )
642  	{
643  	  if (ptyType() == PTY_MASTER && hasLock(fds)) {
644  	    const int maxCount = 10000;
645  	    char buf[maxCount];
646  	    int numRead, numWritten;
647  	    // fds[0] is master fd
648  	    numRead = ptmxReadAll(fds[0], buf, maxCount);
649  	    _ptmxIsPacketMode = ptmxTestPacketMode(fds[0]);
650  	    JTRACE("fds[0] is master (/dev/ptmx)")(fds[0])(_ptmxIsPacketMode);
651  	    numWritten = ptmxWriteAll(fds[0], buf, _ptmxIsPacketMode);
652  	    JASSERT(numRead == numWritten)(numRead)(numWritten);
653  	  }
654  	
655  	  if (ptyType() == PTY_SLAVE || ptyType() == PTY_BSD_SLAVE) {
656  	    _restoreInSecondIteration = true;
657  	  }
658  	}
659  	
// After a checkpoint (or restart) simply re-run restoreOptions() on the pty
// descriptors. `isRestart` is not used here.
void dmtcp::PtyConnection::postCheckpoint ( const dmtcp::vector<int>& fds, bool isRestart )
{
  restoreOptions ( fds );
}
664  	
665  	void dmtcp::PtyConnection::restore ( const dmtcp::vector<int>& fds, ConnectionRewirer& rewirer )
666  	{
667  	  JASSERT ( fds.size() > 0 );
668  	
669  	  int tempfd;
670  	
671  	  switch ( ptyType() )
672  	  {
673  	    case PTY_INVALID:
674  	      //tempfd = open("/dev/null", O_RDWR);
675  	      JTRACE ("Restoring invalid PTY.") (id());
676  	      return;
677  	
678  	    case PTY_DEV_TTY:
679  	    {
680  	      dmtcp::string tty = "/dev/tty";
681  	
682  	      tempfd = open ( tty.c_str(), _fcntlFlags );
683  	      JASSERT ( tempfd >= 0 ) ( tempfd ) ( tty ) ( JASSERT_ERRNO )
684  	        .Text ( "Error Opening the terminal device" );
685  	
686  	      JASSERT ( _real_dup2 ( tempfd, fds[0] ) == fds[0] ) ( tempfd ) ( fds[0] )
687  	        .Text ( "dup2() failed" );
688  	
689  	      close(tempfd);
690  	
691  	      JTRACE ( "Restoring /dev/tty for the process" ) ( tty ) ( fds[0] );
692  	
693  	      _ptsName = _uniquePtsName = tty;
694  	
695  	      break;
696  	    }
697  	
698  	    case PTY_CTTY:
699  	    {
700  	      dmtcp::string controllingTty = jalib::Filesystem::GetControllingTerm();
701  	      JASSERT ( controllingTty.length() > 0 ) ( STDIN_FILENO )
702  	        . Text ("Unable to restore terminal attached with the process");
703  	
704  	      tempfd = open ( controllingTty.c_str(), _fcntlFlags );
705  	      JASSERT ( tempfd >= 0 ) ( tempfd ) ( controllingTty ) ( JASSERT_ERRNO )
706  	        .Text ( "Error Opening the terminal attached with the process" );
707  	
708  	      JASSERT ( _real_dup2 ( tempfd, fds[0] ) == fds[0] ) ( tempfd ) ( fds[0] )
709  	        .Text ( "dup2() failed" );
710  	
711  	      close(tempfd);
712  	
713  	      JTRACE ( "Restoring CTTY for the process" ) ( controllingTty ) ( fds[0] );
714  	
715  	      _ptsName = _uniquePtsName = controllingTty;
716  	
717  	      break;
718  	    }
719  	
720  	    case PTY_MASTER:
721  	    {
722  	      char pts_name[80];
723  	      JTRACE ( "Restoring /dev/ptmx" ) ( fds[0] );
724  	
725  	      tempfd = open ( "/dev/ptmx", O_RDWR );
726  	
727  	      JASSERT ( tempfd >= 0 ) ( tempfd ) ( JASSERT_ERRNO )
728  	        .Text ( "Error Opening /dev/ptmx" );
729  	
730  	      JASSERT ( grantpt ( tempfd ) >= 0 ) ( tempfd ) ( JASSERT_ERRNO );
731  	
732  	      JASSERT ( unlockpt ( tempfd ) >= 0 ) ( tempfd ) ( JASSERT_ERRNO );
733  	
734  	      JASSERT ( _real_ptsname_r ( tempfd, pts_name, 80 ) == 0 ) ( tempfd ) ( JASSERT_ERRNO );
735  	
736  	      JASSERT ( _real_dup2 ( tempfd, fds[0] ) == fds[0] ) ( tempfd ) ( fds[0] )
737  	        .Text ( "dup2() failed" );
738  	
739  	      close(tempfd);
740  	
741  	      _ptsName = pts_name;
742  	
743  	      //dmtcp::string deviceName = "ptmx[" + _ptsName + "]:" + "/dev/ptmx";
744  	
745  	      //dmtcp::KernelDeviceToConnection::instance().erase ( id() );
746  	
747  	      //dmtcp::KernelDeviceToConnection::instance().createPtyDevice ( fds[0], deviceName, (Connection*) this );
748  	
749  	      UniquePtsNameToPtmxConId::instance().add ( _uniquePtsName, id() );
750  	
751  	      if (ptyType() == PTY_MASTER) {
752  	        int packetMode = _ptmxIsPacketMode;
753  	        ioctl(fds[0], TIOCPKT, &packetMode); /* Restore old packet mode */
754  	      }
755  	
756  	      break;
757  	    }
758  	    case PTY_SLAVE:
759  	    {
760  	      JASSERT( _ptsName.compare ( "?" ) != 0 );
761  	
762  	      _ptsName = dmtcp::UniquePtsNameToPtmxConId::instance().retrieveCurrentPtsDeviceName ( _uniquePtsName );
763  	
764  	      tempfd = open ( _ptsName.c_str(), O_RDWR );
765  	      JASSERT ( tempfd >= 0 ) ( _uniquePtsName ) ( _ptsName ) ( JASSERT_ERRNO )
766  	        .Text ( "Error Opening PTS" );
767  	
768  	      JASSERT ( _real_dup2 ( tempfd, fds[0] ) == fds[0] ) ( tempfd ) ( fds[0] )
769  	        .Text ( "dup2() failed" );
770  	
771  	      close(tempfd);
772  	
773  	      JTRACE ( "Restoring PTS real" ) ( _ptsName ) ( _uniquePtsName ) ( fds[0] );
774  	
775  	      //dmtcp::string deviceName = "pts:" + _ptsName;
776  	
777  	      //dmtcp::KernelDeviceToConnection::instance().erase ( id() );
778  	
779  	      //dmtcp::KernelDeviceToConnection::instance().createPtyDevice ( fds[0], deviceName, (Connection*) this );
780  	      break;
781  	    }
782  	    case PTY_BSD_MASTER:
783  	    {
784  	      JTRACE ( "Restoring BSD Master Pty" ) ( _bsdDeviceName ) ( fds[0] );
785  	      dmtcp::string slaveDeviceName = _bsdDeviceName.replace(0, strlen("/dev/pty"), "/dev/tty");
786  	
787  	      tempfd = open ( _bsdDeviceName.c_str(), O_RDWR );
788  	
789  	      // FIXME: If unable to open the original BSD Master Pty, we should try to
790  	      // open another one until we succeed and then open slave device accordingly.
791  	      // This can be done by creating a function openBSDMaster, which will try
792  	      // to open the original master device, but if unable to do so, it would
793  	      // keep on trying all the possible BSD Master devices until one is
794  	      // opened. It should then create a mapping between original Master/Slave
795  	      // device name and current Master/Slave device name.
796  	      JASSERT ( tempfd >= 0 ) ( tempfd ) ( JASSERT_ERRNO )
797  	        .Text ( "Error Opening BSD Master Pty. (Already in use?)" );
798  	
799  	      JASSERT ( _real_dup2 ( tempfd, fds[0] ) == fds[0] ) ( tempfd ) ( fds[0] )
800  	        .Text ( "dup2() failed" );
801  	
802  	      close(tempfd);
803  	
804  	      break;
805  	    }
806  	    case PTY_BSD_SLAVE:
807  	    {
808  	      JTRACE ( "Restoring BSD Slave Pty" ) ( _bsdDeviceName ) ( fds[0] );
809  	      dmtcp::string masterDeviceName = _bsdDeviceName.replace(0, strlen("/dev/tty"), "/dev/pty");
810  	
811  	      tempfd = open ( _bsdDeviceName.c_str(), O_RDWR );
812  	
813  	      JASSERT ( tempfd >= 0 ) ( tempfd ) ( JASSERT_ERRNO )
814  	        .Text ( "Error Opening BSD Slave Pty. (Already in use?)" );
815  	
816  	      JASSERT ( _real_dup2 ( tempfd, fds[0] ) == fds[0] ) ( tempfd ) ( fds[0] )
817  	        .Text ( "dup2() failed" );
818  	
819  	      close(tempfd);
820  	
821  	      break;
822  	    }
823  	    default:
824  	    {
825  	      // should never reach here
826  	      JASSERT ( false ) .Text( "Should never reach here." );
827  	    }
828  	  }
829  	
830  	  for ( size_t i=1; i<fds.size(); ++i )
831  	  {
832  	    JASSERT ( _real_dup2 ( fds[0], fds[i] ) == fds[i] ) ( fds[0] ) ( fds[i] )
833  	      .Text ( "dup2() failed" );
834  	  }
835  	}
836  	
837  	void dmtcp::PtyConnection::restoreOptions ( const dmtcp::vector<int>& fds )
838  	{
839  	  switch ( ptyType() )
840  	  {
841  	    case PTY_INVALID:
842  	      return;
843  	
844  	    case PTY_DEV_TTY:
845  	    {
846  	      dmtcp::string device = jalib::Filesystem::ResolveSymlink ( _procFDPath ( fds[0] ) );
847  	      JASSERT(device.compare("/dev/tty") == 0);
848  	      _ptsName = _uniquePtsName = device;
849  	      break;
850  	    }
851  	
852  	    case PTY_CTTY:
853  	    {
854  	      dmtcp::string device = jalib::Filesystem::ResolveSymlink ( _procFDPath ( fds[0] ) );
855  	      _ptsName = _uniquePtsName = device;
856  	      break;
857  	    }
858  	
859  	    case PTY_MASTER:
860  	    {
861  	      char pts_name[80];
862  	
863  	      JASSERT ( _real_ptsname_r ( fds[0], pts_name, 80 ) == 0 ) ( fds[0] ) ( JASSERT_ERRNO );
864  	
865  	      _ptsName = pts_name;
866  	
867  	      JTRACE ( "Restoring Options /dev/ptmx real" ) ( _ptsName ) ( _uniquePtsName ) ( fds[0] );
868  	
869  	      UniquePtsNameToPtmxConId::instance().add ( _uniquePtsName, id() );
870  	
871  	      break;
872  	    }
873  	    case PTY_SLAVE:
874  	    {
875  	      JASSERT( _ptsName.compare ( "?" ) != 0 );
876  	
877  	      _ptsName = jalib::Filesystem::ResolveSymlink ( _procFDPath ( fds[0] ) );
878  	
879  	      JTRACE ( "Restoring Options PTS real" ) ( _ptsName ) ( _uniquePtsName ) ( fds[0] );
880  	
881  	      break;
882  	    }
883  	    default:
884  	    {
885  	      // should never reach here
886  	      JASSERT ( false ) .Text( "Should never reach here." );
887  	    }
888  	  }
889  	  Connection::restoreOptions ( fds );
890  	}
891  	
892  	////////////
893  	///// FILE CHECKPOINTING
894  	
895  	// Upper limit on filesize for files that are automatically chosen for ckpt.
896  	// Default 100MB
897  	#define MAX_FILESIZE_TO_AUTOCKPT (100 * 1024 * 1024)
898  	
899  	void dmtcp::FileConnection::handleUnlinkedFile()
900  	{
901  	  if (!jalib::Filesystem::FileExists(_path)) {
902  	    /* File not present in Filesystem.
903  	     * /proc/self/fd lists filename of unlink()ed files as:
904  	     *   "<original_file_name> (deleted)"
905  	     */
906  	
907  	    if (Util::strEndsWith(_path, DELETED_FILE_SUFFIX)) {
908  	      _path.erase( _path.length() - strlen(DELETED_FILE_SUFFIX) );
909  	      _type = FILE_DELETED;
910  	    } else {
911  	      JASSERT(_type == FILE_DELETED) (_path)
912  	        .Text ("File not found on disk and yet the filename doesn't "
913  	               "contain the suffix '(deleted)'");
914  	    }
915  	  } else if (Util::strStartsWith(jalib::Filesystem::FileBaseName(_path),
916  	                                 ".nfs")) {
917  	    JWARNING(access(_path.c_str(), W_OK) == 0) (JASSERT_ERRNO);
918  	    JTRACE(".nfsXXXX: files that are unlink()'d but still in use by some process(es)")
919  	      (_path);
920  	    _type = FILE_DELETED;
921  	  }
922  	}
923  	
924  	void dmtcp::FileConnection::calculateRelativePath ()
925  	{
926  	  dmtcp::string cwd = jalib::Filesystem::GetCWD();
927  	  if (_path.compare(0, cwd.length(), cwd) == 0) {
928  	    /* CWD = "/A/B", FileName = "/A/B/C/D" ==> relPath = "C/D" */
929  	    _rel_path = _path.substr(cwd.length() + 1);
930  	  } else {
931  	    _rel_path = "*";
932  	  }
933  	}
934  	
935  	void dmtcp::FileConnection::preCheckpoint ( const dmtcp::vector<int>& fds
936  	    , KernelBufferDrainer& drain )
937  	{
938  	  JASSERT ( fds.size() > 0 );
939  	
940  	  handleUnlinkedFile();
941  	
942  	  calculateRelativePath();
943  	
944  	  _ckptFilesDir = UniquePid::checkpointFilesDirName();
945  	
946  	  // Read the current file descriptor offset
947  	  _offset = lseek(fds[0], 0, SEEK_CUR);
948  	  stat(_path.c_str(),&_stat);
949  	  _checkpointed = false;
950  	
951  	  if (_isBlacklistedFile(_path)) {
952  	    return;
953  	  }
954  	  if (hasLock(fds)) {
955  	    if (getenv(ENV_VAR_CKPT_OPEN_FILES) != NULL) {
956  	      saveFile(fds[0]);
957  	    } else if (_type == FILE_DELETED) {
958  	      saveFile(fds[0]);
959  	    } else if ((_fcntlFlags & (O_WRONLY|O_RDWR)) != 0 &&
960  	               _offset < _stat.st_size &&
961  	               _stat.st_size < MAX_FILESIZE_TO_AUTOCKPT &&
962  	               _stat.st_uid == getuid()) {
963  	      saveFile(fds[0]);
964  	    } else if (_isVimApp() &&
965  	               (Util::strEndsWith(_path, ".swp") == 0 ||
966  	                Util::strEndsWith(_path, ".swo") == 0)) {
967  	      saveFile(fds[0]);
968  	    } else if (Util::strStartsWith(jalib::Filesystem::GetProgramName(), "emacs")) {
969  	      saveFile(fds[0]);
970  	    }
971  	  } else {
972  	    _restoreInSecondIteration = true;
973  	  }
974  	}
975  	
976  	void dmtcp::FileConnection::postCheckpoint ( const dmtcp::vector<int>& fds, bool isRestart )
977  	{
978  	  restoreOptions ( fds );
979  	  if (_checkpointed && isRestart && _type == FILE_DELETED) {
980  	    /* Here we want to unlink the file. We want to do it only at the time of
981  	     * restart, but there is no way of finding out if we are restarting or not.
982  	     * That is why we look for the file on disk and if it is present (it was
983  	     * deleted at ckpt time), then we assume that we are restarting and hence
984  	     * we unlink the file.
985  	     */
986  	     if (jalib::Filesystem::FileExists(_path)) {
987  	      JWARNING( unlink(_path.c_str()) != -1 ) (_path)
988  	        .Text("The file was unlinked at the time of checkpoint. "
989  	              "Unlinking it after restart failed");
990  	     }
991  	  }
992  	}
993  	
994  	void dmtcp::FileConnection::refreshPath()
995  	{
996  	  dmtcp::string cwd = jalib::Filesystem::GetCWD();
997  	  if( _rel_path != "*" ){ // file path is relative to executable current dir
998  	    string oldPath = _path;
999  	    dmtcp::string fullPath = cwd + "/" + _rel_path;
1000 	    if( jalib::Filesystem::FileExists(fullPath) ){
1001 	      _path = fullPath;
1002 	      JTRACE("Change _path based on relative path") (oldPath) (_path) (_rel_path);
1003 	    }
1004 	  }
1005 	}
1006 	
1007 	void dmtcp::FileConnection::restoreOptions ( const dmtcp::vector<int>& fds )
1008 	{
1009 	  refreshPath();
1010 	  //call base version (F_GETFL etc)
1011 	  Connection::restoreOptions ( fds );
1012 	}
1013 	
1014 	
1015 	void dmtcp::FileConnection::restore ( const dmtcp::vector<int>& fds, 
1016 	                                      ConnectionRewirer& rewirer )
1017 	{
1018 	  struct stat buf;
1019 	
1020 	  JASSERT ( fds.size() > 0 );
1021 	
1022 	  JTRACE("Restoring File Connection") (id()) (_path);
1023 	  refreshPath();
1024 	
1025 	  if (_checkpointed && jalib::Filesystem::FileExists(_path)) {
1026 	    JASSERT(false) (_path)
1027 	      .Text("File aready exists! Checkpointed copy can't be restored. "
1028 	            "Delete the existing file and try again!");
1029 	  }
1030 	
1031 	  if (stat(_path.c_str() ,&buf) == 0 && S_ISREG(buf.st_mode)) {
1032 	    if (buf.st_size > _stat.st_size && 
1033 	        _fcntlFlags & (O_WRONLY|O_RDWR) != 0) {
1034 	      errno = 0;
1035 	      JASSERT ( truncate ( _path.c_str(), _stat.st_size ) ==  0 )
1036 	              ( _path.c_str() ) ( _stat.st_size ) ( JASSERT_ERRNO );
1037 	    } else if (buf.st_size < _stat.st_size) {
1038 	      JWARNING (false) .Text("Size of file smaller than what we expected");
1039 	    }
1040 	  }
1041 	
1042 	  int tempfd = openFile ();
1043 	
1044 	  JASSERT ( tempfd > 0 ) ( tempfd ) ( _path ) ( JASSERT_ERRNO );
1045 	
1046 	  for(size_t i=0; i<fds.size(); ++i)
1047 	  {
1048 	    JASSERT ( _real_dup2 ( tempfd, fds[0] ) == fds[0] ) ( tempfd ) ( fds[0] )
1049 	      .Text ( "dup2() failed" );
1050 	  }
1051 	  _real_close(tempfd);
1052 	
1053 	  errno = 0;
1054 	  if (S_ISREG(buf.st_mode)) {
1055 	    if (_offset <= buf.st_size && _offset <= _stat.st_size) {
1056 	      JASSERT ( lseek ( fds[0], _offset, SEEK_SET ) == _offset )
1057 		      ( _path ) ( _offset ) ( JASSERT_ERRNO );
1058 	      //JTRACE ("lseek ( fds[0], _offset, SEEK_SET )") (fds[0]) (_offset);
1059 	    } else if (_offset > buf.st_size || _offset > _stat.st_size) {
1060 	      JWARNING(false) ( _path ) (_offset ) ( _stat.st_size ) ( buf.st_size )
1061 	        .Text("No lseek done:  offset is larger than min of old and new size.");
1062 	    }
1063 	  }
1064 	}
1065 	
1066 	static void CreateDirectoryStructure(const dmtcp::string& path)
1067 	{
1068 	  size_t index = path.rfind('/');
1069 	
1070 	  if (index == dmtcp::string::npos)
1071 	    return;
1072 	
1073 	  dmtcp::string dir = path.substr(0, index);
1074 	
1075 	  index = path.find('/');
1076 	  while (index != dmtcp::string::npos) {
1077 	    if (index > 1) {
1078 	      dmtcp::string dirName = path.substr(0, index);
1079 	
1080 	      int res = mkdir(dirName.c_str(), S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH);
1081 	      JASSERT(res != -1 || errno==EEXIST) (dirName) (path)
1082 	        .Text("Unable to create directory in File Path");
1083 	    }
1084 	    index = path.find('/', index+1);
1085 	  }
1086 	}
1087 	
1088 	static void CopyFile(const dmtcp::string& src, const dmtcp::string& dest)
1089 	{
1090 	  //dmtcp::ifstream in(src.c_str(), dmtcp::ios::in | dmtcp::ios::binary);
1091 	  //dmtcp::ofstream out(dest.c_str(), dmtcp::ios::in | dmtcp::ios::binary);
1092 	  //out << in.rdbuf();
1093 	
1094 	  dmtcp::string command = "cp -f " + src + " " + dest;
1095 	  JASSERT(_real_system(command.c_str()) != -1);
1096 	}
1097 	
1098 	int dmtcp::FileConnection::openFile()
1099 	{
1100 	  int fd;
1101 	  JASSERT(WorkerState::currentState() == WorkerState::RESTARTING);
1102 	
1103 	  /* 
1104 	   * This file was not checkpointed by this process so it won't be restored by
1105 	   * this process. Thus, we wait while some other process restores this file
1106 	   */
1107 	  int count = 1;
1108 	  while (!_checkpointed && !jalib::Filesystem::FileExists(_path)) {
1109 	    struct timespec sleepTime = {0, 10*1000*1000};
1110 	    nanosleep(&sleepTime, NULL);
1111 	    count++;
1112 	    if (count % 200 == 0) {
1113 	      // Print this message every second
1114 	      JTRACE("Waiting for the file to be created/restored by some other process") (_path);
1115 	    }
1116 	    if (count%1000 == 0) {
1117 	      JWARNING(false) (_path)
1118 	        .Text ("Still waiting for the file to be created/restored by some other process");
1119 	    }
1120 	  }
1121 	
1122 	  if (_checkpointed && !jalib::Filesystem::FileExists(_path)) {
1123 	
1124 	    JNOTE("File not present, copying from saved checkpointed file") (_path);
1125 	
1126 	    dmtcp::string savedFilePath = getSavedFilePath(_path);
1127 	
1128 	    JASSERT( jalib::Filesystem::FileExists(savedFilePath) )
1129 	      (savedFilePath) (_path) .Text("Unable to Find checkpointed copy of File");
1130 	
1131 	    CreateDirectoryStructure(_path);
1132 	
1133 	    JTRACE("Copying saved checkpointed file to original location")
1134 	      (savedFilePath) (_path);
1135 	    CopyFile(savedFilePath, _path);
1136 	    //HACK: This was deleting our checkpoint files on RHEL5.2,
1137 	    //      perhaps we are leaking file descriptors in the restart process.
1138 	    //      Deleting files is scary... maybe we want to make a stricter test.
1139 	    //
1140 	    // // Unlink the File if the File was unlinked at the time of checkpoint
1141 	    // if (_type == FILE_DELETED) {
1142 	    //   JASSERT( unlink(_path.c_str()) != -1 )
1143 	    //     .Text("Unlinking of pre-checkpoint-deleted file failed");
1144 	    // }
1145 	
1146 	  }
1147 	
1148 	  fd = open(_path.c_str(), _fcntlFlags);
1149 	  JTRACE ("open(_path.c_str(), _fcntlFlags)")
1150 		 (fd) (_path.c_str() )(_fcntlFlags);
1151 	
1152 	  JASSERT(fd != -1) (_path) (JASSERT_ERRNO)
1153 	    .Text ("open() failed");
1154 	  return fd;
1155 	}
1156 	
1157 	void dmtcp::FileConnection::saveFile(int fd)
1158 	{
1159 	  _checkpointed = true;
1160 	  dmtcp::string savedFilePath = getSavedFilePath(_path);
1161 	  CreateDirectoryStructure(savedFilePath);
1162 	  JTRACE("Saving checkpointed copy of the file") (_path) (savedFilePath);
1163 	
1164 	  if (_type == FILE_REGULAR ||
1165 	    jalib::Filesystem::FileExists(_path)) {
1166 	    CopyFile(_path, savedFilePath);
1167 	    return;
1168 	  } else if (_type == FileConnection::FILE_DELETED) {
1169 	    const size_t bufSize = 2 * PAGE_SIZE;
1170 	    char *buf = (char*)JALLOC_HELPER_MALLOC(bufSize);
1171 	
1172 	    int destFd = open(savedFilePath.c_str(), O_CREAT | O_WRONLY | O_TRUNC,
1173 	                                             S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH);
Event var_tested_neg: Variable "destFd" tests negative.
Also see events: [negative_returns]
At conditional (1): "destFd != -1": Taking false branch.
1174 	    JASSERT(destFd != -1) (_path) (savedFilePath) .Text("Read Failed");
1175 	
1176 	    lseek(fd, 0, SEEK_SET);
1177 	
1178 	    int readBytes, writtenBytes;
At conditional (2): "true": Taking true branch.
1179 	    while(1) {
1180 	      readBytes = Util::readAll(fd, buf, bufSize);
At conditional (3): "readBytes != -1": Taking true branch.
1181 	      JASSERT(readBytes != -1) 
1182 	        (_path) (JASSERT_ERRNO) .Text("Read Failed");
At conditional (4): "readBytes == 0": Taking true branch.
1183 	      if (readBytes == 0) break;
1184 	      writtenBytes = Util::writeAll(destFd, buf, readBytes);
1185 	      JASSERT(writtenBytes != -1) 
1186 	        (savedFilePath) (JASSERT_ERRNO) .Text("Write failed.");
1187 	    }
1188 	
Event negative_returns: "destFd" is passed to a parameter that cannot be negative.
Also see events: [var_tested_neg]
1189 	    close(destFd);
1190 	    JALLOC_HELPER_FREE(buf);
1191 	  }
1192 	
1193 	  JASSERT( lseek(fd, _offset, SEEK_SET) != -1 ) (_path);
1194 	}
1195 	
1196 	dmtcp::string dmtcp::FileConnection::getSavedFilePath(const dmtcp::string& path)
1197 	{
1198 	  const char *cwd_env = getenv( ENV_VAR_CHECKPOINT_DIR );
1199 	  dmtcp::string cwd;
1200 	  if ( cwd_env == NULL ) {
1201 	    cwd = jalib::Filesystem::GetCWD();
1202 	  } else {
1203 	    cwd = cwd_env;
1204 	  }
1205 	
1206 	  JASSERT (!_ckptFilesDir.empty());
1207 	
1208 	  dmtcp::ostringstream os;
1209 	  os << cwd 
1210 	     << "/" << _ckptFilesDir
1211 	     << "/" << jalib::Filesystem::FileBaseName(_path) << "_" << _id.conId();
1212 	
1213 	  return os.str();
1214 	}
1215 	
1216 	/* We want to check if the two file descriptor corresponding to the two
1217 	 * FileConnections are different or identical. This function verifies that the
1218 	 * filenames and offset on both fds are same. If they are same, and if
1219 	 * lseek()ing one fd changes the offset for other fd as well, then the two fds
1220 	 * are identical i.e. they were created by dup() and not by open().
1221 	 */
1222 	bool dmtcp::FileConnection::isDupConnection ( const Connection& _that, dmtcp::ConnectionToFds& conToFds)
1223 	{
1224 	  bool retVal = false;
1225 	
1226 	  JASSERT ( _that.conType() == Connection::FILE );
1227 	
1228 	  const FileConnection& that = (const FileConnection&)_that;
1229 	
1230 	  const dmtcp::vector<int>& thisFds = conToFds[_id];
1231 	  const dmtcp::vector<int>& thatFds = conToFds[that._id];
1232 	
1233 	  if ( _path == that._path &&
1234 	       ( lseek(thisFds[0], 0, SEEK_CUR) == lseek(thatFds[0], 0, SEEK_CUR) ) ) {
1235 	    off_t newOffset = lseek (thisFds[0], 1, SEEK_CUR);
1236 	    JASSERT (newOffset != -1) (JASSERT_ERRNO) .Text("lseek failed");
1237 	
1238 	    if ( newOffset == lseek (thatFds[0], 0, SEEK_CUR) ) {
1239 	      retVal = true;
1240 	    }
1241 	    // Now restore the old offset
1242 	    JASSERT (-1 != lseek (thisFds[0], -1, SEEK_CUR)) .Text("lseek failed");
1243 	  }
1244 	  return retVal;
1245 	}
1246 	
1247 	////////////
1248 	///// FIFO CHECKPOINTING
1249 	
1250 	//void dmtcp::FifoConnection::doLocking ( const dmtcp::vector<int>& fds )
1251 	//{
1252 	//  int i=0,trials = 4;
1253 	//
1254 	//  JTRACE ("doLocking for FIFO");
1255 	//  while( i < trials ){
1256 	//    JTRACE ("Loop iteration") (i);
1257 	//    errno = 0;
1258 	//    int ret = flock(fds[0],LOCK_EX | LOCK_NB);
1259 	//    JTRACE ("flock ret") (ret);
1260 	//    if( !ret ){
1261 	//      JTRACE ("fd has lock") (ret);
1262 	//      _has_lock = true;
1263 	//      return;
1264 	//    }else if( errno == EWOULDBLOCK ){
1265 	//      JTRACE ("fd has no lock") (ret);
1266 	//      _has_lock = false;
1267 	//      return;
1268 	//    }
1269 	//  }
1270 	//  _has_lock=false;
1271 	//  JTRACE ("Error while locking FIFO") (errno);
1272 	//}
1273 	//
1274 	
1275 	void dmtcp::FifoConnection::preCheckpoint ( const dmtcp::vector<int>& fds
1276 	    , KernelBufferDrainer& drain )
1277 	{
1278 	  JASSERT ( fds.size() > 0 );
1279 	
1280 	  if (!hasLock(fds)) {
1281 	    return;
1282 	  }
1283 	
1284 	  _has_lock = true;
1285 	
1286 	  stat(_path.c_str(),&_stat);
1287 	
1288 	
1289 	  JTRACE ("Checkpoint fifo.") (fds[0]);
1290 	
1291 	  int new_flags = (_fcntlFlags & (~(O_RDONLY|O_WRONLY))) | O_RDWR | O_NONBLOCK;
1292 	  ckptfd = open(_path.c_str(),new_flags);
1293 	  JASSERT(ckptfd >= 0)(ckptfd)(JASSERT_ERRNO);
1294 	
1295 	  _in_data.clear();
1296 	  int bufsize = 256;
1297 	  char buf[bufsize];
1298 	  int size;
1299 	
1300 	
1301 	  while(1){ // flush fifo
1302 	    size = read(ckptfd,buf,bufsize);
1303 	    if( size < 0 ){
1304 	      break; // nothing to flush
1305 	    }
1306 		for(int i=0;i<size;i++){
1307 			_in_data.push_back(buf[i]);
1308 		}
1309 	  }
1310 	  close(ckptfd);
1311 	  JTRACE ("Checkpointing fifo:  end.") (fds[0]) (_in_data.size());
1312 	
1313 	}
1314 	
1315 	void dmtcp::FifoConnection::postCheckpoint ( const dmtcp::vector<int>& fds, bool isRestart )
1316 	{
1317 	  if( !_has_lock )
1318 	    return; // nothing to do now
1319 	
1320 	  int new_flags = (_fcntlFlags & (~(O_RDONLY|O_WRONLY))) | O_RDWR | O_NONBLOCK;
1321 	  ckptfd = open(_path.c_str(),new_flags);
1322 	  JASSERT (ckptfd >= 0) (ckptfd) (JASSERT_ERRNO);
1323 	
1324 	  int bufsize = 256;
1325 	  char buf[bufsize];
1326 	  int j;
1327 	  ssize_t ret;
1328 	  for(size_t i=0;i<(_in_data.size()/bufsize);i++){ // refill fifo
1329 	    for(j=0; j<bufsize; j++){
1330 	      buf[j] = _in_data[j+i*bufsize];
1331 	    }
1332 	    ret=write(ckptfd,buf,j);
1333 	    JASSERT (ret == j) (JASSERT_ERRNO) (ret)(j) (fds[0])(i);
1334 	  }
1335 	  int start = (_in_data.size()/bufsize)*bufsize;
1336 	  for(j=0; j<_in_data.size()%bufsize; j++){
1337 	    buf[j] = _in_data[start+j];
1338 	  }
1339 	  errno=0;
1340 	  buf[j] ='\0';
1341 	  JTRACE ("Buf internals.") ((const char*)buf);
1342 	  ret = Util::writeAll(ckptfd,buf,j);
1343 	  JASSERT (ret == j) (JASSERT_ERRNO)(ret)(j) (fds[0]);
1344 	
1345 	  close(ckptfd);
1346 	  // unlock fifo
1347 	  flock(fds[0],LOCK_UN);
1348 	  JTRACE ("End checkpointing fifo.") (fds[0]);
1349 	  restoreOptions ( fds );
1350 	}
1351 	
1352 	void dmtcp::FifoConnection::refreshPath()
1353 	{
1354 	  dmtcp::string cwd = jalib::Filesystem::GetCWD();
1355 	  if( _rel_path != "*" ){ // file path is relative to executable current dir
1356 	    string oldPath = _path;
1357 	    ostringstream fullPath;
1358 	    fullPath << cwd << "/" << _rel_path;
1359 	    if( jalib::Filesystem::FileExists(fullPath.str()) ){
1360 	      _path = fullPath.str();
1361 		  JTRACE("Change _path based on relative path")(oldPath)(_path);
1362 	    }
1363 	  }
1364 	}
1365 	
1366 	void dmtcp::FifoConnection::restoreOptions ( const dmtcp::vector<int>& fds )
1367 	{
1368 	  refreshPath();
1369 	  //call base version (F_GETFL etc)
1370 	  Connection::restoreOptions ( fds );
1371 	}
1372 	
1373 	
1374 	void dmtcp::FifoConnection::restore ( const dmtcp::vector<int>& fds, ConnectionRewirer& rewirer )
1375 	{
1376 	  JASSERT ( fds.size() > 0 );
1377 	
1378 	  JTRACE("Restoring Fifo Connection") (id()) (_path);
1379 	  errno = 0;
1380 	  refreshPath();
1381 	  int tempfd = openFile ();
1382 	  JASSERT ( tempfd > 0 ) ( tempfd ) ( _path ) ( JASSERT_ERRNO );
1383 	
1384 	  int new_flags = (_fcntlFlags & (~(O_RDONLY|O_WRONLY))) | O_RDWR | O_NONBLOCK;
1385 	
1386 	  for(size_t i=0; i<fds.size(); ++i)
1387 	  {
1388 	    JASSERT ( _real_dup2 ( tempfd, fds[i] ) == fds[i] ) ( tempfd ) ( fds[i] )
1389 	      .Text ( "dup2() failed." );
1390 	  }
1391 	}
1392 	
1393 	int dmtcp::FifoConnection::openFile()
1394 	{
1395 	  int fd;
1396 	
1397 	  if (!jalib::Filesystem::FileExists(_path)) {
1398 	    JTRACE("Fifo file not present, creating new one") (_path);
1399 	    mkfifo(_path.c_str(),_stat.st_mode);
1400 	  }
1401 	
1402 	  fd = open(_path.c_str(), O_RDWR | O_NONBLOCK);
1403 	  JTRACE("Is opened")(_path.c_str())(fd);
1404 	
1405 	  JASSERT(fd != -1) (_path) (JASSERT_ERRNO);
1406 	  return fd;
1407 	}
1408 	
1409 	////////////
1410 	//// SERIALIZATION
1411 	
1412 	void dmtcp::Connection::serialize ( jalib::JBinarySerializer& o )
1413 	{
1414 	  JSERIALIZE_ASSERT_POINT ( "dmtcp::Connection" );
1415 	  o & _id & _type & _fcntlFlags & _fcntlOwner & _fcntlSignal & _restoreInSecondIteration;
1416 	  serializeSubClass ( o );
1417 	}
1418 	
1419 	void dmtcp::TcpConnection::serializeSubClass ( jalib::JBinarySerializer& o )
1420 	{
1421 	  JSERIALIZE_ASSERT_POINT ( "dmtcp::TcpConnection" );
1422 	  o & _sockDomain  & _sockType & _sockProtocol & _listenBacklog
1423 	    & _bindAddrlen & _bindAddr & _acceptRemoteId;
1424 	
1425 	  JSERIALIZE_ASSERT_POINT ( "SocketOptions:" );
1426 	  size_t numSockOpts = _sockOptions.size();
1427 	  o & numSockOpts;
1428 	  if ( o.isWriter() )
1429 	  {
1430 	    JTRACE ( "TCP Serialize " ) ( _type ) ( _id.conId() );
1431 	    typedef dmtcp::map< int, dmtcp::map< int, jalib::JBuffer > >::iterator levelIterator;
1432 	    typedef dmtcp::map< int, jalib::JBuffer >::iterator optionIterator;
1433 	
1434 	    size_t numLvl = _sockOptions.size();
1435 	    o & numLvl;
1436 	
1437 	    for ( levelIterator lvl = _sockOptions.begin(); lvl!=_sockOptions.end(); ++lvl )
1438 	    {
1439 	      int lvlVal = lvl->first;
1440 	      size_t numOpts = lvl->second.size();
1441 	
1442 	      JSERIALIZE_ASSERT_POINT ( "Lvl" );
1443 	
1444 	      o & lvlVal & numOpts;
1445 	
1446 	      for ( optionIterator opt = lvl->second.begin(); opt!=lvl->second.end(); ++opt )
1447 	      {
1448 	        int optType = opt->first;
1449 	        jalib::JBuffer& buffer = opt->second;
1450 	        int bufLen = buffer.size();
1451 	
1452 	        JSERIALIZE_ASSERT_POINT ( "Opt" );
1453 	
1454 	        o & optType & bufLen;
1455 	        o.readOrWrite ( buffer.buffer(), bufLen );
1456 	      }
1457 	    }
1458 	  }
1459 	  else
1460 	  {
1461 	    size_t numLvl = 0;
1462 	    o & numLvl;
1463 	
1464 	    while ( numLvl-- > 0 )
1465 	    {
1466 	      int lvlVal = -1;
1467 	      size_t numOpts = 0;
1468 	
1469 	      JSERIALIZE_ASSERT_POINT ( "Lvl" );
1470 	
1471 	      o & lvlVal & numOpts;
1472 	
1473 	      while ( numOpts-- > 0 )
1474 	      {
1475 	        int optType = -1;
1476 	        int bufLen = -1;
1477 	
1478 	        JSERIALIZE_ASSERT_POINT ( "Opt" );
1479 	
1480 	        o & optType & bufLen;
1481 	
1482 	        jalib::JBuffer buffer ( bufLen );
1483 	        o.readOrWrite ( buffer.buffer(), bufLen );
1484 	
1485 	        _sockOptions[lvlVal][optType]=buffer;
1486 	      }
1487 	    }
1488 	  }
1489 	
1490 	  JSERIALIZE_ASSERT_POINT ( "EndSockOpts" );
1491 	
1492 	  dmtcp::map< int, dmtcp::map< int, jalib::JBuffer > > _sockOptions;
1493 	}
1494 	
1495 	void dmtcp::FileConnection::serializeSubClass ( jalib::JBinarySerializer& o )
1496 	{
1497 	  JSERIALIZE_ASSERT_POINT ( "dmtcp::FileConnection" );
1498 	  o & _path & _rel_path & _ckptFilesDir;
1499 	  o & _offset & _stat & _checkpointed;
1500 	  JTRACE("Serializing") (_path) (_rel_path) (_ckptFilesDir)
1501 	    (_checkpointed) (_fcntlFlags);
1502 	}
1503 	
1504 	void dmtcp::FifoConnection::serializeSubClass ( jalib::JBinarySerializer& o )
1505 	{
1506 	  JSERIALIZE_ASSERT_POINT ( "dmtcp::FifoConnection" );
1507 	  o & _path & _rel_path & _savedRelativePath & _stat & _in_data & _has_lock;
1508 	}
1509 	
1510 	void dmtcp::PtyConnection::serializeSubClass ( jalib::JBinarySerializer& o )
1511 	{
1512 	  JSERIALIZE_ASSERT_POINT ( "dmtcp::PtyConnection" );
1513 	  o & _ptsName & _uniquePtsName & _bsdDeviceName & _type & _ptmxIsPacketMode;
1514 	
1515 	  if ( o.isReader() )
1516 	  {
1517 	    if ( _type == dmtcp::PtyConnection::PTY_MASTER ) {
1518 	      dmtcp::UniquePtsNameToPtmxConId::instance().add ( _uniquePtsName, _id );
1519 	    }
1520 	  }
1521 	}
1522 	
1523 	// void dmtcp::PipeConnection::serializeSubClass(jalib::JBinarySerializer& o)
1524 	// {
1525 	//     JSERIALIZE_ASSERT_POINT("dmtcp::PipeConnection");
1526 	//
1527 	//     JASSERT(false).Text("Pipes should have been replaced by socketpair() automagically.");
1528 	// }
1529 	
1530 	static bool ptmxTestPacketMode(int masterFd) {
1531 	  char tmp_buf[100];
1532 	  int slave_fd, ioctlArg, rc;
1533 	  fd_set readfds;
1534 	  struct timeval zeroTimeout = {0, 0}; /* Zero: will use to poll, not wait.*/
1535 	
1536 	  _real_ptsname_r(masterFd, tmp_buf, 100);
1537 	  /* permissions not used, but _real_open requires third arg */
1538 	  slave_fd = _real_open(tmp_buf, O_RDWR, 0666);
1539 	
1540 	  /* A. Drain master before testing.
1541 	        Ideally, DMTCP has already drained it and preserved any information
1542 	        about exceptional conditions in command byte, but maybe we accidentally
1543 	        caused a new command byte in packet mode. */
1544 	  /* Note:  if terminal was in packet mode, and the next read would be
1545 	     a non-data command byte, then there's no easy way for now to detect and
1546 	     restore this. ?? */
1547 	  /* Note:  if there was no data to flush, there might be no command byte,
1548 	     even in packet mode. */
1549 	  tcflush(slave_fd, TCIOFLUSH);
1550 	  /* If character already transmitted (usual case for pty), then this flush
1551 	     will tell master to flush it. */
1552 	  tcflush(masterFd, TCIFLUSH);
1553 	
1554 	  /* B. Now verify that readfds has no more characters to read. */
1555 	  ioctlArg = 1;
1556 	  ioctl(masterFd, TIOCINQ, &ioctlArg);
1557 	  /* Now check if there's a command byte still to read. */
1558 	  FD_ZERO(&readfds);
1559 	  FD_SET(masterFd, &readfds);
1560 	  select(masterFd + 1, &readfds, NULL, NULL, &zeroTimeout);
1561 	  if (FD_ISSET(masterFd, &readfds)) {
1562 	    // Clean up someone else's command byte from packet mode.
1563 	    // FIXME:  We should restore this on resume/restart.
1564 	    rc = read(masterFd, tmp_buf, 100);
1565 	    JASSERT ( rc == 1 ) (rc) (masterFd);
1566 	  }
1567 	
1568 	  /* C. Now we're ready to do the real test.  If in packet mode, we should
1569 	        see command byte of TIOCPKT_DATA (0) with data. */
1570 	  tmp_buf[0] = 'x'; /* Don't set '\n'.  Could be converted to "\r\n". */
1571 	  /* Give the masterFd something to read. */
1572 	  JWARNING ((rc = write(slave_fd, tmp_buf, 1)) == 1) (rc) .Text("write failed");
1573 	  tcdrain(slave_fd);
1574 	  _real_close(slave_fd);
1575 	
1576 	  /* Read the 'x':  If we also see a command byte, it's packet mode */
1577 	  rc = read(masterFd, tmp_buf, 100);
1578 	
1579 	  /* D. Check if command byte packet exists, and chars rec'd is longer by 1. */
1580 	  return (rc == 2 && tmp_buf[0] == TIOCPKT_DATA && tmp_buf[1] == 'x');
1581 	}
1582 	// Also record the count read on each iteration, in case it's packet mode
/*
 * Poll (without waiting) whether `fd` has something to read.
 *
 * Returns false for descriptors that cannot be placed in an fd_set (negative
 * or >= FD_SETSIZE) and whenever select() fails or times out: after a failed
 * select() the contents of the descriptor set are unspecified and must not be
 * inspected.
 */
static bool readyToRead(int fd) {
  if (fd < 0 || fd >= FD_SETSIZE) {
    return false;
  }

  fd_set readfds;
  struct timeval zeroTimeout = {0, 0}; /* Zero: will use to poll, not wait.*/
  FD_ZERO(&readfds);
  FD_SET(fd, &readfds);

  if (select(fd + 1, &readfds, NULL, NULL, &zeroTimeout) <= 0) {
    return false;
  }
  return FD_ISSET(fd, &readfds) != 0;
}
1591 	// returns 0 if not ready to read; else returns -1, or size read incl. header
1592 	static ssize_t readOnePacket(int fd, const void *buf, size_t maxCount) {
1593 	  typedef int hdr;
1594 	  int rc = 0;
1595 	  // Read single packet:  rc > 0 will be true for at most one iteration.
1596 	  while (readyToRead(fd) && rc <= 0) {
1597 	    rc = read(fd, (char *)buf+sizeof(hdr), maxCount-sizeof(hdr));
1598 	    *(hdr *)buf = rc; // Record the number read in header
1599 	    if (rc >= maxCount-sizeof(hdr)) {
1600 	      rc = -1; errno = E2BIG; // Invoke new errno for buf size not large enough
1601 	    }
1602 	    if (rc == -1 && errno != EAGAIN && errno != EINTR)
1603 	      break;  /* Give up; bad error */
1604 	  }
1605 	  return (rc <= 0 ? rc : rc+sizeof(hdr));
1606 	}
1607 	// rc < 0 => error; rc == sizeof(hdr) => no data to read;
1608 	// rc > 0 => saved w/ count hdr
1609 	static ssize_t ptmxReadAll(int fd, const void *origBuf, size_t maxCount) {
1610 	  typedef int hdr;
1611 	  char *buf = (char *)origBuf;
1612 	  int rc;
1613 	  while ((rc = readOnePacket(fd, buf, maxCount)) > 0) {
1614 	    buf += rc;
1615 	  }
1616 	  *(hdr *)buf = 0; /* Header count of zero means we're done */
1617 	  buf += sizeof(hdr);
1618 	  JASSERT(rc < 0 || buf - (char *)origBuf > 0)(rc)(origBuf)((void *)buf);
1619 	  return (rc < 0 ? rc : buf - (char *)origBuf);
1620 	}
1621 	// The hdr contains the size of the full buffer ( [hdr, data] ).
1622 	// Return size of origBuf written:  includes packets of form:  [hdr, data]
1623 	//   with hdr holding size of data.  Last hdr has value zero.
1624 	// Also record the count written on each iteration, in case it's packet mode.
1625 	static ssize_t writeOnePacket(int fd, const void *origBuf, bool isPacketMode) {
1626 	  typedef int hdr;
1627 	  int count = *(hdr *)origBuf;
1628 	  int cum_count = 0;
1629 	  int rc;
1630 	  if (count == 0)
1631 	    return sizeof(hdr);  // count of zero means we're done, hdr consumed
1632 	  // FIXME:  It would be nice to restore packet mode (flow control, etc.)
1633 	  //         For now, we ignore it.
1634 	  if (count == 1 && isPacketMode)
1635 	    return sizeof(hdr) + 1;
1636 	  while (cum_count < count) {
1637 	    rc = write(fd, (char *)origBuf+sizeof(hdr)+cum_count, count-cum_count);
1638 	    if (rc == -1 && errno != EAGAIN && errno != EINTR)
1639 	      break;  /* Give up; bad error */
1640 	    if (rc >= 0)
1641 	      cum_count += rc;
1642 	  }
1643 	  JASSERT(rc != 0 && cum_count == count)(JASSERT_ERRNO)(rc)(count)(cum_count);
1644 	  return (rc < 0 ? rc : cum_count+sizeof(hdr));
1645 	}
1646 	static ssize_t ptmxWriteAll(int fd, const void *buf, bool isPacketMode) {
1647 	  typedef int hdr;
1648 	  ssize_t cum_count = 0;
1649 	  int rc;
1650 	  while ((rc = writeOnePacket(fd, (char *)buf+cum_count, isPacketMode))
1651 		 > sizeof(hdr)) {
1652 	    cum_count += rc;
1653 	  }
1654 	  JASSERT (rc < 0 || rc == sizeof(hdr)) (rc) (cum_count);
1655 	  cum_count += sizeof(hdr);  /* Account for last packet: 'done' hdr w/ 0 data */
1656 	  return (rc <= 0 ? rc : cum_count);
1657 	}
1658 	
1659 	
1660 	#define MERGE_MISMATCH_TEXT .Text("Mismatch when merging connections from different restore targets")
1661 	
// Base-class consistency check used when merging this connection with
// another record ('that') of the same connection.
// The id and type are checked with JASSERT; the fcntl flags, owner and
// signal are checked with JWARNING.  No member of this object is modified.
void dmtcp::Connection::mergeWith ( const Connection& that ){
  JASSERT (_id          == that._id)         MERGE_MISMATCH_TEXT;
  JASSERT (_type        == that._type)       MERGE_MISMATCH_TEXT;
  JWARNING(_fcntlFlags  == that._fcntlFlags) MERGE_MISMATCH_TEXT;
  JWARNING(_fcntlOwner  == that._fcntlOwner) MERGE_MISMATCH_TEXT;
  JWARNING(_fcntlSignal == that._fcntlSignal)MERGE_MISMATCH_TEXT;
}
1669 	
// Merge another record of the same TCP connection into this one.
// Runs the base-class checks, compares the socket parameters with
// JWARNING, and then reconciles _acceptRemoteId, which is the only member
// this method may modify.
void dmtcp::TcpConnection::mergeWith ( const Connection& _that ){
  Connection::mergeWith(_that);
  const TcpConnection& that = (const TcpConnection&)_that; //Connection::_type match is checked in Connection::mergeWith
  JWARNING(_sockDomain    == that._sockDomain)   MERGE_MISMATCH_TEXT;
  JWARNING(_sockType      == that._sockType)     MERGE_MISMATCH_TEXT;
  JWARNING(_sockProtocol  == that._sockProtocol) MERGE_MISMATCH_TEXT;
  JWARNING(_listenBacklog == that._listenBacklog)MERGE_MISMATCH_TEXT;
  JWARNING(_bindAddrlen   == that._bindAddrlen)  MERGE_MISMATCH_TEXT;
  //todo: check _bindAddr and _sockOptions

  JTRACE("Merging TcpConnections")(_acceptRemoteId)(that._acceptRemoteId);

  //merge _acceptRemoteId smartly:
  //  - if ours is null, adopt the other side's value;
  if(_acceptRemoteId.isNull())
    _acceptRemoteId = that._acceptRemoteId;

  //  - if the other side's value is non-null, it must now equal ours.
  if(!that._acceptRemoteId.isNull()){
    JASSERT(_acceptRemoteId == that._acceptRemoteId)(id())(_acceptRemoteId)(that._acceptRemoteId)
      .Text("Merging connections disagree on remote host");
  }
}
1691 	
// Merge check for pty connections:  runs the base-class checks, then
// compares the pts name and the unique pts name with JWARNING.
// No member of this object is modified.
void dmtcp::PtyConnection::mergeWith ( const Connection& _that ){
  Connection::mergeWith(_that);
  // The downcast relies on the _type check made in Connection::mergeWith.
  const PtyConnection& that = (const PtyConnection&)_that;
  JWARNING(_ptsName       == that._ptsName)       MERGE_MISMATCH_TEXT;
  JWARNING(_uniquePtsName == that._uniquePtsName) MERGE_MISMATCH_TEXT;
}
1698 	
// Merge another record of the same file connection into this one.
// Runs the base-class checks, compares path and offset with JWARNING, and,
// if this object is not marked as checkpointed, copies the checkpoint
// related fields (_checkpointed, _rel_path, _ckptFilesDir) from 'that'.
void dmtcp::FileConnection::mergeWith ( const Connection& _that ){
  // NOTE(review): 'that' is downcast and that._path is read by the trace
  // below before Connection::mergeWith() has checked that the types match
  // -- confirm this ordering is intended.
  const FileConnection& that = (const FileConnection&)_that;
  JTRACE("Merging file connections") (_path) (_type) (that._path) (that._type);
  Connection::mergeWith(_that);
  JWARNING(_path   == that._path)   MERGE_MISMATCH_TEXT;
  JWARNING(_offset == that._offset) MERGE_MISMATCH_TEXT;
  // Only take the other side's checkpoint information when we have none.
  if (!_checkpointed) {
    _checkpointed = that._checkpointed;
    _rel_path     = that._rel_path;
    _ckptFilesDir = that._ckptFilesDir;
  }

  //JWARNING(false)(id()).Text("We shouldn't be merging file connections, should we?");
}
1713 	
// Merge check for fifo connections:  runs the base-class checks, then
// compares the fifo path with JWARNING.  No member of this object is
// modified.
void dmtcp::FifoConnection::mergeWith ( const Connection& _that ){
  Connection::mergeWith(_that);
  // The downcast relies on the _type check made in Connection::mergeWith.
  const FifoConnection& that = (const FifoConnection&)_that;
  JWARNING(_path   == that._path)   MERGE_MISMATCH_TEXT;
  //JWARNING(false)(id()).Text("We shouldn't be merging fifo connections, should we?");
}
1720 	
1721 	////////////
1722 	///// STDIO CHECKPOINTING
1723 	
// Pre-checkpoint hook for stdio connections.  Intentionally a no-op:
// neither 'fds' nor 'drain' is used.
void dmtcp::StdioConnection::preCheckpoint ( const dmtcp::vector<int>& fds, KernelBufferDrainer& drain ){
  //JTRACE ("Checkpointing stdio") (fds[0]) (id());
}
// Post-checkpoint hook for stdio connections.  Intentionally a no-op:
// neither 'fds' nor 'isRestart' is used.
void dmtcp::StdioConnection::postCheckpoint ( const dmtcp::vector<int>& fds , bool isRestart ) {
  //nothing
}
1730 	void dmtcp::StdioConnection::restore ( const dmtcp::vector<int>& fds, ConnectionRewirer& ){
1731 	  for(size_t i=0; i<fds.size(); ++i){
1732 	    int fd = fds[i];
1733 	    if(fd <= 2){
1734 	      JTRACE("Skipping restore of STDIO, just inherit from parent")(fd);
1735 	      continue;
1736 	    }
1737 	    int oldFd;
1738 	    switch(_type){
1739 	      case STDIO_IN:
1740 	        JTRACE("Restoring STDIN")(fd);
1741 	        oldFd=0;
1742 	        break;
1743 	      case STDIO_OUT:
1744 	        JTRACE("Restoring STDOUT")(fd);
1745 	        oldFd=1;
1746 	        break;
1747 	      case STDIO_ERR:
1748 	        JTRACE("Restoring STDERR")(fd);
1749 	        oldFd=2;
1750 	        break;
1751 	      default:
1752 	        JASSERT(false);
1753 	    }
1754 	    errno = 0;
1755 	    JWARNING ( _real_dup2 ( oldFd, fd ) == fd ) ( oldFd ) ( fd ) ( JASSERT_ERRNO );
1756 	  }
1757 	}
// Option-restore hook for stdio connections.  Intentionally a no-op:
// 'fds' is not used.
void dmtcp::StdioConnection::restoreOptions ( const dmtcp::vector<int>& fds ){
  //nothing
}
1761 	
// Subclass part of serialization for stdio connections.  Only the
// "dmtcp::StdioConnection" assert point is emitted/checked here; no
// additional fields are serialized by this method.
void dmtcp::StdioConnection::serializeSubClass ( jalib::JBinarySerializer& o ){
  JSERIALIZE_ASSERT_POINT ( "dmtcp::StdioConnection" );
}
1765 	
// Merging stdio connections is a no-op:  the base-class consistency checks
// are deliberately left disabled (see the commented-out call below).
void dmtcp::StdioConnection::mergeWith ( const Connection& that ){
  //Connection::mergeWith(that);
}
1769 	
1770 	void dmtcp::StdioConnection::restartDup2(int oldFd, int newFd){
1771 	  static ConnectionRewirer ignored;
1772 	  restore(dmtcp::vector<int>(1,newFd), ignored);
1773 	}