1 /****************************************************************************
2 * Copyright (C) 2006-2010 by Jason Ansel, Kapil Arya, and Gene Cooperman *
3 * jansel@csail.mit.edu, kapil@ccs.neu.edu, gene@ccs.neu.edu *
4 * *
5 * This file is part of the dmtcp/src module of DMTCP (DMTCP:dmtcp/src). *
6 * *
7 * DMTCP:dmtcp/src is free software: you can redistribute it and/or *
8 * modify it under the terms of the GNU Lesser General Public License as *
9 * published by the Free Software Foundation, either version 3 of the *
10 * License, or (at your option) any later version. *
11 * *
12 * DMTCP:dmtcp/src is distributed in the hope that it will be useful, *
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
15 * GNU Lesser General Public License for more details. *
16 * *
17 * You should have received a copy of the GNU Lesser General Public *
18 * License along with DMTCP:dmtcp/src. If not, see *
19 * <http://www.gnu.org/licenses/>. *
20 ****************************************************************************/
21
22 #include "../jalib/jassert.h"
23 #include "../jalib/jfilesystem.h"
24 #include "../jalib/jconvert.h"
25 #include "../jalib/jserialize.h"
26 #include "syscallwrappers.h"
27 #include "dmtcpmessagetypes.h"
28 #include "dmtcpworker.h"
29 #include "protectedfds.h"
30 #include "virtualpidtable.h"
31 #include "util.h"
32 #include "sysvipc.h"
33 #include <sys/ioctl.h>
34 #include <unistd.h>
35 #include <fcntl.h>
36 #include <sys/file.h>
37 #include <sys/mman.h>
38 #include <iostream>
39 #include <ios>
40 #include <fstream>
41
42 /*
43 * Shmid virtualization closely follows PID-Virtualization model.
44 * Algorithm for properly checkpointing shared memory segments.
45 * Helper struct: struct shmMetaInfo { pid_t pid, int creatorSignature }
46 * 1. BARRIER -- SUSPENDED
47 * 2a. If the process is the creator process and the memory-segment wasn't
48 * mapped, map it now. We will unmap it later.
49 * 2b. Copy first sizeof(shmMetaInfo) bytes of memory segment into a temp
50 * buffer, call it origInfo
51 * 3. BARRIER -- LOCKED
52 * 4. Populate a new object of type shmMetaInfo with the following values:
53 * pid = getpid()
54 * creatorSignature = | ~ origInfo.creatorSignature ; if this is the creator process
55 * | origInfo.creatorSignature ; otherwise
56 * 5. Copy this new object to the start of the memory segment.
57 * 6. BARRIER -- DRAINED
58 * 7. If this is the creator process,
59 * 8. do nothing.
60 * 9. else
61 * 10. unmap this shared-memory from process address space (there might be
62 * multiple copies, so unmap them all)
63 * 11. endif
64 * 12. BARRIER -- CHECKPOINTED
65 * 13. At this point, the contents of the memory-segment have been saved.
66 * 14. BARRIER -- REFILLED
67 * 15. Re-map the memory-segment into each process's memory as it existed prior
68 * to checkpoint.
69 * 16. Unmap the memory that was mapped in step 2a.
70 * 17. BARRIER -- RESUME
71 *
72 * Steps involved in Restart
73 * 0. BARRIER -- RESTARTING
74 * 1. Restore process memory
75 * 2. Insert original-shmids into a node-wide shared file so that other
76 * processes can know about all the existing shmids in order to avoid
77 * future conflicts.
78 * 3. BARRIER -- CHECKPOINTED
79 * 4. Read all original-shmids from the file
80 * 5. Re-create shared-memory segments which were checkpointed by this process.
81 * 6. Remap the shm-segment to a temp addr and copy the checkpointed contents
82 * to this address. Now unmap the area where the checkpointed contents were
83 * stored and map the shm-segment on that address. Unmap the temp addr now.
84 * 7. Write original->current mappings for all shmids which we got from
85 * shmget() in previous step.
86 * 8. BARRIER -- REFILLED
87 * 9. Re-map the memory-segment into each process's memory as it existed prior
88 * to checkpoint.
89 * 10. Unmap the memory that was mapped in step 2a.
90 * 11. BARRIER -- RESUME
91 */
92
93 /*
94 * TODO:
95 * 1. Preserve Shmids across exec() -- DONE
96 * 2. Handle the case when the segment is marked for removal at ckpt time.
97 */
98
99 static pthread_mutex_t tblLock = PTHREAD_MUTEX_INITIALIZER;
100
101 static void _do_lock_tbl()
102 {
103 JASSERT(pthread_mutex_lock(&tblLock) == 0) (JASSERT_ERRNO);
104 }
105
106 static void _do_unlock_tbl()
107 {
108 JASSERT(pthread_mutex_unlock(&tblLock) == 0) (JASSERT_ERRNO);
109 }
110
/*
 * File-local flag: set to true by SysVIPC::postRestart() and reset to false
 * by SysVIPC::prepareForLeaderElection().  The restart-only steps in
 * postCheckpoint(), preResume() and recreateShmSegment() test it.
 */
static bool isRestarting(false);
112
113 dmtcp::SysVIPC::SysVIPC()
114 {
115 _do_lock_tbl();
116 _shm.clear();
117 _do_unlock_tbl();
118 }
119
120 dmtcp::SysVIPC& dmtcp::SysVIPC::instance()
121 {
122 static SysVIPC *inst = new SysVIPC(); return *inst;
123 }
124
125 int dmtcp::SysVIPC::originalToCurrentShmid(int shmid)
126 {
127 WRAPPER_EXECUTION_DISABLE_CKPT();
128 int currentShmid = shmid;
129 _do_lock_tbl();
130 if (_originalToCurrentShmids.find(shmid) != _originalToCurrentShmids.end()) {
131 currentShmid = _originalToCurrentShmids[shmid];
132 }
133 _do_unlock_tbl();
134 WRAPPER_EXECUTION_ENABLE_CKPT();
135 JTRACE("Original to current shmid") (shmid) (currentShmid);
136 return currentShmid;
137 }
138
139 int dmtcp::SysVIPC::currentToOriginalShmid(int shmid)
140 {
141 WRAPPER_EXECUTION_DISABLE_CKPT();
142 int originalShmid = -1;
143 _do_lock_tbl();
144 for (ShmidMapIter i = _originalToCurrentShmids.begin();
145 i != _originalToCurrentShmids.end();
146 ++i) {
147 if ( shmid == i->second ) {
148 originalShmid = i->first;
149 break;
150 }
151 }
152 _do_unlock_tbl();
153 WRAPPER_EXECUTION_ENABLE_CKPT();
154 JTRACE("current to original shmid") (shmid) (originalShmid);
155 return originalShmid;
156 }
157
158 bool dmtcp::SysVIPC::isConflictingShmid(int shmid)
159 {
160 return (originalToCurrentShmid(shmid) != shmid);
161 }
162
163
164 int dmtcp::SysVIPC::shmaddrToShmid(const void* shmaddr)
165 {
166 WRAPPER_EXECUTION_DISABLE_CKPT();
167 int shmid = -1;
168 _do_lock_tbl();
169 for (ShmIterator i = _shm.begin(); i != _shm.end(); ++i) {
170 ShmSegment& shmObj = i->second;
171 if (shmObj.isValidShmaddr(shmaddr)) {
172 shmid = i->first;
173 break;
174 }
175 }
176 _do_unlock_tbl();
177 WRAPPER_EXECUTION_ENABLE_CKPT();
178 return shmid;
179 }
180
181 dmtcp::vector<int> dmtcp::SysVIPC::getShmids()
182 {
183 dmtcp::vector<int> shmids;
184 for (ShmIterator i = _shm.begin(); i != _shm.end(); ++i) {
185 shmids.push_back(i->first);
186 }
187 return shmids;
188 }
189
190 void dmtcp::SysVIPC::removeStaleShmObjects()
191 {
192 dmtcp::vector<int> staleShmids;
193 for (ShmIterator i = _shm.begin(); i != _shm.end(); ++i) {
194 ShmSegment& shmObj = i->second;
195 if (shmObj.isStale()) {
196 staleShmids.push_back(i->first);
197 }
198 }
199 for (int j = 0; j < staleShmids.size(); ++j) {
200 _shm.erase(staleShmids[j]);
201 }
202 }
203
204 void dmtcp::SysVIPC::prepareForLeaderElection()
205 {
206 isRestarting = false;
207 /* Remove all invalid/removed shm segments*/
208 removeStaleShmObjects();
209
210 for (ShmIterator i = _shm.begin(); i != _shm.end(); ++i) {
211 ShmSegment& shmObj = i->second;
212 shmObj.prepareForLeaderElection();
213 }
214 }
215
216 void dmtcp::SysVIPC::leaderElection()
217 {
218 for (ShmIterator i = _shm.begin(); i != _shm.end(); ++i) {
219 ShmSegment& shmObj = i->second;
220 shmObj.leaderElection();
221 }
222 }
223
224 void dmtcp::SysVIPC::preCheckpoint()
225 {
226 for (ShmIterator i = _shm.begin(); i != _shm.end(); ++i) {
227 ShmSegment& shmObj = i->second;
228 shmObj.preCheckpoint();
229 }
230 }
231
232 void dmtcp::SysVIPC::preResume()
233 {
234 JTRACE("");
235 if (isRestarting) {
236 _originalToCurrentShmids.clear();
237 readShmidMapsFromFile(PROTECTED_SHMIDMAP_FD);
238 _real_close(PROTECTED_SHMIDMAP_FD);
239 }
240
241 for (ShmIterator i = _shm.begin(); i != _shm.end(); ++i) {
242 ShmSegment& shmObj = i->second;
243 ShmidMapIter j = _originalToCurrentShmids.find(i->first);
244 JASSERT(j != _originalToCurrentShmids.end())
245 (i->first) (_originalToCurrentShmids.size());
246
247 shmObj.updateCurrentShmid(_originalToCurrentShmids[i->first]);
248 // if (isRestarting) {
249 // shmObj.remapFirstAddrForOwnerOnRestart();
250 // }
251 shmObj.remapAll();
252 }
253 }
254
255 void dmtcp::SysVIPC::postCheckpoint()
256 {
257 if (!isRestarting) return;
258
259 JTRACE("");
260
261 _originalToCurrentShmids.clear();
262 readShmidMapsFromFile(PROTECTED_SHMIDLIST_FD);
263 _real_close(PROTECTED_SHMIDLIST_FD);
264
265 for (ShmIterator i = _shm.begin(); i != _shm.end(); ++i) {
266 ShmSegment& shmObj = i->second;
267 shmObj.recreateShmSegment();
268 }
269
270 _originalToCurrentShmids.clear();
271 for (ShmIterator i = _shm.begin(); i != _shm.end(); ++i) {
272 ShmSegment& shmObj = i->second;
273 if (shmObj.isOwner()) {
274 _originalToCurrentShmids[i->first] = shmObj.currentShmid();
275 }
276 }
277 writeShmidMapsToFile(PROTECTED_SHMIDMAP_FD);
278 }
279
280 void dmtcp::SysVIPC::postRestart()
281 {
282 isRestarting = true;
283 _originalToCurrentShmids.clear();
284
285 JTRACE("");
286 for (ShmIterator i = _shm.begin(); i != _shm.end(); ++i) {
287 ShmSegment& shmObj = i->second;
288 if (shmObj.isOwner()) {
289 JTRACE("Writing ShmidMap to file")(shmObj.originalShmid());
290 _originalToCurrentShmids[shmObj.originalShmid()] = shmObj.currentShmid();
291 }
292 }
293 if (_originalToCurrentShmids.size() > 0) {
294 writeShmidMapsToFile(PROTECTED_SHMIDLIST_FD);
295 }
296 }
297
298 void dmtcp::SysVIPC::on_shmget(key_t key, size_t size, int shmflg, int shmid)
299 {
300 JASSERT(!isConflictingShmid(shmid)) (shmid) (key) (size)
301 .Text("Duplicate shmid found");
302 _do_lock_tbl();
303 ShmSegment shmObj (key, size, shmflg, shmid);
304 _shm[shmid] = shmObj;
305 _originalToCurrentShmids[shmid] = shmid;
306 _do_unlock_tbl();
307 }
308
309 void dmtcp::SysVIPC::on_shmat(int shmid, const void *shmaddr, int shmflg, void* newaddr)
310 {
311 _do_lock_tbl();
|
At conditional (1): "this->_shm.find(shmid).operator ==(this->_shm.end())": Taking true branch.
|
312 if (_shm.find( shmid ) == _shm.end()) {
313 // This process doesn't know about the given shmid. Create a new entry
314 JTRACE ("Shmid not found in table. Creating new entry") (shmid);
315 ShmSegment shmObj (shmid);
|
Event uninit_use_in_call: |
Using uninitialized value "shmObj._dmtcpMappedAddr" when calling "dmtcp::ShmSegment::operator =(dmtcp::ShmSegment const &)". [details] |
|
Event uninit_use_in_call: |
Using uninitialized value "shmObj._mode" when calling "dmtcp::ShmSegment::operator =(dmtcp::ShmSegment const &)". [details] |
|
Event uninit_use_in_call: |
Using uninitialized value "shmObj._nattch" when calling "dmtcp::ShmSegment::operator =(dmtcp::ShmSegment const &)". [details] |
|
Event uninit_use_in_call: |
Using uninitialized value "shmObj._originalInfo": field "shmObj._originalInfo".creatorSignature is uninitialized when calling "dmtcp::ShmSegment::operator =(dmtcp::ShmSegment const &)". [details] |
|
Event uninit_use_in_call: |
Using uninitialized value "shmObj._ownerInfo": field "shmObj._ownerInfo".creatorSignature is uninitialized when calling "dmtcp::ShmSegment::operator =(dmtcp::ShmSegment const &)". [details] |
|
Event uninit_use_in_call: |
Using uninitialized value "shmObj._shminfo": field "shmObj._shminfo".__unused5 is uninitialized when calling "dmtcp::ShmSegment::operator =(dmtcp::ShmSegment const &)". [details] |
| Also see events: |
[var_decl] |
316 _shm[shmid] = shmObj;
317 _originalToCurrentShmids[shmid] = shmid;
318 }
319
320 JASSERT(shmaddr == NULL || shmaddr == newaddr);
321 _shm[shmid].on_shmat(newaddr, shmflg);
322 _do_unlock_tbl();
323 }
324
325 void dmtcp::SysVIPC::on_shmdt(const void *shmaddr)
326 {
327 int shmid = shmaddrToShmid(shmaddr);
328 JASSERT(shmid != -1) (shmaddr)
329 .Text("No corresponding shmid found for given shmaddr");
330 _do_lock_tbl();
331 _shm[shmid].on_shmdt(shmaddr);
332 _do_unlock_tbl();
333 }
334
335 void dmtcp::SysVIPC::writeShmidMapsToFile(int fd)
336 {
337 dmtcp::string file = "/proc/self/fd/" + jalib::XToString ( fd );
338 file = jalib::Filesystem::ResolveSymlink ( file );
339 JASSERT ( file.length() > 0 ) ( file ) ( fd );
340
341 jalib::JBinarySerializeWriterRaw wr (file, fd);
342
343 Util::lockFile(fd);
344 wr.serializeMap(_originalToCurrentShmids);
345 Util::unlockFile(fd);
346 }
347
348 void dmtcp::SysVIPC::readShmidMapsFromFile(int fd)
349 {
350 dmtcp::string file = "/proc/self/fd/" + jalib::XToString ( fd );
351 file = jalib::Filesystem::ResolveSymlink ( file );
352 JASSERT ( file.length() > 0 ) ( file );
353
354 jalib::JBinarySerializeReader rd(file);
355
356 while (!rd.isEOF()) {
357 rd.serializeMap(_originalToCurrentShmids);
358 }
359 }
360
361 void dmtcp::SysVIPC::serialize(jalib::JBinarySerializer& o)
362 {
363 o.serializeMap(_originalToCurrentShmids);
364 }
365
366 /* ShmSegment Methods */
367
368 dmtcp::ShmSegment::ShmSegment(key_t key, int size, int shmflg, int shmid)
369 {
370 _key = key;
371 _size = size;
372 _shmgetFlags = shmflg;
373 _originalShmid = shmid;
374 _currentShmid = shmid;
375 _creatorPid = getpid();
376 }
377
378 dmtcp::ShmSegment::ShmSegment(int shmid)
379 {
380 struct shmid_ds shminfo;
381 JASSERT(_real_shmctl(shmid, IPC_STAT, &shminfo) != -1);
382 _key = shminfo.shm_perm.__key;
383 _size = shminfo.shm_segsz;
384 _shmgetFlags = shminfo.shm_perm.mode;
385 _originalShmid = shmid;
386 _currentShmid = shmid;
387 _creatorPid = VirtualPidTable::instance().currentToOriginalPid(shminfo.shm_cpid);
388 }
389
390 bool dmtcp::ShmSegment::isValidShmaddr(const void* shmaddr)
391 {
392 return _shmaddrToFlag.find((void*)shmaddr) != _shmaddrToFlag.end();
393 }
394
395 bool dmtcp::ShmSegment::isStale()
396 {
397 struct shmid_ds shminfo;
398 int ret = _real_shmctl(_currentShmid, IPC_STAT, &shminfo);
399 if (ret == -1) {
400 JASSERT (errno == EIDRM || errno == EINVAL);
401 JASSERT(_shmaddrToFlag.empty());
402 return true;
403 }
404 _nattch = shminfo.shm_nattch;
405 _mode = shminfo.shm_perm.mode;
406 return false;
407 }
408
409 void dmtcp::ShmSegment::prepareForLeaderElection()
410 {
411 /* If the creator process hasn't mapped this object, map it now so that it
412 * can be checkpointed. In the post restart routine, we will be unmapping
413 * this address.
414 *
415 * TODO: If the segment has been marked for deletion, we might accidently
416 * loose it if the unmapping happens before it is re-mapped by the other
417 * processes.
418 */
419 if (_nattch == 0 || (_creatorPid == getpid() && _shmaddrToFlag.empty())) {
420
421 void *mapaddr = _real_shmat(_originalShmid, NULL, 0);
422 JASSERT(mapaddr != (void*) -1);
423 _shmaddrToFlag[mapaddr] = 0;
424 _dmtcpMappedAddr = true;
425 } else {
426 _dmtcpMappedAddr = false;
427 }
428
429 ShmaddrToFlagIter i = _shmaddrToFlag.begin();
430 JASSERT (i != _shmaddrToFlag.end());
431
432 pid_t *addr = (pid_t *) i->first;
433 _originalInfo.pid = *addr;
434 _originalInfo.creatorSignature = *(int*)(addr+1);
435 }
436
437 void dmtcp::ShmSegment::leaderElection()
438 {
439 /*
440 * We want only one process to save a copy of this segment in its checkpoint image.
441 */
442 _ownerInfo.pid = getpid();
443
444 ShmaddrToFlagIter i = _shmaddrToFlag.begin();
445 JASSERT (i != _shmaddrToFlag.end());
446
447 pid_t *addr = (pid_t *) i->first;
448 *addr = _ownerInfo.pid;
449 if (getpid() == _creatorPid) {
450 _ownerInfo.creatorSignature = ~(_originalInfo.creatorSignature);
451 *(int*)(addr+1) = _ownerInfo.creatorSignature;
452 } else {
453 _ownerInfo.creatorSignature = _originalInfo.creatorSignature;
454 }
455 }
456
457 void dmtcp::ShmSegment::preCheckpoint()
458 {
459 ShmaddrToFlagIter i = _shmaddrToFlag.begin();
460 JASSERT (i != _shmaddrToFlag.end());
461
462 pid_t *addr = (pid_t *) i->first;
463 _ownerInfo.pid = *addr;
464 _ownerInfo.creatorSignature = *(int*)(addr+1);
465
466 if (getpid() == _creatorPid) {
467 /* This shared memory object was created by us. Checkpoint it */
468 JASSERT (_ownerInfo.creatorSignature != _originalInfo.creatorSignature);
469 _ownerInfo.pid = getpid();
470
471 JTRACE("Owner/Creator of the shared memory segment. Will ckpt it.") (getpid());
472
473 // Unmap all but first mapped addr
474 ++i;
475 for (; i != _shmaddrToFlag.end(); ++i) {
476 JASSERT(_real_shmdt(i->first) == 0);
477 JTRACE("Unmapping shared memory segment") (_originalShmid)(_currentShmid)(i->first);
478 }
479
480 } else if (_ownerInfo.creatorSignature == _originalInfo.creatorSignature &&
481 getpid() == _ownerInfo.pid) {
482 /* Creator process not alive and we have the leadership of this
483 * shared-memory object, so checkpoint it.
484 */
485 // Unmap all but first mapped addr
486 ++i;
487 for (; i != _shmaddrToFlag.end(); ++i) {
488 JASSERT(_real_shmdt(i->first) == 0);
489 JTRACE("Unmapping shared memory segment") (_originalShmid)(_currentShmid)(i->first);
490 }
491
492 } else {
493 /* Either creator process is alive or this process was not elected to
494 * checkpoint this area so it should unmap all the mappings of this
495 * memory-segment.
496 */
497 _ownerInfo.pid = 0;
498 for (; i != _shmaddrToFlag.end(); ++i) {
499 JASSERT(_real_shmdt(i->first) == 0);
500 JTRACE("Unmapping shared memory segment") (_originalShmid)(_currentShmid)(i->first);
501 }
502 }
503 }
504
505 void dmtcp::ShmSegment::recreateShmSegment()
506 {
507 JASSERT(isRestarting);
508 if (_ownerInfo.pid == getpid()) {
509 while (true) {
510 int shmid = _real_shmget(_key, _size, _shmgetFlags);
511 if (!SysVIPC::instance().isConflictingShmid(shmid)) {
512 JTRACE("Recreating shared memory segment") (_originalShmid) (shmid);
513 _currentShmid = shmid;
514 break;
515 }
516 JASSERT(_real_shmctl(shmid, IPC_RMID, NULL) != -1);
517 }
518 remapFirstAddrForOwnerOnRestart();
519 }
520 }
521
522 void dmtcp::ShmSegment::remapFirstAddrForOwnerOnRestart()
523 {
524 JASSERT(_ownerInfo.pid == getpid());
525 ShmaddrToFlagIter i = _shmaddrToFlag.begin();
526 void *tmpaddr = _real_shmat(_currentShmid, NULL, 0);
527 JASSERT(tmpaddr != (void*) -1) (_currentShmid)(JASSERT_ERRNO);
528 memcpy(tmpaddr, i->first, _size);
529 munmap(i->first, _size);
530 JASSERT (_real_shmat(_currentShmid, i->first, i->second) != (void *) -1);
531 JASSERT(_real_shmdt(tmpaddr) == 0);
532 JTRACE("Remapping shared memory segment")(_currentShmid);
533 }
534
535 void dmtcp::ShmSegment::remapAll()
536 {
537 ShmaddrToFlagIter i = _shmaddrToFlag.begin();
538 if (_ownerInfo.pid == getpid()) {
539 // The address is already mapped, so we won't segfault
540 pid_t *addr = (pid_t *) i->first;
541 *addr = _originalInfo.pid;
542 *(int*)(addr+1) = _originalInfo.creatorSignature;
543 JTRACE("Owner process, restoring first 8 bytes of shared area");
544 }
545
546 for (i = _shmaddrToFlag.begin() ; i != _shmaddrToFlag.end(); ++i) {
547 if (_real_shmat(_currentShmid, i->first, i->second) == (void *) -1) {
548 JASSERT(errno == EINVAL && _ownerInfo.pid == getpid()) (JASSERT_ERRNO) (_currentShmid) (_originalShmid)(i->first) (_ownerInfo.pid)(getpid()) (_creatorPid);
549 }
550 JTRACE("Remapping shared memory segment")(_currentShmid);
551 }
552 }
553
554 void dmtcp::ShmSegment::on_shmat(void *shmaddr, int shmflg)
555 {
556 _shmaddrToFlag[shmaddr] = shmflg;
557 }
558
559 void dmtcp::ShmSegment::on_shmdt(const void *shmaddr)
560 {
561 JASSERT(isValidShmaddr(shmaddr));
562 _shmaddrToFlag.erase((void*)shmaddr);
563
564 // TODO: If num-attached == 0; and marked for deletion, remove this segment
565 }
566