@@ -9,8 +9,10 @@ import (
99 "os/exec"
1010 "path/filepath"
1111 "strconv"
12+ "strings"
1213 "sync"
1314 "syscall"
15+ "time"
1416
1517 "github.com/Sirupsen/logrus"
1618 "github.com/docker/containerd/specs"
@@ -126,6 +128,13 @@ func loadProcess(root, id string, c *container, s *ProcessState) (*process, erro
126128 },
127129 state : Stopped ,
128130 }
131+
132+ startTime , err := ioutil .ReadFile (filepath .Join (p .root , StartTimeFile ))
133+ if err != nil && ! os .IsNotExist (err ) {
134+ return nil , err
135+ }
136+ p .startTime = string (startTime )
137+
129138 if _ , err := p .getPidFromFile (); err != nil {
130139 return nil , err
131140 }
@@ -151,6 +160,30 @@ func loadProcess(root, id string, c *container, s *ProcessState) (*process, erro
151160 return p , nil
152161}
153162
// readProcStatField returns one field of /proc/<pid>/stat as a string.
//
// field is 1-based and follows the numbering used by proc(5): field 1 is the
// pid, field 2 is the command name (returned without its surrounding
// parentheses), field 3 is the state, and so on.
//
// An error is returned when the stat file cannot be read, when its content
// does not have the expected "pid (comm) ..." layout, or when field is
// outside the range of fields present in the file.
func readProcStatField(pid int, field int) (string, error) {
	if field < 1 {
		return "", fmt.Errorf("invalid stat field %d for pid %d", field, pid)
	}

	data, err := ioutil.ReadFile(filepath.Join(string(filepath.Separator), "proc", strconv.Itoa(pid), "stat"))
	if err != nil {
		return "", err
	}
	stat := string(data)

	// The command name is wrapped in parentheses and may itself contain
	// spaces and parentheses. The pid before it is purely numeric, so the
	// first " (" opens the name, and every field after the name is free of
	// ")", so the last ")" closes it.
	nameStart := strings.Index(stat, " (")
	nameEnd := strings.LastIndex(stat, ")")
	if nameStart < 0 || nameEnd < nameStart+2 {
		return "", fmt.Errorf("malformed stat data for pid %d", pid)
	}

	switch field {
	case 1:
		return stat[:nameStart], nil
	case 2:
		return stat[nameStart+2 : nameEnd], nil
	}

	// Everything after the command name is whitespace separated; the first
	// entry of rest is field 3.
	rest := strings.Fields(stat[nameEnd+1:])
	idx := field - 3
	if idx >= len(rest) {
		return "", fmt.Errorf("stat field %d out of range for pid %d (%d fields available)", field, pid, len(rest)+2)
	}
	return rest[idx], nil
}
186+
154187type process struct {
155188 root string
156189 id string
@@ -165,6 +198,7 @@ type process struct {
165198 cmdDoneCh chan struct {}
166199 state State
167200 stateLock sync.Mutex
201+ startTime string
168202}
169203
170204func (p * process ) ID () string {
@@ -195,7 +229,47 @@ func (p *process) Resize(w, h int) error {
195229}
196230
197231func (p * process ) handleSigkilledShim (rst int , rerr error ) (int , error ) {
198- if rerr == nil || p .cmd == nil || p .cmd .Process == nil {
232+ if p .cmd == nil || p .cmd .Process == nil {
233+ e := unix .Kill (p .pid , 0 )
234+ if e == syscall .ESRCH {
235+ return rst , rerr
236+ }
237+
238+ // If it's not the same process, just mark it stopped and set
239+ // the status to 255
240+ if same , err := p .isSameProcess (); ! same {
241+ logrus .Warnf ("containerd: %s:%s (pid %d) is not the same process anymore (%v)" , p .container .id , p .id , p .pid , err )
242+ p .stateLock .Lock ()
243+ p .state = Stopped
244+ p .stateLock .Unlock ()
245+ // Create the exit status file so the exit event is generated once the
246+ // monitor kicks in, without going through this whole process again
247+ rerr = ioutil .WriteFile (filepath .Join (p .root , ExitStatusFile ), []byte ("255" ), 0644 )
248+ return 255 , nil
249+ }
250+
251+ ppid , err := readProcStatField (p .pid , 4 )
252+ if err != nil {
253+ return rst , fmt .Errorf ("could not check process ppid: %v (%v)" , err , rerr )
254+ }
255+ if ppid == "1" {
256+ logrus .Warnf ("containerd: %s:%s shim died, killing associated process" , p .container .id , p .id )
257+ unix .Kill (p .pid , syscall .SIGKILL )
258+ // wait for the process to die
259+ for {
260+ e := unix .Kill (p .pid , 0 )
261+ if e == syscall .ESRCH {
262+ break
263+ }
264+ time .Sleep (10 * time .Millisecond )
265+ }
266+
267+ rst = 128 + int (syscall .SIGKILL )
268+ // Create the exit status file so the exit event is generated once the
269+ // monitor kicks in, without going through this whole process again
270+ rerr = ioutil .WriteFile (filepath .Join (p .root , ExitStatusFile ), []byte (fmt .Sprintf ("%d" , rst )), 0644 )
271+ }
272+
199273 return rst , rerr
200274 }
201275
@@ -218,6 +292,9 @@ func (p *process) handleSigkilledShim(rst int, rerr error) (int, error) {
218292 wpid int
219293 )
220294
295+ // Some processes change their PR_SET_PDEATHSIG, so force kill them
296+ unix .Kill (p .pid , syscall .SIGKILL )
297+
221298 for wpid == 0 {
222299 wpid , e = unix .Wait4 (p .pid , & status , unix .WNOHANG , & rusage )
223300 if e != nil {
@@ -244,7 +321,9 @@ func (p *process) handleSigkilledShim(rst int, rerr error) (int, error) {
244321func (p * process ) ExitStatus () (rst int , rerr error ) {
245322 data , err := ioutil .ReadFile (filepath .Join (p .root , ExitStatusFile ))
246323 defer func () {
247- rst , rerr = p .handleSigkilledShim (rst , rerr )
324+ if rerr != nil {
325+ rst , rerr = p .handleSigkilledShim (rst , rerr )
326+ }
248327 }()
249328 if err != nil {
250329 if os .IsNotExist (err ) {
@@ -297,6 +376,40 @@ func (p *process) getPidFromFile() (int, error) {
297376 return i , nil
298377}
299378
379+ func (p * process ) readStartTime () (string , error ) {
380+ return readProcStatField (p .pid , 22 )
381+ }
382+
383+ func (p * process ) saveStartTime () error {
384+ startTime , err := p .readStartTime ()
385+ if err != nil {
386+ return err
387+ }
388+
389+ p .startTime = startTime
390+ return ioutil .WriteFile (filepath .Join (p .root , StartTimeFile ), []byte (startTime ), 0644 )
391+ }
392+
393+ func (p * process ) isSameProcess () (bool , error ) {
394+ // for backward compat assume it's the same if startTime wasn't set
395+ if p .startTime == "" {
396+ return true , nil
397+ }
398+ if p .pid == 0 {
399+ _ , err := p .getPidFromFile ()
400+ if err != nil {
401+ return false , err
402+ }
403+ }
404+
405+ startTime , err := p .readStartTime ()
406+ if err != nil {
407+ return false , err
408+ }
409+
410+ return startTime == p .startTime , nil
411+ }
412+
300413// Wait will reap the shim process
301414func (p * process ) Wait () {
302415 if p .cmdDoneCh != nil {
0 commit comments