From 00d03422a2e421473c6f73ccc94eeaefc6c0f0a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Gali=C4=87?= Date: Tue, 30 Oct 2012 21:52:42 +0100 Subject: [PATCH 1/3] traffic_cop: wait() for children on SIGTERM rework traffic_cop's signal handling to register a handler for SIGTERM. In this handler we now send SIGTERM to all children and then wait() for them. This should allow us to handle restarts in upstart more gracefully. Finally, we simplify the handling on Linux by no longer parsing /proc: It's not necessary. Linux is a POSIX system too. --- cop/TrafficCop.cc | 74 ++++++++++++++++++++++++++++++---------------- lib/ts/lockfile.cc | 55 +++++++--------------------------- 2 files changed, 59 insertions(+), 70 deletions(-) diff --git a/cop/TrafficCop.cc b/cop/TrafficCop.cc index 58178865c85..74e63eac7bb 100644 --- a/cop/TrafficCop.cc +++ b/cop/TrafficCop.cc @@ -45,7 +45,7 @@ union semun #endif // linux check // For debugging, turn this on. -// #define TRACE_LOG_COP 1 +#define TRACE_LOG_COP 1 #define OPTIONS_MAX 32 #define OPTIONS_LEN_MAX 1024 @@ -212,6 +212,7 @@ chown_file_to_admin_user(const char *file) { } } } + static void sig_child(int signum) { @@ -238,6 +239,43 @@ sig_child(int signum) cop_log_trace("Leaving sig_child(%d)\n", signum); } +static void +sig_term(int signum) +{ + pid_t pid = 0; + int status = 0; + int err; + pid_t holding_pid; + + //killsig = SIGTERM; + + cop_log_trace("Entering sig_term(%d)\n", signum); + + // safely^W commit suicide. + cop_log_trace("Sending signal %d to entire group\n", signum); + killpg(0, signum); + + cop_log_trace("Waiting for children to exit."); + + for (;;) { + pid = waitpid(WAIT_ANY, &status, WNOHANG); + + if (pid <= 0) { + break; + } + // TSqa03086 - We can not log the child status signal from + // the signal handler since syslog can deadlock. Record + // the pid and the status in a global for logging + // next time through the event loop. We will occasionally + // lose some information if we get two sig childs in rapid + // succession + child_pid = pid; + child_status = status; + } + cop_log_trace("Leaving sig_term(%d), exiting traffic_cop\n", signum); + exit(0); +} + static void #if defined(solaris) sig_fatal(int signum, siginfo_t * t, void *c) @@ -1381,18 +1419,7 @@ check_programs() Lockfile manager_lf(manager_lockfile); err = manager_lf.Open(&holding_pid); chown_file_to_admin_user(manager_lockfile); -#if defined(linux) - // if lockfile held, but process doesn't exist, killall and try again - if (err == 0) { - if (kill(holding_pid, 0) == -1) { - cop_log(COP_WARNING, "%s's lockfile is held, but its pid (%d) is missing;" - " killing all processes named '%s' and retrying\n", manager_binary, holding_pid, manager_binary); - ink_killall(manager_binary, killsig); - sleep(1); // give signals a chance to be received - err = manager_lf.Open(&holding_pid); - } - } -#endif + if (err > 0) { // 'lockfile_open' returns the file descriptor of the opened // lockfile. We need to close this before spawning the @@ -1473,18 +1500,7 @@ check_programs() Lockfile server_lf(server_lockfile); err = server_lf.Open(&holding_pid); -#if defined(linux) - // if lockfile held, but process doesn't exist, killall and try again - if (err == 0) { - if (kill(holding_pid, 0) == -1) { - cop_log(COP_WARNING, "%s's lockfile is held, but its pid (%d) is missing;" - " killing all processes named '%s' and retrying\n", server_binary, holding_pid, server_binary); - ink_killall(server_binary, killsig); - sleep(1); // give signals a chance to be received - err = server_lf.Open(&holding_pid); - } - } -#endif + if (err > 0) { server_lf.Close(); @@ -1673,6 +1689,14 @@ init_signals() struct sigaction action; cop_log_trace("Entering init_signals()\n"); + // Handle the SIGTERM signal: We simply do the same as + // in sig_child.. + action.sa_handler = sig_term; + sigemptyset(&action.sa_mask); + action.sa_flags = 0; + + sigaction(SIGTERM, &action, NULL); + // Handle the SIGCHLD signal. We simply reap all children that // die (which should only be spawned traffic_manager's). action.sa_handler = sig_child; diff --git a/lib/ts/lockfile.cc b/lib/ts/lockfile.cc index b3d345db8d2..c2d34c4684a 100644 --- a/lib/ts/lockfile.cc +++ b/lib/ts/lockfile.cc @@ -205,54 +205,21 @@ static void lockfile_kill_internal(pid_t init_pid, int init_sig, pid_t pid, const char *pname, int sig) { int err; - -#if defined(linux) - - pid_t *pidv = NULL; - int pidvcnt; - - // Need to grab pname's pid vector before we issue any kill signals. - // Specifically, this prevents the race-condition in which - // traffic_manager spawns a new traffic_server while we still think - // we're killall'ing the old traffic_server. - if (pname) { - ink_killall_get_pidv_xmalloc(pname, &pidv, &pidvcnt); - } - - if (init_sig > 0) { - kill(init_pid, init_sig); - // sleep for a bit and give time for the first signal to be - // delivered - sleep(1); - } - - do { - if ((err = kill(pid, sig)) == 0) { - sleep(1); - } - if (pname && (pidvcnt > 0)) { - ink_killall_kill_pidv(pidv, pidvcnt, sig); - sleep(1); - } - } while ((err == 0) || ((err < 0) && (errno == EINTR))); - - ats_free(pidv); - -#else + int status; if (init_sig > 0) { kill(init_pid, init_sig); - // sleep for a bit and give time for the first signal to be - // delivered - sleep(1); + // Wait for children to exit + do { + err = waitpid(-1, &status, WNOHANG); + if (err == -1) break; + } while(!WIFEXITED(status) && !WIFSIGNALED(status)); } do { err = kill(pid, sig); } while ((err == 0) || ((err < 0) && (errno == EINTR))); -#endif // linux check - } void @@ -294,14 +261,12 @@ Lockfile::KillGroup(int sig, int initial_sig, const char *pname) if ((pid < 0) || (pid == getpid())) pid = holding_pid; - else - pid = -pid; if (pid != 0) { - // We kill the holding_pid instead of the process_group - // initially since there is no point trying to get core files - // from a group since the core file of one overwrites the core - // file of another one + // This way, we kill the process_group: + pid = -pid; + // In order to get core files from each process, please + // set your core_pattern appropriately. lockfile_kill_internal(holding_pid, initial_sig, pid, pname, sig); } } From f7735f5fe79b83957aacacd48bac829514947128 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Gali=C4=87?= Date: Tue, 30 Oct 2012 22:26:40 +0100 Subject: [PATCH 2/3] cleanup: get rid of ink_killall implementation After ripping it out from the code, remove it the Linux-specific, /proc parsing ink_killall implementation. --- cop/TrafficCop.cc | 1 - lib/ts/Makefile.am | 2 - lib/ts/ink_killall.cc | 149 ------------------------------------------ lib/ts/ink_killall.h | 63 ------------------ lib/ts/libts.h | 1 - lib/ts/lockfile.cc | 9 +-- 6 files changed, 1 insertion(+), 224 deletions(-) delete mode 100644 lib/ts/ink_killall.cc delete mode 100644 lib/ts/ink_killall.h diff --git a/cop/TrafficCop.cc b/cop/TrafficCop.cc index 74e63eac7bb..0f0f48c460a 100644 --- a/cop/TrafficCop.cc +++ b/cop/TrafficCop.cc @@ -29,7 +29,6 @@ #if defined(linux) || defined (solaris) #include "sys/utsname.h" -#include "ink_killall.h" #include #include #include diff --git a/lib/ts/Makefile.am b/lib/ts/Makefile.am index 4195d3db61b..107fc4c3f38 100644 --- a/lib/ts/Makefile.am +++ b/lib/ts/Makefile.am @@ -71,8 +71,6 @@ libtsutil_la_SOURCES = \ ink_inet.cc \ ink_inet.h \ ink_inout.h \ - ink_killall.cc \ - ink_killall.h \ ink_llqueue.h \ ink_lockfile.h \ INK_MD5.h \ diff --git a/lib/ts/ink_killall.cc b/lib/ts/ink_killall.cc deleted file mode 100644 index 81fab9c8679..00000000000 --- a/lib/ts/ink_killall.cc +++ /dev/null @@ -1,149 +0,0 @@ -/** @file - - A brief file description - - @section license License - - Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - */ - -/*------------------------------------------------------------------------- - -------------------------------------------------------------------------*/ - -#include "libts.h" - -#if defined(linux) - -#include "ink_killall.h" -#include "ink_resource.h" - -#define PROC_BASE "/proc" - -#define INITIAL_PIDVSIZE 32 - -int -ink_killall(const char *pname, int sig) -{ - - int err; - pid_t *pidv; - int pidvcnt; - - if (ink_killall_get_pidv_xmalloc(pname, &pidv, &pidvcnt) < 0) { - return -1; - } - - if (pidvcnt == 0) { - ats_free(pidv); - return 0; - } - - err = ink_killall_kill_pidv(pidv, pidvcnt, sig); - ats_free(pidv); - - return err; - -} - -int -ink_killall_get_pidv_xmalloc(const char *pname, pid_t ** pidv, int *pidvcnt) -{ - - DIR *dir; - FILE *fp; - struct dirent *de; - pid_t pid, self; - char buf[LINE_MAX], *p, *comm; - int pidvsize = INITIAL_PIDVSIZE; - - if (!pname || !pidv || !pidvcnt) - goto l_error; - - self = getpid(); - if (!(dir = opendir(PROC_BASE))) - goto l_error; - - *pidvcnt = 0; - *pidv = (pid_t *)ats_malloc(pidvsize * sizeof(pid_t)); - - while ((de = readdir(dir))) { - if (!(pid = (pid_t) atoi(de->d_name)) || pid == self) - continue; - snprintf(buf, sizeof(buf), PROC_BASE "/%d/stat", pid); - if ((fp = fopen(buf, "r"))) { - if (fgets(buf, sizeof buf, fp) == 0) - goto l_close; - if ((p = strchr(buf, '('))) { - comm = p + 1; - if ((p = strchr(comm, ')'))) - *p = '\0'; - else - goto l_close; - if (strcmp(comm, pname) == 0) { - if (*pidvcnt >= pidvsize) { - pid_t *pidv_realloc; - pidvsize *= 2; - if (!(pidv_realloc = (pid_t *)ats_realloc(*pidv, pidvsize * sizeof(pid_t)))) { - ats_free(*pidv); - goto l_error; - } else { - *pidv = pidv_realloc; - } - } - (*pidv)[*pidvcnt] = pid; - (*pidvcnt)++; - } - } - l_close: - fclose(fp); - } - } - closedir(dir); - - if (*pidvcnt == 0) { - ats_free(*pidv); - *pidv = 0; - } - - return 0; - -l_error: - *pidv = NULL; - *pidvcnt = 0; - return -1; -} - -int -ink_killall_kill_pidv(pid_t * pidv, int pidvcnt, int sig) -{ - - int err = 0; - - if (!pidv || (pidvcnt <= 0)) - return -1; - - while (pidvcnt > 0) { - pidvcnt--; - if (kill(pidv[pidvcnt], sig) < 0) - err = -1; - } - - return err; - -} - -#endif // linux check diff --git a/lib/ts/ink_killall.h b/lib/ts/ink_killall.h deleted file mode 100644 index 9b9f5818a5e..00000000000 --- a/lib/ts/ink_killall.h +++ /dev/null @@ -1,63 +0,0 @@ -/** @file - - A brief file description - - @section license License - - Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - */ - -/*------------------------------------------------------------------------- - -------------------------------------------------------------------------*/ - - -#ifndef _INK_KILLALL_H_ -#define _INK_KILLALL_H_ - -#include "ink_config.h" - -#if defined(linux) - -/*------------------------------------------------------------------------- - ink_killall - - Sends signal 'sig' to all processes with the name 'pname' - - Returns: -1 error - 0 okay - -------------------------------------------------------------------------*/ -int ink_killall(const char *pname, int sig); - -/*------------------------------------------------------------------------- - ink_killall_get_pidv_xmalloc - - Get all pid's named 'pname' and stores into ats_malloc'd - pid_t array, 'pidv' - - Returns: -1 error (pidv: set to NULL; pidvcnt: set to 0) - 0 okay (pidv: ats_malloc'd pid vector; pidvcnt: number of pid's; - if pidvcnt is set to 0, then pidv will be set to NULL) - -------------------------------------------------------------------------*/ -int ink_killall_get_pidv_xmalloc(const char *pname, pid_t ** pidv, int *pidvcnt); - -/*------------------------------------------------------------------------- - ink_killall_kill_pidv - - Kills all pid's in 'pidv' with signal 'sig' - - Returns: -1 error - 0 okay - -------------------------------------------------------------------------*/ -int ink_killall_kill_pidv(pid_t * pidv, int pidvcnt, int sig); - -#endif - -#endif diff --git a/lib/ts/libts.h b/lib/ts/libts.h index 833cdf9f155..1006bc928d9 100644 --- a/lib/ts/libts.h +++ b/lib/ts/libts.h @@ -60,7 +60,6 @@ #include "ink_hash_table.h" #include "ink_hrtime.h" #include "ink_inout.h" -#include "ink_killall.h" #include "ink_llqueue.h" #include "ink_lockfile.h" #include "ink_memory.h" diff --git a/lib/ts/lockfile.cc b/lib/ts/lockfile.cc index c2d34c4684a..e3da24d30cc 100644 --- a/lib/ts/lockfile.cc +++ b/lib/ts/lockfile.cc @@ -24,10 +24,6 @@ #include "ink_platform.h" #include "ink_lockfile.h" -#if defined(linux) -#include "ink_killall.h" -#endif - #define LOCKFILE_BUF_LEN 16 // 16 bytes should be enought for a pid int @@ -195,10 +191,7 @@ Lockfile::Close(void) // killed): Unfortunately, it's possible on Linux that the main PID of // the process has been successfully killed (and is waiting to be // reaped while in a defunct state), while some of the other threads -// of the process just don't want to go away. Integrate ink_killall -// into Kill() and KillAll() just to make sure we really kill -// everything and so that we don't spin hard while trying to kill a -// defunct process. +// of the process just don't want to go away. //------------------------------------------------------------------------- static void From c6de8a083821b0a2a77956a264092df994c8c1b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Gali=C4=87?= Date: Tue, 30 Oct 2012 22:29:46 +0100 Subject: [PATCH 3/3] Handle SIGINT the same way as SIGTERM --- cop/TrafficCop.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cop/TrafficCop.cc b/cop/TrafficCop.cc index 0f0f48c460a..76b3efd2405 100644 --- a/cop/TrafficCop.cc +++ b/cop/TrafficCop.cc @@ -1688,13 +1688,14 @@ init_signals() struct sigaction action; cop_log_trace("Entering init_signals()\n"); - // Handle the SIGTERM signal: We simply do the same as - // in sig_child.. + // Handle the SIGTERM and SIGINT signal: + // We kill the process group and wait() for all children action.sa_handler = sig_term; sigemptyset(&action.sa_mask); action.sa_flags = 0; sigaction(SIGTERM, &action, NULL); + sigaction(SIGINT, &action, NULL); // Handle the SIGCHLD signal. We simply reap all children that // die (which should only be spawned traffic_manager's).