Note: this patch was re-wrapped from a copy whose line breaks and indentation
had been collapsed. Indentation and the position of blank context lines are
inferred and may need adjusting against the target file before it applies.

diff --git a/set_process.sh b/set_process.sh
index e5956f1..c806059 100644
--- a/set_process.sh
+++ b/set_process.sh
@@ -8,9 +8,252 @@
 #前置all的重置或者新增都是由稳态的第一个进程来处理,所有进程收到这条消息后先判断自己的进程号是否是1,而且是稳态,否则不处理,所有操作均由这个进程完成,
 
 
+# Re-launch this script detached from the caller: nohup + setsid, stdin from
+# /dev/null, output appended to /tmp/set_process_detach.log. The original
+# invocation exits immediately with status 0.
+if [ -z "$SETSID" ]; then
+    export SETSID=1
+    nohup setsid "$0" "$@" >> /tmp/set_process_detach.log 2>&1 < /dev/null &
+    exit 0
+fi
+# SETSID is only a one-shot marker for the re-exec above. Unset it so child
+# processes do not inherit it: a set_process.sh started with SETSID already
+# in its environment would skip the detach step.
+unset SETSID
+
+# Close sockets / pipes / file descriptors inherited from the parent process
+for fd_path in /proc/$$/fd/*; do
+    fd_num=$(basename "$fd_path")
+    case "$fd_num" in
+        0|1|2) ;;
+        *) eval "exec ${fd_num}>&-" 2>/dev/null ;;
+    esac
+done
+
 # 设置日志文件路径
 LOGFILE="$FEP_ENV/dat/log/start_fe.log"
 
+INI_FILE="/FeProject/etc/config/mykafka.ini"
+
+LOCK_FILE="/tmp/set_process.lock"
+
+# Single-instance guard. With noclobber the redirection fails if the lock file
+# already exists, so test-and-create is one step ($$ is unchanged in a subshell).
+if ! (set -o noclobber; echo $$ > "$LOCK_FILE") 2>/dev/null; then
+    old_pid=$(cat "$LOCK_FILE" 2>/dev/null)
+    if [ -n "$old_pid" ] && ps -p "$old_pid" > /dev/null 2>&1; then
+        echo "Already running: $old_pid"
+        exit 1
+    fi
+    echo "Stale lock found, removing"
+    rm -f "$LOCK_FILE"
+    echo $$ > "$LOCK_FILE"
+fi
+# Single quotes: LOCK_FILE is expanded (and quoted) when the trap fires.
+trap 'rm -f "$LOCK_FILE"' EXIT
+
+# Print the value of the last "key=value" line for key $1 in $INI_FILE.
+# Returns 1 when no such line exists or the value is empty, else 0.
+get_ini_value() {
+    local key="$1"
+    local line
+
+    # Whitespace is accepted before the key and between the key and "="
+    line=$(grep -E "^[[:space:]]*${key}[[:space:]]*=" "$INI_FILE" | tail -n 1)
+    [ -z "$line" ] && return 1
+
+    # Drop everything up to and including the first "="
+    line="${line#*=}"
+    # Trim leading and trailing whitespace
+    line=$(echo "$line" | sed 's/^[[:space:]]*//; s/[[:space:]]*$//')
+
+    [ -n "$line" ] || return 1
+    echo "$line"
+    return 0
+}
+
+# Write the arguments to stdout and append them to $LOGFILE.
+log() {
+    echo "$@" | tee -a "$LOGFILE"
+}
+
+# Wait until no listening TCP socket (ss -lntp) uses local port $1.
+# Polls once per second, 3 rounds of 20 attempts. Returns 0 once the port
+# is free, 1 if it is still in use after all rounds.
+check_local_port_released() {
+    local PORT="$1"
+    local retry i
+
+    for retry in $(seq 1 3); do
+        for i in $(seq 1 20); do
+            if ! ss -lntp 2>/dev/null | grep -q ":${PORT}[[:space:]]"; then
+                log "[OK] Local port $PORT released"
+                return 0
+            fi
+            log "[WAIT] Local port $PORT still in use... ($i/20)"
+            sleep 1
+        done
+
+        log "[RETRY] Local port $PORT not released, retry $retry/3"
+        sleep 1
+    done
+
+    log "[FAIL] Local port $PORT still in use after retries:"
+    ss -lntp 2>/dev/null | grep ":${PORT}[[:space:]]" | tee -a "$LOGFILE"
+
+    return 1
+}
+
+# Print the TCP connections listed by "ss -ntp" that have a field exactly
+# equal to "$1:$2" and that mention one of the FE program names.
+# A whole-field comparison is used because a plain grep on "ip:port" is a
+# regex substring match (10.0.0.1:80 would also match 110.0.0.1:8080).
+list_fe_conns() {
+    ss -ntp 2>/dev/null \
+        | awk -v ep="$1:$2" '{ for (i = 1; i <= NF; i++) if ($i == ep) { print; next } }' \
+        | grep -E 'pt61850netd_pqfe|fe_watchdog|fe_main'
+}
+
+# Wait until list_fe_conns reports no connection for $1 (address) / $2 (port).
+# Polls once per second, 3 rounds of 20 attempts. Returns 0 once nothing is
+# listed, 1 if connections remain after all rounds.
+check_remote_conn_released() {
+    local REMOTE_IP="$1"
+    local REMOTE_PORT="$2"
+    local retry i conns
+
+    for retry in $(seq 1 3); do
+        for i in $(seq 1 20); do
+            conns=$(list_fe_conns "$REMOTE_IP" "$REMOTE_PORT")
+            if [ -z "$conns" ]; then
+                log "[OK] Remote ${REMOTE_IP}:${REMOTE_PORT} released"
+                return 0
+            fi
+
+            log "[WAIT] Remote ${REMOTE_IP}:${REMOTE_PORT} still exists... ($i/20)"
+            log "$conns"
+            sleep 1
+        done
+
+        log "[RETRY] Remote ${REMOTE_IP}:${REMOTE_PORT} not released, retry $retry/3"
+        sleep 1
+    done
+
+    log "[FAIL] Remote ${REMOTE_IP}:${REMOTE_PORT} still exists:"
+    list_fe_conns "$REMOTE_IP" "$REMOTE_PORT" | tee -a "$LOGFILE"
+
+    return 1
+}
+
+# Check the resource described by value $2 of INI key $1:
+#   digits only              -> local listening port (0 = disabled, skipped)
+#   scheme://host:port[/...] -> connections to host:port
+#   host:port                -> connections to host:port
+# Empty or unrecognised values are skipped and count as released (return 0).
+# NOTE(review): ss -n prints numeric addresses, so a host name in the value
+# presumably never matches and is reported as released - confirm the INI
+# file only holds IP addresses.
+check_value_released() {
+    local key="$1"
+    local value="$2"
+    local hostport ip port
+
+    [ -z "$value" ] && return 0
+
+    # Bare port number
+    if echo "$value" | grep -Eq '^[0-9]+$'; then
+        [ "$value" = "0" ] && { log "[SKIP] $key disabled"; return 0; }
+
+        log "[CHECK] $key local port: $value"
+        check_local_port_released "$value"
+        return $?
+    fi
+
+    # URL
+    if echo "$value" | grep -Eq '^[a-zA-Z]+://'; then
+        hostport=$(echo "$value" | sed -n 's#^[a-zA-Z]\+://\([^/]*\).*#\1#p')
+        ip="${hostport%%:*}"
+        port="${hostport##*:}"
+
+        # Without an explicit port both expansions yield the same string
+        if [ -n "$ip" ] && [ -n "$port" ] && [ "$ip" != "$port" ]; then
+            log "[CHECK] $key remote: $ip:$port"
+            check_remote_conn_released "$ip" "$port"
+            return $?
+        fi
+
+        log "[SKIP] $key invalid URL: $value"
+        return 0
+    fi
+
+    # ip:port
+    if echo "$value" | grep -Eq '^[^:]+:[0-9]+$'; then
+        ip="${value%%:*}"
+        port="${value##*:}"
+
+        log "[CHECK] $key remote: $ip:$port"
+        check_remote_conn_released "$ip" "$port"
+        return $?
+    fi
+
+    log "[SKIP] $key unsupported value: $value"
+    return 0
+}
+
+# Look up INI key $1 and wait for the resource it names to be released.
+# A key that is missing or empty is skipped (return 0).
+check_key_released() {
+    local key="$1"
+    local value
+
+    value=$(get_ini_value "$key") || {
+        log "[SKIP] $key not found"
+        return 0
+    }
+
+    log "==== Checking $key = $value ===="
+    check_value_released "$key" "$value"
+    return $?
+}
+
+# Run check_key_released for every port / endpoint key read from $INI_FILE.
+# Every key is checked even after a failure. Returns 0 only if all passed.
+check_all_resources_released() {
+    local ret=0
+
+    log "=============================="
+    log " Start checking resources..."
+    log "=============================="
+
+    # Local ports
+    check_key_released "TestPort" || ret=1
+    check_key_released "HttpPort" || ret=1
+    check_key_released "SocketPort" || ret=1
+
+    # MQ
+    check_key_released "Ipport" || ret=1
+    check_key_released "ConsumerIpport" || ret=1
+
+    # Web
+    check_key_released "WebDevice" || ret=1
+    check_key_released "WebIcd" || ret=1
+    check_key_released "WebIntegrity" || ret=1
+    check_key_released "WebComflag" || ret=1
+    check_key_released "WebEvent" || ret=1
+    check_key_released "WebFileupload" || ret=1
+    check_key_released "WebFiledownload" || ret=1
+
+    if [ "$ret" -eq 0 ]; then
+        log "✅ ALL resources released"
+    else
+        log "❌ Some resources NOT released"
+    fi
+
+    return $ret
+}
+
+
+
 # 输出当前时间并打印进程停止信息
 echo "" ; echo ""
 echo "****** `date "+%F %R:%S"` start setting Processes after 3 sec ******"
@@ -20,7 +263,7 @@ echo "****** `date "+%F %R:%S"` start setting Processes after 3 sec ******" >>"$
 
 # 函数检查并处理日志文件大小
 check_log_file() {
-    if [ -n "$1" ]; then
+    if [ -n "$1" ] && [ -f "$1" ]; then
         FILE_SIZE=0
         FILE_SIZE=$(du "$1" | awk '{print $1}')
 
@@ -45,25 +288,52 @@ check_log_file $LOGFILE
 
 # 定义查找并杀死进程的函数
 kill_process_by_name() {
-    PROCESS_NAME=$1
-    PID=$(ps -ef | grep "$PROCESS_NAME" | grep -v "grep" | awk '{print $2}')
+    local PROCESS_NAME="$1"
+    local PIDS pid
 
-    if [ -n "$PID" ]; then
-        echo "Found process '$PROCESS_NAME' with PID: $PID"
+    PIDS=$(ps -ef | grep "$PROCESS_NAME" | grep -v grep | awk '{print $2}')
+
+    if [ -n "$PIDS" ]; then
+        echo "Found process '$PROCESS_NAME' with PID(s): $PIDS"
         echo "Killing process..."
-        kill -15 $PID
+
+        for pid in $PIDS; do
+            kill -15 "$pid" 2>/dev/null
+        done
+
         sleep 3
 
-        PID2=$(ps -ef | grep "$PROCESS_NAME" | grep -v "grep" | awk '{print $2}')
-        if [ -n "$PID2" ]; then
-            echo "Process still exists, force kill: $PID2"
-            kill -9 $PID2
-        fi
+        # Force-kill only the PIDs signalled above that are still alive
+        for pid in $PIDS; do
+            if ps -p "$pid" >/dev/null 2>&1; then
+                echo "Process still exists, force kill: $pid"
+                kill -9 "$pid" 2>/dev/null
+            fi
+        done
     else
         echo "Process '$PROCESS_NAME' not found."
     fi
 }
 
+# Wait up to 30 seconds, polling once per second, until "ps -ef" lists no
+# pt61850netd_pqfe or fe_watchdog process. Returns 0 once none is listed,
+# 1 on timeout.
+wait_all_exit() {
+    local i COUNT
+
+    for i in $(seq 1 30); do
+        COUNT=$(ps -ef | grep -E 'pt61850netd_pqfe|fe_watchdog' | grep -v grep | wc -l)
+        if [ "$COUNT" -eq 0 ]; then
+            echo "All FE processes exited"
+            return 0
+        fi
+        echo "Waiting FE processes exit... ($COUNT still running)"
+        sleep 1
+    done
+    echo "Timeout waiting FE processes exit"
+    return 1
+}
+
 # 功能块开始
 handle_reset() {
     # 功能:reset
@@ -73,22 +343,28 @@ handle_reset() {
 
     if [ "$2" == "all" ]; then
         # 关闭旧的看门狗进程
-        kill_process_by_name "/FeProject/bin/fe_watchdog"
+        #kill_process_by_name "/FeProject/bin/fe_watchdog"
 
         # 关闭旧的 stat 进程
-        kill_process_by_name "/FeProject/bin/pt61850netd_pqfe -d cfg_stat_data"
+        #kill_process_by_name "/FeProject/bin/pt61850netd_pqfe -d cfg_stat_data"
 
         # 关闭旧的 recall 进程
-        kill_process_by_name "/FeProject/bin/pt61850netd_pqfe -d cfg_recallhis_data"
+        #kill_process_by_name "/FeProject/bin/pt61850netd_pqfe -d cfg_recallhis_data"
 
         # 关闭旧的 3s 进程
-        kill_process_by_name "/FeProject/bin/pt61850netd_pqfe -d cfg_3s_data"
+        #kill_process_by_name "/FeProject/bin/pt61850netd_pqfe -d cfg_3s_data"
 
         # 关闭旧的 comtrade 进程
-        kill_process_by_name "/FeProject/bin/pt61850netd_pqfe -d cfg_soe_comtrade"
+        #kill_process_by_name "/FeProject/bin/pt61850netd_pqfe -d cfg_soe_comtrade"
+
+        /home/pq/FeProject/boot/stop_fe.sh
+
+        wait_all_exit || exit 1
+
+        check_all_resources_released || exit 1
 
         #关闭进程后等待一段时间,防止端口占用
-        sleep 1
+        sleep 5
 
         # 清空 runtime.cf 中的所有进程配置
         sed -i '/cfg_stat_data/d' /home/pq/FeProject/etc/runtime.cf
@@ -235,14 +511,13 @@ handle_add() {
 }
 
 # 获取当前脚本的进程ID
-CURRENT_PID=$$
-
+#CURRENT_PID=$$
 # 检查是否有其他的set_process.sh脚本正在运行,排除当前脚本
-if pgrep -f "set_process.sh" | grep -v "^$CURRENT_PID$" > /dev/null; then
-    echo "set_process.sh is already running. Exiting..."
-    echo "set_process.sh is already running. Exiting..." >>"$LOGFILE"
-    exit 1
-fi
+#if pgrep -f "set_process.sh" | grep -v "^$CURRENT_PID$" > /dev/null; then
+#    echo "set_process.sh is already running. Exiting..."
+#    echo "set_process.sh is already running. Exiting..." >>"$LOGFILE"
+#    exit 1
+#fi
 
 #脚本应该等待3秒钟
 sleep 3