fix process control script

This commit is contained in:
lnk
2026-03-23 16:11:17 +08:00
parent 0acc58bbe1
commit 6f7ee762ec

View File

@@ -8,9 +8,216 @@
#前置all的重置或者新增都是由稳态的第一个进程来处理所有进程收到这条消息后先判断自己的进程号是否是1而且是稳态否则不处理所有操作均由这个进程完成
# Detach from the caller. On the first pass (SETSID is empty) the script
# re-launches itself in the background through nohup + setsid, passing the
# original arguments along, and the original invocation exits with status 0.
if [ -z "$SETSID" ]; then
# Exported marker: the re-launched copy sees SETSID=1 and skips this branch.
export SETSID=1
# stdin is /dev/null; stdout and stderr are appended to the detach log.
nohup setsid "$0" "$@" >> /tmp/set_process_detach.log 2>&1 < /dev/null &
exit 0
fi
# Close the sockets / pipes / file descriptors inherited from the parent process.
# Every entry listed under /proc/$$/fd is visited; only 0, 1 and 2 are kept.
for fd_path in /proc/$$/fd/*; do
fd_num=$(basename "$fd_path")
case "$fd_num" in
# stdin, stdout and stderr stay open.
0|1|2) ;;
# eval is used so the descriptor number appears literally in "exec N>&-";
# error output of the close attempt is discarded.
*) eval "exec ${fd_num}>&-" 2>/dev/null ;;
esac
done
# Paths used by the rest of the script.
# LOGFILE lives under $FEP_ENV, which is read from the environment.
LOGFILE="$FEP_ENV/dat/log/start_fe.log"
INI_FILE="/FeProject/etc/config/mykafka.ini"
LOCK_FILE="/tmp/set_process.lock"

# Try to create the lock file holding this script's PID.
# With noclobber the redirection refuses to overwrite an existing file, so
# creation and the "does it already exist" check happen in one atomic step
# (a separate "test -f" followed by a write lets two concurrent starts both win).
# Returns 0 when the lock was created, non-zero otherwise.
_set_process_try_lock() {
  # $$ is still the PID of the main script inside the subshell.
  ( set -o noclobber; echo "$$" > "$LOCK_FILE" ) 2>/dev/null
}

# Single-instance guard: only one set_process run may be active at a time.
if ! _set_process_try_lock; then
  old_pid=$(cat "$LOCK_FILE" 2>/dev/null)
  if [ -n "$old_pid" ] && ps -p "$old_pid" > /dev/null 2>&1; then
    echo "Already running: $old_pid"
    exit 1
  fi
  # The recorded owner is gone (or the file is empty): take the lock over.
  echo "Stale lock found, removing"
  rm -f "$LOCK_FILE"
  if ! _set_process_try_lock; then
    # Either another instance grabbed the lock in between or the file cannot be created.
    echo "Cannot create lock file: $LOCK_FILE"
    exit 1
  fi
fi
# Single quotes: the path is expanded (and quoted) when the trap fires.
trap 'rm -f "$LOCK_FILE"' EXIT
#######################################
# Print the value configured for a key in the INI file.
# Lines of the form "key=value" are matched, with optional whitespace before
# the key and around the "=" sign. When the key occurs several times the last
# occurrence wins. Leading and trailing whitespace of the value is removed.
# Globals:   INI_FILE (read)
# Arguments: $1 - key name (used inside an extended regular expression)
# Outputs:   the trimmed value on stdout
# Returns:   0 when a non-empty value was found, 1 otherwise
#######################################
get_ini_value() {
  local key="$1"
  local line

  # "[[:space:]]*=" also accepts the common "key = value" layout.
  line=$(grep -E "^[[:space:]]*${key}[[:space:]]*=" "$INI_FILE" | tail -n 1)
  [ -z "$line" ] && return 1

  # Drop everything up to and including the first "=".
  line="${line#*=}"

  # Trim surrounding whitespace; printf keeps values such as "-n" intact.
  line=$(printf '%s\n' "$line" | sed 's/^[[:space:]]*//; s/[[:space:]]*$//')
  [ -n "$line" ] || return 1

  printf '%s\n' "$line"
  return 0
}
#######################################
# Write a message to stdout and append the same text to the log file.
# Globals:   LOGFILE (appended to)
# Arguments: the message words, joined by echo
# Outputs:   the message on stdout
#######################################
log() {
  { echo "$@"; } \
    | tee -a "$LOGFILE"
}
#######################################
# Wait until no TCP listener is bound to the given local port.
# Polls "ss -lntp" in 3 rounds of 20 attempts, sleeping one second after each
# busy attempt and one more second between rounds.
# Globals:   LOGFILE (appended to on failure), retry / i (loop counters, written)
# Arguments: $1 - local port number
# Outputs:   progress messages through log; remaining listeners on failure
# Returns:   0 once the port is free, 1 if it is still in use after all rounds
#######################################
check_local_port_released() {
  local port_no="$1"
  # ":<port>" followed by whitespace, as it appears in the ss address column.
  local listen_re=":${port_no}[[:space:]]"

  for retry in {1..3}; do
    for i in {1..20}; do
      if ss -lntp 2>/dev/null | grep -q "$listen_re"; then
        log "[WAIT] Local port $port_no still in use... ($i/20)"
        sleep 1
      else
        log "[OK] Local port $port_no released"
        return 0
      fi
    done
    log "[RETRY] Local port $port_no not released, retry $retry/3"
    sleep 1
  done

  # Give up: record who is still listening.
  log "[FAIL] Local port $port_no still in use after retries:"
  ss -lntp 2>/dev/null | grep "$listen_re" | tee -a "$LOGFILE"
  return 1
}
# Print the "ss -ntp" lines that belong to an FE process (pt61850netd_pqfe,
# fe_watchdog, fe_main) and contain the endpoint "$1" as a whole address token.
# Comparing whole whitespace-separated tokens avoids the false positives of a
# substring/regex match (port 80 vs 8080, "0.0.0.1" inside "10.0.0.1", "." as wildcard).
_remote_fe_connections() {
  ss -ntp 2>/dev/null | awk -v endpoint="$1" '
    /pt61850netd_pqfe|fe_watchdog|fe_main/ {
      for (f = 1; f <= NF; f++) {
        if ($f == endpoint) { print; next }
      }
    }'
}

#######################################
# Wait until no FE process holds a TCP connection to REMOTE_IP:REMOTE_PORT.
# Polls in 3 rounds of 20 attempts, sleeping one second after each busy
# attempt and one more second between rounds. The connections found on a busy
# attempt are printed and appended to the log file.
# Globals:   LOGFILE (appended to)
# Arguments: $1 - remote IP as shown by "ss -n", $2 - remote port
# Outputs:   progress messages through log; matching ss lines while busy
# Returns:   0 once no matching connection exists, 1 after all rounds failed
#######################################
check_remote_conn_released() {
  local REMOTE_IP="$1"
  local REMOTE_PORT="$2"
  local endpoint="${REMOTE_IP}:${REMOTE_PORT}"
  local conns retry i

  for ((retry = 1; retry <= 3; retry++)); do
    for ((i = 1; i <= 20; i++)); do
      # One snapshot per attempt, used for both the decision and the log output.
      conns=$(_remote_fe_connections "$endpoint")
      if [ -z "$conns" ]; then
        log "[OK] Remote ${REMOTE_IP}:${REMOTE_PORT} released"
        return 0
      fi
      log "[WAIT] Remote ${REMOTE_IP}:${REMOTE_PORT} still exists... ($i/20)"
      printf '%s\n' "$conns" | tee -a "$LOGFILE"
      sleep 1
    done
    log "[RETRY] Remote ${REMOTE_IP}:${REMOTE_PORT} not released, retry $retry/3"
    sleep 1
  done

  # Give up: record the connections that are still open.
  log "[FAIL] Remote ${REMOTE_IP}:${REMOTE_PORT} still exists:"
  _remote_fe_connections "$endpoint" | tee -a "$LOGFILE"
  return 1
}
#######################################
# Decide from the shape of a configured value which resource to wait for.
#   digits only        -> local listening port ("0" means disabled, skipped)
#   scheme://host:port -> remote connection to host:port
#   host:port          -> remote connection to host:port
# Anything else (including an empty value or a URL without a port) is skipped.
# Arguments: $1 - configuration key (used in messages only), $2 - its value
# Outputs:   messages through log
# Returns:   status of the port/connection check, 0 when the value is skipped
#######################################
check_value_released() {
  local key="$1"
  local value="$2"
  local hostport ip port

  if [ -z "$value" ]; then
    return 0
  fi

  # Plain port number
  if [[ "$value" =~ ^[0-9]+$ ]]; then
    if [ "$value" = "0" ]; then
      log "[SKIP] $key disabled"
      return 0
    fi
    log "[CHECK] $key local port: $value"
    check_local_port_released "$value"
    return $?
  fi

  # URL
  if [[ "$value" =~ ^[a-zA-Z]+:// ]]; then
    # Authority part: text between "://" and the next "/".
    hostport="${value#*://}"
    hostport="${hostport%%/*}"
    ip="${hostport%%:*}"
    port="${hostport##*:}"
    # Without a ":" both expansions yield the same text, i.e. no port was given.
    if [ -z "$ip" ] || [ -z "$port" ] || [ "$ip" = "$port" ]; then
      log "[SKIP] $key invalid URL: $value"
      return 0
    fi
    log "[CHECK] $key remote: $ip:$port"
    check_remote_conn_released "$ip" "$port"
    return $?
  fi

  # ip:port
  if [[ "$value" =~ ^[^:]+:[0-9]+$ ]]; then
    ip="${value%%:*}"
    port="${value##*:}"
    log "[CHECK] $key remote: $ip:$port"
    check_remote_conn_released "$ip" "$port"
    return $?
  fi

  log "[SKIP] $key unsupported value: $value"
  return 0
}
#######################################
# Look up a key in the INI file and wait for the resource it names.
# A key without a usable value is logged and treated as success.
# Arguments: $1 - configuration key
# Outputs:   messages through log
# Returns:   0 when the key is skipped, otherwise the status of
#            check_value_released
#######################################
check_key_released() {
  local key="$1"
  local value

  if ! value=$(get_ini_value "$key"); then
    log "[SKIP] $key not found"
    return 0
  fi

  log "==== Checking $key = $value ===="
  # The function's status is that of this last call.
  check_value_released "$key" "$value"
}
#######################################
# Run the release check for every known configuration key.
# All keys are always checked; a failure of any of them makes the overall
# result a failure.
# Outputs:   banner and summary through log
# Returns:   0 when every check succeeded, 1 otherwise
#######################################
check_all_resources_released() {
  local ret=0
  local key

  log "=============================="
  log " Start checking resources..."
  log "=============================="

  # Order: local ports, then MQ endpoints, then Web endpoints.
  for key in \
    TestPort HttpPort SocketPort \
    Ipport ConsumerIpport \
    WebDevice WebIcd WebIntegrity WebComflag WebEvent WebFileupload WebFiledownload; do
    check_key_released "$key" || ret=1
  done

  if [ $ret -ne 0 ]; then
    log "❌ Some resources NOT released"
  else
    log "✅ ALL resources released"
  fi
  return $ret
}
# 输出当前时间并打印进程停止信息
echo "" ; echo ""
echo "****** `date "+%F %R:%S"` start setting Processes after 3 sec ******"
@@ -20,7 +227,7 @@ echo "****** `date "+%F %R:%S"` start setting Processes after 3 sec ******" >>"$
# 函数检查并处理日志文件大小
check_log_file() {
if [ -n "$1" ]; then
if [ -n "$1" ] && [ -f "$1" ]; then
FILE_SIZE=0
FILE_SIZE=$(du "$1" | awk '{print $1}')
@@ -45,25 +252,46 @@ check_log_file $LOGFILE
# 定义查找并杀死进程的函数
kill_process_by_name() {
PROCESS_NAME=$1
PID=$(ps -ef | grep "$PROCESS_NAME" | grep -v "grep" | awk '{print $2}')
local PROCESS_NAME="$1"
local PIDS
if [ -n "$PID" ]; then
echo "Found process '$PROCESS_NAME' with PID: $PID"
PIDS=$(ps -ef | grep "$PROCESS_NAME" | grep -v grep | awk '{print $2}')
if [ -n "$PIDS" ]; then
echo "Found process '$PROCESS_NAME' with PID(s): $PIDS"
echo "Killing process..."
kill -15 $PID
for pid in $PIDS; do
kill -15 "$pid" 2>/dev/null
done
sleep 3
PID2=$(ps -ef | grep "$PROCESS_NAME" | grep -v "grep" | awk '{print $2}')
if [ -n "$PID2" ]; then
echo "Process still exists, force kill: $PID2"
kill -9 $PID2
fi
for pid in $PIDS; do
if ps -p "$pid" >/dev/null 2>&1; then
echo "Process still exists, force kill: $pid"
kill -9 "$pid" 2>/dev/null
fi
done
else
echo "Process '$PROCESS_NAME' not found."
fi
}
#######################################
# Wait until no pt61850netd_pqfe / fe_watchdog process is listed by "ps -ef".
# Checks up to 30 times, sleeping one second after each busy check.
# Globals:   COUNT (written: number of matching ps lines), i (loop counter)
# Outputs:   progress messages on stdout
# Returns:   0 once no matching process is left, 1 on timeout
#######################################
wait_all_exit() {
  for i in {1..30}; do
    # Lines containing "grep" are dropped so the filter itself is not counted.
    COUNT=$(ps -ef | grep -E 'pt61850netd_pqfe|fe_watchdog' | grep -v grep | wc -l)
    [ "$COUNT" -eq 0 ] && {
      echo "All FE processes exited"
      return 0
    }
    echo "Waiting FE processes exit... ($COUNT still running)"
    sleep 1
  done

  echo "Timeout waiting FE processes exit"
  return 1
}
# 功能块开始
handle_reset() {
# 功能reset
@@ -73,22 +301,28 @@ handle_reset() {
if [ "$2" == "all" ]; then
# 关闭旧的看门狗进程
kill_process_by_name "/FeProject/bin/fe_watchdog"
#kill_process_by_name "/FeProject/bin/fe_watchdog"
# 关闭旧的 stat 进程
kill_process_by_name "/FeProject/bin/pt61850netd_pqfe -d cfg_stat_data"
#kill_process_by_name "/FeProject/bin/pt61850netd_pqfe -d cfg_stat_data"
# 关闭旧的 recall 进程
kill_process_by_name "/FeProject/bin/pt61850netd_pqfe -d cfg_recallhis_data"
#kill_process_by_name "/FeProject/bin/pt61850netd_pqfe -d cfg_recallhis_data"
# 关闭旧的 3s 进程
kill_process_by_name "/FeProject/bin/pt61850netd_pqfe -d cfg_3s_data"
#kill_process_by_name "/FeProject/bin/pt61850netd_pqfe -d cfg_3s_data"
# 关闭旧的 comtrade 进程
kill_process_by_name "/FeProject/bin/pt61850netd_pqfe -d cfg_soe_comtrade"
#kill_process_by_name "/FeProject/bin/pt61850netd_pqfe -d cfg_soe_comtrade"
/home/pq/FeProject/boot/stop_fe.sh
wait_all_exit || exit 1
check_all_resources_released || exit 1
#关闭进程后等待一段时间,防止端口占用
sleep 1
sleep 5
# 清空 runtime.cf 中的所有进程配置
sed -i '/cfg_stat_data/d' /home/pq/FeProject/etc/runtime.cf
@@ -235,14 +469,13 @@ handle_add() {
}
# 获取当前脚本的进程ID
CURRENT_PID=$$
#CURRENT_PID=$$
# 检查是否有其他的set_process.sh脚本正在运行排除当前脚本
if pgrep -f "set_process.sh" | grep -v "^$CURRENT_PID$" > /dev/null; then
echo "set_process.sh is already running. Exiting..."
echo "set_process.sh is already running. Exiting..." >>"$LOGFILE"
exit 1
fi
#if pgrep -f "set_process.sh" | grep -v "^$CURRENT_PID$" > /dev/null; then
# echo "set_process.sh is already running. Exiting..."
# echo "set_process.sh is already running. Exiting..." >>"$LOGFILE"
# exit 1
#fi
#脚本应该等待3秒钟
sleep 3