fix process control script
This commit is contained in:
281
set_process.sh
281
set_process.sh
@@ -8,9 +8,216 @@
|
||||
|
||||
#前置all的重置或者新增都是由稳态的第一个进程来处理,所有进程收到这条消息后先判断自己的进程号是否是1,而且是稳态,否则不处理,所有操作均由这个进程完成,
|
||||
|
||||
# Detach from the caller. On first entry (SETSID is empty) the script
# re-runs itself with the same arguments in a new session under nohup,
# reading stdin from /dev/null and appending stdout/stderr to
# /tmp/set_process_detach.log, and the foreground copy exits with status 0.
# The exported SETSID=1 marks the relaunched copy so it does not detach again.
if [ -n "$SETSID" ]; then
  : # already running as the detached copy; fall through to the real work
else
  SETSID=1
  export SETSID
  nohup setsid "$0" "$@" < /dev/null >> /tmp/set_process_detach.log 2>&1 &
  exit 0
fi
|
||||
|
||||
# Close every file descriptor inherited from the parent process (sockets,
# pipes, files) except stdin, stdout and stderr.
for fd_path in /proc/$$/fd/*; do
  fd_num=${fd_path##*/}
  case "$fd_num" in
    0|1|2)
      # keep the standard streams
      ;;
    ''|*[!0-9]*)
      # Not a descriptor number (e.g. the literal '*' left behind when the
      # glob matches nothing). Never hand such a string to eval: "exec *>&-"
      # would try to execute a file from the current directory.
      ;;
    *)
      # fd_num is validated as digits only, so the eval is safe here.
      eval "exec ${fd_num}>&-" 2>/dev/null
      ;;
  esac
done
|
||||
|
||||
# Log file path
LOGFILE="$FEP_ENV/dat/log/start_fe.log"

INI_FILE="/FeProject/etc/config/mykafka.ini"

LOCK_FILE="/tmp/set_process.lock"

# Single-instance guard.
# The lock file holds the PID of the running instance. It is created with
# noclobber so that "test for existence" and "create" are one atomic step;
# a separate [ -f ] check followed by a write lets two instances that start
# at the same moment both pass the check.
if ! ( set -o noclobber; echo "$$" > "$LOCK_FILE" ) 2>/dev/null; then
  old_pid=$(cat "$LOCK_FILE" 2>/dev/null)

  # An empty or unreadable PID is treated as a stale lock.
  if [ -n "$old_pid" ] && ps -p "$old_pid" > /dev/null 2>&1; then
    echo "Already running: $old_pid"
    exit 1
  fi

  echo "Stale lock found, removing"
  rm -f "$LOCK_FILE"

  # Retry once. If this fails, another instance re-created the lock in the
  # meantime or the lock path is not writable.
  if ! ( set -o noclobber; echo "$$" > "$LOCK_FILE" ) 2>/dev/null; then
    echo "Cannot acquire lock $LOCK_FILE"
    exit 1
  fi
fi

# Single quotes: expand LOCK_FILE when the trap fires, and keep it quoted.
trap 'rm -f -- "$LOCK_FILE"' EXIT
|
||||
|
||||
# Print the value of a key from $INI_FILE.
# Globals:   INI_FILE (read)
# Arguments: $1 - key name (used inside an extended regex; pass plain names)
# Outputs:   the value with surrounding whitespace (including a trailing CR)
#            removed, on stdout
# Returns:   0 when a non-empty value was found, 1 otherwise
# When the key appears more than once, the last occurrence wins.
get_ini_value() {
  local key="$1"
  local line value

  # An empty key would match any line that starts with '='.
  [ -n "$key" ] || return 1

  # Accept optional whitespace on both sides of '=' ("key=value" and
  # "key = value").
  line=$(grep -E "^[[:space:]]*${key}[[:space:]]*=" "$INI_FILE" | tail -n 1)
  [ -n "$line" ] || return 1

  # Drop everything up to and including the first '='.
  value="${line#*=}"

  # Trim leading, then trailing, whitespace without spawning sed.
  value="${value#"${value%%[![:space:]]*}"}"
  value="${value%"${value##*[![:space:]]}"}"

  [ -n "$value" ] || return 1

  # printf instead of echo so values such as "-n" are printed verbatim.
  printf '%s\n' "$value"
  return 0
}
|
||||
|
||||
# Write a message to stdout and append it to $LOGFILE.
# Globals:   LOGFILE (read)
# Arguments: message words; they are joined with single spaces
# Outputs:   the message followed by a newline
log() {
  # printf rather than echo: echo would treat a message such as "-n" or
  # "-e" as an option and print nothing.
  printf '%s\n' "$*" | tee -a "$LOGFILE"
}
|
||||
|
||||
# Wait until no listening TCP socket uses the given local port.
# Polls `ss -lntp` once per second, 20 polls per round, for 3 rounds.
# Globals:   LOGFILE (read)
# Arguments: $1 - local port number
# Outputs:   progress messages via log; on failure the remaining `ss` lines
# Returns:   0 as soon as the port is free, 1 if it is still in use after
#            all rounds
check_local_port_released() {
  local PORT="$1"
  # Loop counters are local so they do not leak into the global scope.
  local retry i

  for retry in 1 2 3; do
    i=1
    while [ "$i" -le 20 ]; do
      # ":PORT" followed by whitespace matches the local address column.
      if ! ss -lntp 2>/dev/null | grep -q ":${PORT}[[:space:]]"; then
        log "[OK] Local port $PORT released"
        return 0
      fi
      log "[WAIT] Local port $PORT still in use... ($i/20)"
      sleep 1
      i=$((i + 1))
    done

    log "[RETRY] Local port $PORT not released, retry $retry/3"
    sleep 1
  done

  log "[FAIL] Local port $PORT still in use after retries:"
  ss -lntp 2>/dev/null | grep ":${PORT}[[:space:]]" | tee -a "$LOGFILE"

  return 1
}
|
||||
|
||||
# List TCP connections involving IP:PORT that belong to an FE process.
# Arguments: $1 - IP address, $2 - port
# Outputs:   matching `ss -ntp` lines (nothing when there are none)
_fe_conns_to_remote() {
  local ip_re
  # Escape dots so "10.0.0.1" cannot match e.g. "10a0b0c1".
  ip_re=$(printf '%s' "$1" | sed 's/\./\\./g')

  # Anchor both ends of the address: no digit or dot may precede the IP
  # (so 10.0.0.1 does not match 110.0.0.1) and no digit may follow the port
  # (so port 80 does not match 8080). "\]?" tolerates a bracketed form such
  # as "[::ffff:10.0.0.1]:80".
  ss -ntp 2>/dev/null \
    | grep -E "(^|[^0-9.])${ip_re}\]?:${2}([^0-9]|\$)" \
    | grep -E 'pt61850netd_pqfe|fe_watchdog|fe_main'
}

# Wait until no FE process holds a TCP connection involving the endpoint.
# Polls once per second, 20 polls per round, for 3 rounds.
# Globals:   LOGFILE (read)
# Arguments: $1 - remote IP, $2 - remote port
# Outputs:   progress messages via log; the offending connections are also
#            appended to LOGFILE
# Returns:   0 as soon as no matching connection exists, 1 after all rounds
# NOTE(review): `ss -n` prints numeric addresses, so a host name in $1 will
# presumably never match — confirm the ini values are IP addresses.
check_remote_conn_released() {
  local REMOTE_IP="$1"
  local REMOTE_PORT="$2"
  local retry i conns

  for retry in 1 2 3; do
    i=1
    while [ "$i" -le 20 ]; do
      # One snapshot per poll, used for both the decision and the log dump.
      conns=$(_fe_conns_to_remote "$REMOTE_IP" "$REMOTE_PORT")
      if [ -z "$conns" ]; then
        log "[OK] Remote ${REMOTE_IP}:${REMOTE_PORT} released"
        return 0
      fi

      log "[WAIT] Remote ${REMOTE_IP}:${REMOTE_PORT} still exists... ($i/20)"
      printf '%s\n' "$conns" | tee -a "$LOGFILE"
      sleep 1
      i=$((i + 1))
    done

    log "[RETRY] Remote ${REMOTE_IP}:${REMOTE_PORT} not released, retry $retry/3"
    sleep 1
  done

  log "[FAIL] Remote ${REMOTE_IP}:${REMOTE_PORT} still exists:"
  _fe_conns_to_remote "$REMOTE_IP" "$REMOTE_PORT" | tee -a "$LOGFILE"

  return 1
}
|
||||
|
||||
# Decide what kind of resource a config value names and check its release.
# Supported value forms:
#   NNNN                     local listening port (0 means "disabled")
#   scheme://host[:port]/... remote endpoint; http/ws default to port 80 and
#                            https/wss to 443 when no port is given
#   host:port                remote endpoint
# Arguments: $1 - config key (used in log messages only), $2 - its value
# Returns:   the status of the underlying check; 0 for empty, disabled,
#            invalid or unsupported values (those are only logged)
check_value_released() {
  local key="$1"
  local value="$2"
  local scheme hostport ip port

  [ -z "$value" ] && return 0

  # Bare port number (digits only).
  case "$value" in
    *[!0-9]*) ;;
    *)
      if [ "$value" = "0" ]; then
        log "[SKIP] $key disabled"
        return 0
      fi

      log "[CHECK] $key local port: $value"
      check_local_port_released "$value"
      return $?
      ;;
  esac

  # URL: a purely alphabetic scheme followed by "://".
  scheme="${value%%://*}"
  case "$scheme" in
    "$value"|''|*[!a-zA-Z]*) scheme="" ;;
  esac

  if [ -n "$scheme" ]; then
    hostport="${value#*://}"
    hostport="${hostport%%[/?#]*}"   # drop path, query and fragment
    hostport="${hostport##*@}"       # drop "user:password@"

    case "$hostport" in
      *:*)
        ip="${hostport%%:*}"
        port="${hostport##*:}"
        ;;
      *)
        ip="$hostport"
        port=""
        ;;
    esac

    # No explicit port: fall back to the scheme's well-known port instead of
    # silently skipping the check.
    if [ -z "$port" ]; then
      case "$scheme" in
        [Hh][Tt][Tt][Pp]|[Ww][Ss]) port=80 ;;
        [Hh][Tt][Tt][Pp][Ss]|[Ww][Ss][Ss]) port=443 ;;
      esac
    fi

    # A missing or non-numeric port makes the URL unusable for the check.
    case "$port" in
      ''|*[!0-9]*) ip="" ;;
    esac

    if [ -n "$ip" ]; then
      log "[CHECK] $key remote: $ip:$port"
      check_remote_conn_released "$ip" "$port"
      return $?
    fi

    log "[SKIP] $key invalid URL: $value"
    return 0
  fi

  # host:port with exactly one colon and a numeric port.
  case "$value" in
    *:*)
      ip="${value%%:*}"
      port="${value#*:}"
      case "$port" in
        ''|*[!0-9]*) ip="" ;;
      esac

      if [ -n "$ip" ]; then
        log "[CHECK] $key remote: $ip:$port"
        check_remote_conn_released "$ip" "$port"
        return $?
      fi
      ;;
  esac

  log "[SKIP] $key unsupported value: $value"
  return 0
}
|
||||
|
||||
# Look up one key in the ini file and check that the resource named by its
# value has been released.
# Arguments: $1 - ini key
# Returns:   0 when the key is missing (logged as skipped); otherwise the
#            status of check_value_released
check_key_released() {
  local key="$1"
  local value

  if ! value=$(get_ini_value "$key"); then
    log "[SKIP] $key not found"
    return 0
  fi

  log "==== Checking $key = $value ===="
  # Last command: its status becomes the function's return status.
  check_value_released "$key" "$value"
}
|
||||
|
||||
# Run the release check for every configured resource key and report the
# overall result. Every key is checked even after an earlier one failed.
# Returns: 0 when all checks passed, 1 when at least one failed
check_all_resources_released() {
  local ret=0
  local key

  log "=============================="
  log " Start checking resources..."
  log "=============================="

  # Order: local ports, then MQ endpoints, then Web endpoints.
  for key in \
    TestPort HttpPort SocketPort \
    Ipport ConsumerIpport \
    WebDevice WebIcd WebIntegrity WebComflag WebEvent \
    WebFileupload WebFiledownload
  do
    check_key_released "$key" || ret=1
  done

  if [ $ret -ne 0 ]; then
    log "❌ Some resources NOT released"
  else
    log "✅ ALL resources released"
  fi

  return $ret
}
|
||||
|
||||
|
||||
|
||||
# 输出当前时间并打印进程停止信息
|
||||
echo "" ; echo ""
|
||||
echo "****** `date "+%F %R:%S"` start setting Processes after 3 sec ******"
|
||||
@@ -20,7 +227,7 @@ echo "****** `date "+%F %R:%S"` start setting Processes after 3 sec ******" >>"$
|
||||
|
||||
# 函数检查并处理日志文件大小
|
||||
check_log_file() {
|
||||
if [ -n "$1" ]; then
|
||||
if [ -n "$1" ] && [ -f "$1" ]; then
|
||||
FILE_SIZE=0
|
||||
FILE_SIZE=$(du "$1" | awk '{print $1}')
|
||||
|
||||
@@ -45,25 +252,46 @@ check_log_file $LOGFILE
|
||||
|
||||
# 定义查找并杀死进程的函数
|
||||
# Find processes by command-line pattern and stop them.
# Sends SIGTERM to every match, waits 3 seconds, then sends SIGKILL to any
# of those PIDs that is still alive.
# Arguments: $1 - pattern matched (as a grep regex) against `ps -ef` lines
# Outputs:   progress messages on stdout
# Returns:   0
kill_process_by_name() {
  local PROCESS_NAME="$1"
  local PIDS pid

  # An empty pattern would match every line of `ps -ef`.
  if [ -z "$PROCESS_NAME" ]; then
    echo "Process '$PROCESS_NAME' not found."
    return 0
  fi

  # Column 2 of `ps -ef` is the PID. The grep helper and this script itself
  # are excluded so the script can never signal itself.
  PIDS=$(ps -ef | grep -- "$PROCESS_NAME" | grep -v grep \
    | awk -v self="$$" '$2 != self {print $2}')

  if [ -z "$PIDS" ]; then
    echo "Process '$PROCESS_NAME' not found."
    return 0
  fi

  echo "Found process '$PROCESS_NAME' with PID(s): $PIDS"
  echo "Killing process..."

  # PIDS is a whitespace-separated list; word splitting is intended.
  for pid in $PIDS; do
    kill -15 "$pid" 2>/dev/null
  done

  sleep 3

  # Only PIDs from the original list are re-checked, so a process started
  # in the meantime with the same name is not force-killed.
  for pid in $PIDS; do
    if ps -p "$pid" >/dev/null 2>&1; then
      echo "Process still exists, force kill: $pid"
      kill -9 "$pid" 2>/dev/null
    fi
  done

  return 0
}
|
||||
|
||||
# Wait for all pt61850netd_pqfe and fe_watchdog processes to disappear.
# Polls `ps -ef` once per second, at most 30 times.
# Outputs: progress messages on stdout
# Returns: 0 when no matching process is left, 1 on timeout
wait_all_exit() {
  # Locals keep the counter and the match count out of the global scope.
  local i count

  i=1
  while [ "$i" -le 30 ]; do
    # grep -c prints the number of remaining lines (0 when there are none).
    count=$(ps -ef | grep -E 'pt61850netd_pqfe|fe_watchdog' | grep -vc grep)
    if [ "$count" -eq 0 ]; then
      echo "All FE processes exited"
      return 0
    fi
    echo "Waiting FE processes exit... ($count still running)"
    sleep 1
    i=$((i + 1))
  done

  echo "Timeout waiting FE processes exit"
  return 1
}
|
||||
|
||||
# 功能块开始
|
||||
handle_reset() {
|
||||
# 功能:reset
|
||||
@@ -73,22 +301,28 @@ handle_reset() {
|
||||
if [ "$2" == "all" ]; then
|
||||
|
||||
# 关闭旧的看门狗进程
|
||||
kill_process_by_name "/FeProject/bin/fe_watchdog"
|
||||
#kill_process_by_name "/FeProject/bin/fe_watchdog"
|
||||
|
||||
# 关闭旧的 stat 进程
|
||||
kill_process_by_name "/FeProject/bin/pt61850netd_pqfe -d cfg_stat_data"
|
||||
#kill_process_by_name "/FeProject/bin/pt61850netd_pqfe -d cfg_stat_data"
|
||||
|
||||
# 关闭旧的 recall 进程
|
||||
kill_process_by_name "/FeProject/bin/pt61850netd_pqfe -d cfg_recallhis_data"
|
||||
#kill_process_by_name "/FeProject/bin/pt61850netd_pqfe -d cfg_recallhis_data"
|
||||
|
||||
# 关闭旧的 3s 进程
|
||||
kill_process_by_name "/FeProject/bin/pt61850netd_pqfe -d cfg_3s_data"
|
||||
#kill_process_by_name "/FeProject/bin/pt61850netd_pqfe -d cfg_3s_data"
|
||||
|
||||
# 关闭旧的 comtrade 进程
|
||||
kill_process_by_name "/FeProject/bin/pt61850netd_pqfe -d cfg_soe_comtrade"
|
||||
#kill_process_by_name "/FeProject/bin/pt61850netd_pqfe -d cfg_soe_comtrade"
|
||||
|
||||
/home/pq/FeProject/boot/stop_fe.sh
|
||||
|
||||
wait_all_exit || exit 1
|
||||
|
||||
check_all_resources_released || exit 1
|
||||
|
||||
#关闭进程后等待一段时间,防止端口占用
|
||||
sleep 1
|
||||
sleep 5
|
||||
|
||||
# 清空 runtime.cf 中的所有进程配置
|
||||
sed -i '/cfg_stat_data/d' /home/pq/FeProject/etc/runtime.cf
|
||||
@@ -235,14 +469,13 @@ handle_add() {
|
||||
}
|
||||
|
||||
# 获取当前脚本的进程ID
|
||||
CURRENT_PID=$$
|
||||
|
||||
#CURRENT_PID=$$
|
||||
# 检查是否有其他的set_process.sh脚本正在运行,排除当前脚本
|
||||
if pgrep -f "set_process.sh" | grep -v "^$CURRENT_PID$" > /dev/null; then
|
||||
echo "set_process.sh is already running. Exiting..."
|
||||
echo "set_process.sh is already running. Exiting..." >>"$LOGFILE"
|
||||
exit 1
|
||||
fi
|
||||
#if pgrep -f "set_process.sh" | grep -v "^$CURRENT_PID$" > /dev/null; then
|
||||
# echo "set_process.sh is already running. Exiting..."
|
||||
# echo "set_process.sh is already running. Exiting..." >>"$LOGFILE"
|
||||
# exit 1
|
||||
#fi
|
||||
|
||||
#脚本应该等待3秒钟
|
||||
sleep 3
|
||||
|
||||
Reference in New Issue
Block a user