Files
microser/set_process.sh

498 lines
16 KiB
Bash
Raw Normal View History

2025-07-31 17:19:10 +08:00
#!/bin/bash
2025-01-16 16:17:01 +08:00
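# @file: set_process.sh
# @brief: Script that adds or resets processes for the system application
# @version: 1.0
# @date: 2024/12/31 10:22:43
# @author: lunankun
# Reset/add requests of type "all" for the front-end are handled by the first steady-state process only:
# every process that receives the message first checks whether its own process number is 1 and whether
# it is the steady-state process; if not, it ignores the message. All operations are done by that one process.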
2026-03-23 16:11:17 +08:00
# Self-detach: on the first invocation SETSID is empty, so the script exports
# SETSID=1, relaunches itself ("$0" with the same arguments) in the background
# under nohup + setsid with stdin from /dev/null and stdout/stderr appended to
# /tmp/set_process_detach.log, and the original invocation exits 0 immediately.
# The relaunched copy inherits SETSID=1, skips this block and does the real work.
if [ -z "$SETSID" ]; then
export SETSID=1
nohup setsid "$0" "$@" >> /tmp/set_process_detach.log 2>&1 < /dev/null &
exit 0
fi
# Close every file descriptor inherited from the parent process (sockets,
# pipes, files), keeping only stdin/stdout/stderr.
for fd_path in /proc/$$/fd/*; do
    # Parameter expansion instead of forking basename for every descriptor.
    fd_num=${fd_path##*/}
    case "$fd_num" in
        0|1|2) ;;
        # Anything that is not a plain number (e.g. the literal "*" left behind
        # when the glob matches nothing) must never reach eval: "exec *>&-"
        # would try to exec a file from the current directory.
        ''|*[!0-9]*) ;;
        # The 2>/dev/null applies to eval only, so stderr itself stays open.
        *) eval "exec ${fd_num}>&-" 2>/dev/null ;;
    esac
done
2025-01-16 16:17:01 +08:00
# Log file path (built from the FEP_ENV environment variable).
LOGFILE="$FEP_ENV/dat/log/start_fe.log"
# INI file that get_ini_value reads ports / endpoints from.
INI_FILE="/FeProject/etc/config/mykafka.ini"
# Single-instance lock file; it contains the PID of the running instance.
LOCK_FILE="/tmp/set_process.lock"

# Acquire the lock. The file is created with noclobber so that "check whether
# it exists" and "create it" are a single step; two instances starting at the
# same moment can no longer both pass the check and both write the file.
# A lock whose PID is no longer alive is treated as stale and removed once.
lock_acquired=0
for lock_attempt in 1 2; do
    if ( set -o noclobber; echo "$$" > "$LOCK_FILE" ) 2>/dev/null; then
        lock_acquired=1
        break
    fi
    old_pid=$(cat "$LOCK_FILE" 2>/dev/null)
    if [ -n "$old_pid" ] && ps -p "$old_pid" > /dev/null 2>&1; then
        echo "Already running: $old_pid"
        exit 1
    fi
    echo "Stale lock found, removing"
    rm -f -- "$LOCK_FILE"
done

if [ "$lock_acquired" -eq 1 ]; then
    # Only remove the lock on exit when this instance actually owns it.
    trap 'rm -f -- "$LOCK_FILE"' EXIT
else
    # Best effort, as before: an unwritable lock file does not block the run.
    echo "Warning: cannot create $LOCK_FILE, continuing without lock" >&2
fi
#######################################
# Print the value of a key from the INI file.
# When the key occurs several times, the last occurrence wins.
# Globals:   INI_FILE (read)
# Arguments: $1 - key name
# Outputs:   the value, with surrounding whitespace and any CR removed
# Returns:   0 when a non-empty value was found, 1 otherwise
#######################################
get_ini_value() {
    local key="$1"
    local line

    [ -n "$key" ] || return 1
    [ -r "$INI_FILE" ] || return 1

    # Accept both "key=value" and "key = value".
    line=$(grep -E "^[[:space:]]*${key}[[:space:]]*=" -- "$INI_FILE" | tail -n 1)
    [ -n "$line" ] || return 1

    # Drop everything up to and including the first "=".
    line="${line#*=}"
    # Strip CR (files edited on Windows) and leading/trailing whitespace;
    # a stray CR would otherwise end up inside the port used for matching.
    line=$(printf '%s\n' "$line" | tr -d '\r' | sed 's/^[[:space:]]*//; s/[[:space:]]*$//')
    [ -n "$line" ] || return 1

    # printf instead of echo so values such as "-n" are printed verbatim.
    printf '%s\n' "$line"
    return 0
}
#######################################
# Print a message to stdout and append it to the log file.
# Globals:   LOGFILE (appended to)
# Arguments: message words; they are joined with single spaces
#######################################
log() {
    # printf instead of echo: a message starting with -n / -e is printed
    # verbatim instead of being swallowed as an echo option.
    printf '%s\n' "$*" | tee -a "$LOGFILE"
}
#######################################
# Wait until no local TCP listener is bound to the given port.
# Polls `ss -lntp` once per second: 3 rounds of 20 checks each.
# Globals:   LOGFILE (appended to, via log and tee)
# Arguments: $1 - local port number
# Returns:   0 as soon as the port is free, 1 if it is still in use at the end
#######################################
check_local_port_released() {
    local PORT="$1"
    # Loop counters are local so callers' variables named i / retry survive.
    local retry i

    for retry in 1 2 3; do
        for ((i = 1; i <= 20; i++)); do
            if ! ss -lntp 2>/dev/null | grep -q ":${PORT}[[:space:]]"; then
                log "[OK] Local port $PORT released"
                return 0
            fi
            log "[WAIT] Local port $PORT still in use... ($i/20)"
            sleep 1
        done
        log "[RETRY] Local port $PORT not released, retry $retry/3"
        sleep 1
    done

    # Record who is still holding the port.
    log "[FAIL] Local port $PORT still in use after retries:"
    ss -lntp 2>/dev/null | grep ":${PORT}[[:space:]]" | tee -a "$LOGFILE"
    return 1
}
# List TCP connections (from `ss -ntp`) that involve exactly ip:port and belong
# to one of the FE processes. $1 - ip, $2 - port. Prints the matching lines.
_list_fe_conns_to_remote() {
    # Dots are matched literally and the address is delimited on both sides,
    # so 10.0.0.1:80 matches neither 110.0.0.1:80 nor 10.0.0.1:8080.
    local ip_re="${1//./[.]}"
    local port="$2"
    ss -ntp 2>/dev/null \
        | grep -E "(^|[^0-9.])${ip_re}:${port}([^0-9]|\$)" \
        | grep -E 'pt61850netd_pqfe|fe_watchdog|fe_main'
}

#######################################
# Wait until no FE process (pt61850netd_pqfe, fe_watchdog, fe_main) holds a
# TCP connection involving the given remote ip:port.
# Polls once per second: 3 rounds of 20 checks each.
# NOTE(review): `ss -n` prints numeric addresses, so a host name passed as $1
# presumably never matches and is reported as released - confirm with callers.
# Globals:   LOGFILE (appended to, via log and tee)
# Arguments: $1 - remote IP, $2 - remote port
# Returns:   0 once no such connection exists, 1 if one is still there at the end
#######################################
check_remote_conn_released() {
    local REMOTE_IP="$1"
    local REMOTE_PORT="$2"
    local retry i conns

    for retry in 1 2 3; do
        for ((i = 1; i <= 20; i++)); do
            # Take one snapshot per poll so the decision and the logged lines
            # refer to the same state.
            conns=$(_list_fe_conns_to_remote "$REMOTE_IP" "$REMOTE_PORT")
            if [ -z "$conns" ]; then
                log "[OK] Remote ${REMOTE_IP}:${REMOTE_PORT} released"
                return 0
            fi
            log "[WAIT] Remote ${REMOTE_IP}:${REMOTE_PORT} still exists... ($i/20)"
            printf '%s\n' "$conns" | tee -a "$LOGFILE"
            sleep 1
        done
        log "[RETRY] Remote ${REMOTE_IP}:${REMOTE_PORT} not released, retry $retry/3"
        sleep 1
    done

    log "[FAIL] Remote ${REMOTE_IP}:${REMOTE_PORT} still exists:"
    _list_fe_conns_to_remote "$REMOTE_IP" "$REMOTE_PORT" | tee -a "$LOGFILE"
    return 1
}
#######################################
# Decide what kind of resource an INI value describes and wait for its release.
#   digits only      -> local listening port ("0" means disabled, skipped)
#   scheme://host... -> remote host:port taken from the URL
#   host:port        -> remote host:port
#   anything else    -> skipped
# Arguments: $1 - key name (used in log messages), $2 - value
# Returns:   status of the local/remote check, or 0 when the value is skipped
#######################################
check_value_released() {
    local name="$1"
    local val="$2"
    local re_port='^[0-9]+$'
    local re_url='^[a-zA-Z]+://([^/]*)'
    local re_hostport='^[^:]+:[0-9]+$'
    local authority host port

    # Nothing configured, nothing to wait for.
    if [ -z "$val" ]; then
        return 0
    fi

    # Bare port number.
    if [[ "$val" =~ $re_port ]]; then
        if [ "$val" = "0" ]; then
            log "[SKIP] $name disabled"
            return 0
        fi
        log "[CHECK] $name local port: $val"
        check_local_port_released "$val"
        return
    fi

    # URL: the part between "://" and the next "/" is host[:port].
    if [[ "$val" =~ $re_url ]]; then
        authority="${BASH_REMATCH[1]}"
        host="${authority%%:*}"
        port="${authority##*:}"
        # Without a ":" both expansions give the same string, i.e. no port.
        if [ -z "$host" ] || [ -z "$port" ] || [ "$host" = "$port" ]; then
            log "[SKIP] $name invalid URL: $val"
            return 0
        fi
        log "[CHECK] $name remote: $host:$port"
        check_remote_conn_released "$host" "$port"
        return
    fi

    # Plain host:port.
    if [[ "$val" =~ $re_hostport ]]; then
        host="${val%%:*}"
        port="${val##*:}"
        log "[CHECK] $name remote: $host:$port"
        check_remote_conn_released "$host" "$port"
        return
    fi

    log "[SKIP] $name unsupported value: $val"
    return 0
}
#######################################
# Look up one key in the INI file and wait for the resource it names.
# Arguments: $1 - INI key
# Returns:   0 when the key is missing/empty (skipped), otherwise the status
#            of check_value_released
#######################################
check_key_released() {
    local ini_key="$1"
    local ini_value

    if ! ini_value=$(get_ini_value "$ini_key"); then
        log "[SKIP] $ini_key not found"
        return 0
    fi

    log "==== Checking $ini_key = $ini_value ===="
    check_value_released "$ini_key" "$ini_value"
}
#######################################
# Check every configured resource (local ports, MQ endpoints, web endpoints)
# and report whether all of them have been released. Every key is checked
# even after an earlier one failed.
# Returns: 0 when all checks passed, 1 when at least one failed
#######################################
check_all_resources_released() {
    local ret=0
    local resource_key
    local -a resource_keys=(
        # local ports
        TestPort HttpPort SocketPort
        # MQ
        Ipport ConsumerIpport
        # Web
        WebDevice WebIcd WebIntegrity WebComflag
        WebEvent WebFileupload WebFiledownload
    )

    log "=============================="
    log " Start checking resources..."
    log "=============================="

    for resource_key in "${resource_keys[@]}"; do
        if ! check_key_released "$resource_key"; then
            ret=1
        fi
    done

    case "$ret" in
        0) log "✅ ALL resources released" ;;
        *) log "❌ Some resources NOT released" ;;
    esac
    return $ret
}
2025-01-16 16:17:01 +08:00
# Announce the run on the console and in the log file. The timestamp is taken
# once so both places show the same time.
start_banner="****** $(date "+%F %R:%S") start setting Processes after 3 sec ******"
echo ""
echo ""
echo "$start_banner"
{
    echo ""
    echo ""
    echo "$start_banner"
} >>"$LOGFILE"
#######################################
# Rotate a log file once it has reached the size limit.
# Keeps three generations: file -> file.1 -> file.2 -> file.3 (oldest dropped).
# Arguments: $1 - log file path (nothing happens if empty or not a regular file)
#            $2 - optional size limit in KiB, default 5120 (5 MiB)
# Returns:   0 when nothing had to be rotated, otherwise the status of the
#            final mv
#######################################
check_log_file() {
    local file="$1"
    local max_kib="${2:-5120}"
    local size_bytes

    if [ -z "$file" ] || [ ! -f "$file" ]; then
        return 0
    fi
    case "$max_kib" in
        ''|*[!0-9]*) max_kib=5120 ;;
    esac

    # Byte length instead of `du`: du reports allocated blocks in a unit that
    # depends on the platform and environment, and an empty result used to
    # break the numeric test.
    size_bytes=$(wc -c < "$file" 2>/dev/null)
    size_bytes=${size_bytes//[[:space:]]/}
    case "$size_bytes" in
        ''|*[!0-9]*) return 0 ;;
    esac
    if [ "$size_bytes" -lt $((max_kib * 1024)) ]; then
        return 0
    fi

    # Shift the generations, oldest first.
    rm -f -- "$file.3"
    if [ -f "$file.2" ]; then
        mv -- "$file.2" "$file.3"
    fi
    if [ -f "$file.1" ]; then
        mv -- "$file.1" "$file.2"
    fi
    mv -- "$file" "$file.1"
}
# Rotate the log file if it has grown too large. Quoted so a path containing
# spaces or glob characters is passed as one argument.
check_log_file "$LOGFILE"
#######################################
# Find processes whose `ps -ef` line contains the given text and stop them:
# SIGTERM first, then SIGKILL for any that are still alive 3 seconds later.
# Arguments: $1 - text to look for (matched literally, not as a regex)
# Outputs:   progress messages on stdout
# Returns:   1 when the name is empty, 0 otherwise
#######################################
kill_process_by_name() {
    local PROCESS_NAME="$1"
    local PIDS pid ps_out

    # An empty name would match every line of the process list.
    if [ -z "$PROCESS_NAME" ]; then
        echo "Process name is empty, nothing to kill." >&2
        return 1
    fi

    # Snapshot first, filter afterwards: the filter process is then not part
    # of the list, so no "grep -v grep" is needed. The text is handed to awk
    # through the environment and compared with index(), so characters such as
    # "." or a leading "-" are taken literally. The script's own PID is skipped.
    ps_out=$(ps -ef)
    PIDS=$(printf '%s\n' "$ps_out" \
        | PROC_PATTERN="$PROCESS_NAME" awk -v self="$$" \
            'index($0, ENVIRON["PROC_PATTERN"]) && $2 != self { print $2 }')

    if [ -n "$PIDS" ]; then
        echo "Found process '$PROCESS_NAME' with PID(s): $PIDS"
        echo "Killing process..."
        for pid in $PIDS; do
            kill -15 "$pid" 2>/dev/null
        done

        # Give the processes time to shut down cleanly.
        sleep 3

        for pid in $PIDS; do
            if ps -p "$pid" >/dev/null 2>&1; then
                echo "Process still exists, force kill: $pid"
                kill -9 "$pid" 2>/dev/null
            fi
        done
    else
        echo "Process '$PROCESS_NAME' not found."
    fi
    return 0
}
2026-03-23 16:11:17 +08:00
#######################################
# Wait up to 30 seconds for all pt61850netd_pqfe / fe_watchdog processes to
# disappear from the process list, polling once per second.
# Outputs: progress messages on stdout
# Returns: 0 once none are left, 1 on timeout
#######################################
wait_all_exit() {
    # Local counters: callers' variables named i / COUNT are left alone.
    local attempt remaining

    for ((attempt = 1; attempt <= 30; attempt++)); do
        # grep -vc drops the grep helper itself and counts what remains.
        remaining=$(ps -ef | grep -E 'pt61850netd_pqfe|fe_watchdog' | grep -vc grep)
        if [ "$remaining" -eq 0 ]; then
            echo "All FE processes exited"
            return 0
        fi
        echo "Waiting FE processes exit... ($remaining still running)"
        sleep 1
    done

    echo "Timeout waiting FE processes exit"
    return 1
}
2025-01-16 16:17:01 +08:00
#######################################
# reset: rebuild the FE process entries in runtime.cf and restart the service.
# Arguments:
#   $1 - process count; accepted values are 1..9
#   $2 - process type: all | stat | recall
#        all    - stop FE, wait for processes and resources to be released,
#                 rewrite the watchdog/stat/recall/3s/comtrade entries, start FE
#        stat   - rewrite only the cfg_stat_data entries, restart those
#        recall - rewrite only the cfg_recallhis_data entries, restart those
# Globals:   LOGFILE (appended to)
# Note: in the "all" case the whole script exits with status 1 when the old
#       processes or their resources are not released in time; FE is then
#       left stopped.
#######################################
handle_reset() {
    local count="$1"
    local proc_type="$2"
    local cf="/home/pq/FeProject/etc/runtime.cf"
    local idx

    # Validate before any numeric test so a non-numeric or empty argument
    # yields the error message only, without "[: integer expression expected".
    # NOTE(review): the message says "between 1 and 10" but 10 is rejected -
    # confirm which of the two is intended.
    if ! { [[ "$count" =~ ^[0-9]+$ ]] \
        && [ "$count" -ge 1 ] 2>/dev/null \
        && [ "$count" -lt 10 ] 2>/dev/null; }; then
        echo "****** Error: Invalid process number '$1'. It must be between 1 and 10. ******"
        echo "****** Error: Invalid process number '$1'. It must be between 1 and 10. ******" >>"$LOGFILE"
        return
    fi

    case "$proc_type" in
        all)
            # Stop everything through the stop script (this replaced the
            # earlier per-process kill calls), then make sure the processes
            # are gone and their ports/connections are free.
            /home/pq/FeProject/boot/stop_fe.sh
            wait_all_exit || exit 1
            check_all_resources_released || exit 1
            # Extra grace period after shutdown so ports are not still busy.
            sleep 5

            # Remove every managed entry from runtime.cf.
            sed -i \
                -e '/cfg_stat_data/d' \
                -e '/cfg_recallhis_data/d' \
                -e '/cfg_3s_data/d' \
                -e '/cfg_soe_comtrade/d' \
                -e '/fe_watchdog/d' \
                "$cf"

            # Each insert goes directly after line 2, so an entry added later
            # lands above the ones added before it.
            # The watchdog entry is added first (original note: its position
            # keeps it from restarting processes that stop is killing).
            # NOTE(review): added first means it ends up below the other new
            # entries - confirm that this is the intended position.
            sed -i "2a/FeProject/bin/ ^ fe_watchdog -m 18192 ^ ^ ^ 1 ^ IGNORE_RESTART ^" "$cf"
            # One stat and one recall entry per process, tagged "-s <i>_<count>".
            # For count = 1 this yields the single "1_1" pair.
            for idx in $(seq 1 "$count"); do
                sed -i "2a/FeProject/bin/ ^ pt61850netd_pqfe -d cfg_stat_data -s ${idx}_${count}^ ^ ^ 1 ^ ^" "$cf"
                sed -i "2a/FeProject/bin/ ^ pt61850netd_pqfe -d cfg_recallhis_data -s ${idx}_${count}^ ^ ^ 1 ^ ^" "$cf"
            done
            # These two are never multi-process.
            sed -i "2a/FeProject/bin/ ^ pt61850netd_pqfe -d cfg_3s_data^ ^ ^ 1 ^ ^" "$cf"
            sed -i "2a/FeProject/bin/ ^ pt61850netd_pqfe -d cfg_soe_comtrade^ ^ ^ 1 ^ ^" "$cf"

            # Let the change settle and flush it to disk before starting.
            sleep 1
            sync
            /home/pq/FeProject/boot/start_fe.sh
            echo "****** reset all in $1******" >>"$LOGFILE"
            ;;
        stat)
            # Replace the cfg_stat_data entries.
            sed -i '/cfg_stat_data/d' "$cf"
            for idx in $(seq 1 "$count"); do
                sed -i "2a/FeProject/bin/ ^ pt61850netd_pqfe -d cfg_stat_data -s ${idx}_${count}^ ^ ^ 1 ^ ^" "$cf"
            done
            # Stop the watchdog, then the old stat processes.
            kill_process_by_name "/FeProject/bin/fe_watchdog"
            kill_process_by_name "/FeProject/bin/pt61850netd_pqfe -d cfg_stat_data"
            # Start again; processes of other types keep running.
            /home/pq/FeProject/boot/start_fe.sh
            echo "****** reset stat in $1******" >>"$LOGFILE"
            ;;
        recall)
            # Replace the cfg_recallhis_data entries.
            sed -i '/cfg_recallhis_data/d' "$cf"
            for idx in $(seq 1 "$count"); do
                sed -i "2a/FeProject/bin/ ^ pt61850netd_pqfe -d cfg_recallhis_data -s ${idx}_${count}^ ^ ^ 1 ^ ^" "$cf"
            done
            # Stop the watchdog, then the old recall processes.
            kill_process_by_name "/FeProject/bin/fe_watchdog"
            kill_process_by_name "/FeProject/bin/pt61850netd_pqfe -d cfg_recallhis_data"
            # Start again; processes of other types keep running.
            /home/pq/FeProject/boot/start_fe.sh
            echo "****** reset recall in $1******" >>"$LOGFILE"
            ;;
        *)
            echo "****** process reset type null ******"
            echo "****** process reset type null ******" >>"$LOGFILE"
            ;;
    esac
}
#######################################
# add: add the "-s <n>_<n>" entry for process number n to runtime.cf (unless
# it is already there), then restart the watchdog and start FE.
# Arguments:
#   $1 - process number; accepted values are 2..9
#   $2 - process type: all (stat + recall) | stat | recall
# Globals:   LOGFILE (appended to)
# An unknown type is reported and nothing is restarted, matching handle_reset.
#######################################
handle_add() {
    local count="$1"
    local proc_type="$2"
    local cf="/home/pq/FeProject/etc/runtime.cf"
    local -a data_names=()
    local data_name entry

    # Validate first so a non-numeric or empty number only produces the
    # error message below.
    # NOTE(review): the message says "between 1 and 10" but only 2..9 pass -
    # confirm which of the two is intended.
    if [[ "$count" =~ ^[0-9]+$ ]] \
        && [ "$count" -gt 1 ] 2>/dev/null \
        && [ "$count" -lt 10 ] 2>/dev/null; then
        case "$proc_type" in
            all) data_names=(cfg_stat_data cfg_recallhis_data) ;;
            stat) data_names=(cfg_stat_data) ;;
            recall) data_names=(cfg_recallhis_data) ;;
            *)
                echo "****** process add type null ******"
                echo "****** process add type null ******" >>"$LOGFILE"
                ;;
        esac

        if [ "${#data_names[@]}" -gt 0 ]; then
            for data_name in "${data_names[@]}"; do
                entry="/FeProject/bin/ ^ pt61850netd_pqfe -d ${data_name} -s ${count}_${count}^"
                # Fixed-string search: the "^" in the entry is literal text.
                # Skip the insert when the entry is already present.
                if ! grep -qF -- "$entry" "$cf"; then
                    sed -i "2a${entry} ^ ^ 1 ^ ^" "$cf"
                fi
            done
            # Stop the old watchdog, then start the service; processes that
            # are already running are not affected.
            kill_process_by_name "/FeProject/bin/fe_watchdog"
            /home/pq/FeProject/boot/start_fe.sh
        fi
    else
        echo "****** Error: Invalid process number '$1'. It must be between 1 and 10. ******"
        echo "****** Error: Invalid process number '$1'. It must be between 1 and 10. ******" >>"$LOGFILE"
    fi
    echo "****** add $2 -- $1******" >>"$LOGFILE"
}
# Earlier single-instance check based on pgrep, kept for reference; the lock
# file (LOCK_FILE) near the top of the script does this job now.
#CURRENT_PID=$$
#if pgrep -f "set_process.sh" | grep -v "^$CURRENT_PID$" > /dev/null; then
# echo "set_process.sh is already running. Exiting..."
# echo "set_process.sh is already running. Exiting..." >>"$LOGFILE"
# exit 1
#fi

# Wait 3 seconds before doing anything, as announced in the start banner.
sleep 3

# Dispatch on the first argument. The remaining arguments are passed quoted so
# an empty process number stays in its position instead of letting the type
# slide into its place.
case "$1" in
    reset)
        handle_reset "$2" "$3"
        ;;
    add)
        handle_add "$2" "$3"
        ;;
    *)
        echo "Invalid option. Usage: $0 {reset|add} {process_number} {process_type}"
        exit 1
        ;;
esac

# Record completion on the console and in the log, with millisecond precision
# (the first 23 characters of the nanosecond timestamp).
DT=$(date "+%F %R:%S.%N")
done_banner="****** ${DT:0:23} set Processes Successfully ******"
echo "$done_banner"
echo "" >>"$LOGFILE"
echo "$done_banner" >>"$LOGFILE"