#!/bin/bash
# @file:    set_process.sh
# @brief:   System-application script that adds / resets FE processes.
# @version: 1.0
# @date:    2024/12/31 10:22:43
# @author:  lunankun
#
# Usage: set_process.sh {reset|add} {process_number} {process_type}
#   process_type is one of: all | stat | recall
#
# A reset/add of type "all" is handled by the first steady-state (stat)
# process only: every process that receives the message first checks whether
# its own process number is 1 and whether it is the steady-state process, and
# ignores the message otherwise. All operations are done by that one process.
#
# Required environment: FEP_ENV (base directory of dat/log/start_fe.log).

# --------------------------------------------------------------------------
# Detach: re-run this script in its own session so it survives the caller
# being stopped. SETSID marks the detached copy.
# --------------------------------------------------------------------------
if [ -z "$SETSID" ]; then
  export SETSID=1
  nohup setsid "$0" "$@" >> /tmp/set_process_detach.log 2>&1 < /dev/null &
  exit 0
fi
# Do not leak the marker to stop_fe.sh / start_fe.sh and the services they
# start; a service inheriting SETSID=1 would make a later invocation of this
# script skip the detach step above.
unset SETSID

# Close sockets / pipes / file descriptors inherited from the parent process.
for fd_path in /proc/$$/fd/*; do
  fd_num=$(basename "$fd_path")
  case "$fd_num" in
    0 | 1 | 2) ;;            # keep stdin / stdout / stderr
    '' | *[!0-9]*) ;;        # unmatched glob or non-numeric name: never eval it
    *) eval "exec ${fd_num}>&-" 2>/dev/null ;;
  esac
done

# --------------------------------------------------------------------------
# Configuration
# --------------------------------------------------------------------------
LOGFILE="$FEP_ENV/dat/log/start_fe.log"
INI_FILE="/FeProject/etc/config/mykafka.ini"
LOCK_FILE="/tmp/set_process.lock"
RUNTIME_CF="/home/pq/FeProject/etc/runtime.cf"
STOP_FE="/home/pq/FeProject/boot/stop_fe.sh"
START_FE="/home/pq/FeProject/boot/start_fe.sh"
WATCHDOG_ENTRY='/FeProject/bin/ ^ fe_watchdog -m 18192 ^ ^ ^ 1 ^ IGNORE_RESTART ^'

#######################################
# Take the single-instance lock, or report the instance already holding it.
# A lock whose PID is no longer alive is treated as stale and removed.
# Globals:   LOCK_FILE (read)
# Outputs:   status messages on stdout
# Returns:   0 when the script may continue, 1 when another instance runs
#######################################
acquire_lock() {
  local old_pid
  if [ -f "$LOCK_FILE" ]; then
    old_pid=$(cat "$LOCK_FILE" 2>/dev/null)
    if ps -p "$old_pid" > /dev/null 2>&1; then
      echo "Already running: $old_pid"
      return 1
    fi
    echo "Stale lock found, removing"
    rm -f "$LOCK_FILE"
  fi
  # noclobber makes the creation fail if another instance created the file
  # between the check above and this write.
  if ! (set -o noclobber; echo "$$" > "$LOCK_FILE") 2>/dev/null; then
    if [ -e "$LOCK_FILE" ]; then
      echo "Already running: $(cat "$LOCK_FILE" 2>/dev/null)"
      return 1
    fi
    # Lock file could not be created at all: keep going without a lock, as
    # the script always did in that situation.
    echo "Warning: cannot create $LOCK_FILE, continuing without lock"
  fi
  trap 'rm -f "$LOCK_FILE"' EXIT
  return 0
}

#######################################
# Print the value of the last "key=value" line for a key in INI_FILE, with
# surrounding whitespace removed.
# Globals:   INI_FILE (read)
# Arguments: $1 - key name (used inside an extended regex)
# Outputs:   the value on stdout
# Returns:   0 when a non-empty value was found, 1 otherwise
#######################################
get_ini_value() {
  local key="$1"
  local line
  line=$(grep -E "^[[:space:]]*${key}=" "$INI_FILE" | tail -n 1)
  [ -z "$line" ] && return 1
  # Drop "key="
  line="${line#*=}"
  # Trim leading / trailing whitespace
  line=$(printf '%s\n' "$line" | sed 's/^[[:space:]]*//; s/[[:space:]]*$//')
  [ -n "$line" ] || return 1
  printf '%s\n' "$line"
  return 0
}

#######################################
# Write a message to stdout and append it to LOGFILE.
#######################################
log() {
  printf '%s\n' "$*" | tee -a "$LOGFILE"
}

#######################################
# Wait until no listening TCP socket uses the given local port.
# Polls once per second, 3 rounds of 20 attempts.
# Arguments: $1 - local port
# Returns:   0 when released, 1 when still in use after all retries
#######################################
check_local_port_released() {
  local PORT="$1"
  local retry i
  for ((retry = 1; retry <= 3; retry++)); do
    for ((i = 1; i <= 20; i++)); do
      if ! ss -lntp 2>/dev/null | grep -q ":${PORT}[[:space:]]"; then
        log "[OK] Local port $PORT released"
        return 0
      fi
      log "[WAIT] Local port $PORT still in use... ($i/20)"
      sleep 1
    done
    log "[RETRY] Local port $PORT not released, retry $retry/3"
    sleep 1
  done
  log "[FAIL] Local port $PORT still in use after retries:"
  ss -lntp 2>/dev/null | grep ":${PORT}[[:space:]]" | tee -a "$LOGFILE"
  return 1
}

#######################################
# List TCP connections owned by an FE process whose address column is exactly
# ip:port. The match is on a whole field, so 10.0.0.1:80 does not match
# 10.0.0.1:8080 or 110.0.0.1:80.
# Arguments: $1 - IP, $2 - port
# Outputs:   matching "ss -ntp" lines on stdout (nothing when none)
#######################################
list_fe_remote_conns() {
  local target="$1:$2"
  ss -ntp 2>/dev/null \
    | awk -v target="$target" '
        /pt61850netd_pqfe|fe_watchdog|fe_main/ {
          for (i = 1; i <= NF; i++) {
            if ($i == target) { print; next }
          }
        }'
}

#######################################
# Wait until no FE process holds a TCP connection to the given endpoint.
# Polls once per second, 3 rounds of 20 attempts.
# Arguments: $1 - remote IP, $2 - remote port
# Returns:   0 when released, 1 when connections remain after all retries
#######################################
check_remote_conn_released() {
  local REMOTE_IP="$1"
  local REMOTE_PORT="$2"
  local retry i conns
  for ((retry = 1; retry <= 3; retry++)); do
    for ((i = 1; i <= 20; i++)); do
      conns=$(list_fe_remote_conns "$REMOTE_IP" "$REMOTE_PORT")
      if [ -z "$conns" ]; then
        log "[OK] Remote ${REMOTE_IP}:${REMOTE_PORT} released"
        return 0
      fi
      log "[WAIT] Remote ${REMOTE_IP}:${REMOTE_PORT} still exists... ($i/20)"
      printf '%s\n' "$conns" | tee -a "$LOGFILE"
      sleep 1
    done
    log "[RETRY] Remote ${REMOTE_IP}:${REMOTE_PORT} not released, retry $retry/3"
    sleep 1
  done
  log "[FAIL] Remote ${REMOTE_IP}:${REMOTE_PORT} still exists:"
  list_fe_remote_conns "$REMOTE_IP" "$REMOTE_PORT" | tee -a "$LOGFILE"
  return 1
}

#######################################
# Check that the resource described by one configuration value is released.
# Supported value shapes: a bare port, scheme://host:port[/...], or ip:port.
# Anything else (and port 0 / empty values) is skipped and counts as success.
# Arguments: $1 - key name (for log output), $2 - value
# Returns:   0 when released or skipped, 1 when still in use
#######################################
check_value_released() {
  local key="$1"
  local value="$2"
  local re_port='^[0-9]+$'
  local re_url='^[a-zA-Z]+://([^/]*)'
  local re_ip_port='^[^:]+:[0-9]+$'
  local hostport ip port

  [ -z "$value" ] && return 0

  # Bare port
  if [[ "$value" =~ $re_port ]]; then
    if [ "$value" = "0" ]; then
      log "[SKIP] $key disabled"
      return 0
    fi
    log "[CHECK] $key local port: $value"
    check_local_port_released "$value"
    return $?
  fi

  # URL
  if [[ "$value" =~ $re_url ]]; then
    hostport="${BASH_REMATCH[1]}"
    ip="${hostport%%:*}"
    port="${hostport##*:}"
    # ip == port means the authority had no ":port" part.
    if [ -n "$ip" ] && [ -n "$port" ] && [ "$ip" != "$port" ]; then
      log "[CHECK] $key remote: $ip:$port"
      check_remote_conn_released "$ip" "$port"
      return $?
    fi
    log "[SKIP] $key invalid URL: $value"
    return 0
  fi

  # ip:port
  if [[ "$value" =~ $re_ip_port ]]; then
    ip="${value%%:*}"
    port="${value##*:}"
    log "[CHECK] $key remote: $ip:$port"
    check_remote_conn_released "$ip" "$port"
    return $?
  fi

  log "[SKIP] $key unsupported value: $value"
  return 0
}

#######################################
# Look a key up in INI_FILE and check that its resource is released.
# Arguments: $1 - key name
# Returns:   0 when released, skipped or not configured; 1 otherwise
#######################################
check_key_released() {
  local key="$1"
  local value
  value=$(get_ini_value "$key") || {
    log "[SKIP] $key not found"
    return 0
  }
  log "==== Checking $key = $value ===="
  check_value_released "$key" "$value"
  return $?
}

#######################################
# Check every configured port / endpoint; all keys are checked even after a
# failure so the log shows the complete picture.
# Returns:   0 when everything is released, 1 otherwise
#######################################
check_all_resources_released() {
  local ret=0
  local key
  local keys=(
    # Local ports
    TestPort HttpPort SocketPort
    # MQ
    Ipport ConsumerIpport
    # Web
    WebDevice WebIcd WebIntegrity WebComflag WebEvent
    WebFileupload WebFiledownload
  )

  log "=============================="
  log " Start checking resources..."
  log "=============================="

  for key in "${keys[@]}"; do
    check_key_released "$key" || ret=1
  done

  if [ "$ret" -eq 0 ]; then
    log "✅ ALL resources released"
  else
    log "❌ Some resources NOT released"
  fi
  return "$ret"
}

#######################################
# Rotate a log file (file -> .1 -> .2 -> .3) once it reaches 5120 KiB.
# Arguments: $1 - log file path
#######################################
check_log_file() {
  local file="$1"
  local size_kb
  if [ -n "$file" ] && [ -f "$file" ]; then
    size_kb=$(du -k "$file" | awk '{print $1}')
    if [ "${size_kb:-0}" -ge 5120 ]; then
      rm -f "$file.3"
      if [ -f "$file.2" ]; then
        mv "$file.2" "$file.3"
      fi
      if [ -f "$file.1" ]; then
        mv "$file.1" "$file.2"
      fi
      mv "$file" "$file.1"
    fi
  fi
}

#######################################
# Terminate every process whose command line matches a pattern: SIGTERM
# first, then SIGKILL for anything still alive 3 seconds later.
# Arguments: $1 - pattern matched against the full command line
#######################################
kill_process_by_name() {
  local PROCESS_NAME="$1"
  local PIDS pid
  PIDS=$(pgrep -f -- "$PROCESS_NAME")
  if [ -n "$PIDS" ]; then
    echo "Found process '$PROCESS_NAME' with PID(s): $PIDS"
    echo "Killing process..."
    for pid in $PIDS; do
      kill -15 "$pid" 2>/dev/null
    done
    sleep 3
    for pid in $PIDS; do
      if ps -p "$pid" >/dev/null 2>&1; then
        echo "Process still exists, force kill: $pid"
        kill -9 "$pid" 2>/dev/null
      fi
    done
  else
    echo "Process '$PROCESS_NAME' not found."
  fi
}

#######################################
# Wait up to 30 seconds for all pt61850netd_pqfe / fe_watchdog processes to
# disappear.
# Returns:   0 when none are left, 1 on timeout
#######################################
wait_all_exit() {
  local attempt count
  for ((attempt = 1; attempt <= 30; attempt++)); do
    count=$(pgrep -f -- 'pt61850netd_pqfe|fe_watchdog' | wc -l)
    if [ "$count" -eq 0 ]; then
      echo "All FE processes exited"
      return 0
    fi
    echo "Waiting FE processes exit... ($count still running)"
    sleep 1
  done
  echo "Timeout waiting FE processes exit"
  return 1
}

#######################################
# Validate a process number.
# Arguments: $1 - raw value, $2 - minimum, $3 - maximum (both inclusive)
# Outputs:   the value as a plain decimal number on stdout
# Returns:   0 when $1 is an integer within [min, max], 1 otherwise
#######################################
parse_proc_count() {
  local raw="$1" min="$2" max="$3"
  local re_digits='^[0-9]{1,4}$'
  local n
  [[ "$raw" =~ $re_digits ]] || return 1
  n=$((10#$raw))   # force base 10 so "08" is not read as octal
  ((n >= min && n <= max)) || return 1
  printf '%s\n' "$n"
}

#######################################
# Build a runtime.cf line for pt61850netd_pqfe.
# Arguments: $1 - dataset (e.g. cfg_stat_data), $2 - optional "-s" value
#######################################
pqfe_entry() {
  if [ -n "$2" ]; then
    printf '%s\n' "/FeProject/bin/ ^ pt61850netd_pqfe -d $1 -s $2^ ^ ^ 1 ^ ^"
  else
    printf '%s\n' "/FeProject/bin/ ^ pt61850netd_pqfe -d $1^ ^ ^ 1 ^ ^"
  fi
}

#######################################
# Insert one line after line 2 of RUNTIME_CF. Because every insert goes to
# the same position, lines inserted later end up above earlier ones. Nothing
# is inserted when the file has fewer than 2 lines.
# Arguments: $1 - line text
#######################################
append_runtime_entry() {
  sed -i "2a\\$1" "$RUNTIME_CF"
}

#######################################
# Insert the "-s N_N" entry for a dataset unless RUNTIME_CF already has it.
# Arguments: $1 - dataset, $2 - process number
#######################################
add_entry_once() {
  local dataset="$1" n="$2"
  local prefix="/FeProject/bin/ ^ pt61850netd_pqfe -d ${dataset} -s ${n}_${n}^"
  # -F: the text contains '^' and must be matched literally.
  if ! grep -qF -- "$prefix" "$RUNTIME_CF"; then
    append_runtime_entry "$(pqfe_entry "$dataset" "${n}_${n}")"
  fi
}

#######################################
# reset: rebuild the process list in RUNTIME_CF and restart services.
# Arguments: $1 - process number (1..9 accepted), $2 - all | stat | recall
# Returns:   0 on success, 1 on an invalid process number or type.
#            Exits the script with 1 when "all" cannot stop the old
#            processes or their ports / connections are not released.
# NOTE(review): the error text says "between 1 and 10" but 10 is rejected;
# confirm which of the two is intended.
#######################################
handle_reset() {
  local count i
  if ! count=$(parse_proc_count "$1" 1 9); then
    echo "****** Error: Invalid process number '$1'. It must be between 1 and 10. ******"
    echo "****** Error: Invalid process number '$1'. It must be between 1 and 10. ******" >>"$LOGFILE"
    return 1
  fi

  case "$2" in
    all)
      # stop_fe.sh replaces the former per-process kill_process_by_name calls.
      "$STOP_FE"
      wait_all_exit || exit 1
      check_all_resources_released || exit 1
      # Extra wait after shutdown so ports are not still occupied.
      sleep 5

      # Remove every process entry from runtime.cf.
      sed -i \
        -e '/cfg_stat_data/d' \
        -e '/cfg_recallhis_data/d' \
        -e '/cfg_3s_data/d' \
        -e '/cfg_soe_comtrade/d' \
        -e '/fe_watchdog/d' \
        "$RUNTIME_CF"

      # The watchdog is inserted first so that it does not restart processes
      # being killed during stop (original author's note). With the
      # insert-after-line-2 scheme it ends up below the entries added next.
      append_runtime_entry "$WATCHDOG_ENTRY"
      # One stat and one recall entry per process: -s <i>_<count>.
      for ((i = 1; i <= count; i++)); do
        append_runtime_entry "$(pqfe_entry cfg_stat_data "${i}_${count}")"
        append_runtime_entry "$(pqfe_entry cfg_recallhis_data "${i}_${count}")"
      done
      # These two are never multi-process.
      append_runtime_entry "$(pqfe_entry cfg_3s_data)"
      append_runtime_entry "$(pqfe_entry cfg_soe_comtrade)"

      # Give the change a moment, then flush it to disk.
      sleep 1
      sync
      "$START_FE"
      echo "****** reset all in $1******" >>"$LOGFILE"
      ;;
    stat)
      sed -i '/cfg_stat_data/d' "$RUNTIME_CF"
      for ((i = 1; i <= count; i++)); do
        append_runtime_entry "$(pqfe_entry cfg_stat_data "${i}_${count}")"
      done
      kill_process_by_name "/FeProject/bin/fe_watchdog"
      kill_process_by_name "/FeProject/bin/pt61850netd_pqfe -d cfg_stat_data"
      # Starting services does not affect processes of other functions.
      "$START_FE"
      echo "****** reset stat in $1******" >>"$LOGFILE"
      ;;
    recall)
      sed -i '/cfg_recallhis_data/d' "$RUNTIME_CF"
      for ((i = 1; i <= count; i++)); do
        append_runtime_entry "$(pqfe_entry cfg_recallhis_data "${i}_${count}")"
      done
      kill_process_by_name "/FeProject/bin/fe_watchdog"
      kill_process_by_name "/FeProject/bin/pt61850netd_pqfe -d cfg_recallhis_data"
      # Starting services does not affect processes of other functions.
      "$START_FE"
      echo "****** reset recall in $1******" >>"$LOGFILE"
      ;;
    *)
      echo "****** process reset type null ******"
      echo "****** process reset type null ******" >>"$LOGFILE"
      return 1
      ;;
  esac
  return 0
}

#######################################
# add: add the "-s N_N" entry for process number N and restart the watchdog.
# Arguments: $1 - process number (2..9 accepted), $2 - all | stat | recall
# Returns:   0 on success, 1 on an invalid process number or type.
# NOTE(review): the error text says "between 1 and 10" but only 2..9 pass;
# confirm which of the two is intended.
#######################################
handle_add() {
  local count
  local rc=0
  if count=$(parse_proc_count "$1" 2 9); then
    case "$2" in
      all)
        add_entry_once cfg_stat_data "$count"
        add_entry_once cfg_recallhis_data "$count"
        ;;
      stat)
        add_entry_once cfg_stat_data "$count"
        ;;
      recall)
        add_entry_once cfg_recallhis_data "$count"
        ;;
      *)
        echo "****** process add type null ******"
        echo "****** process add type null ******" >>"$LOGFILE"
        rc=1
        ;;
    esac
    # Stop the old watchdog, then start services; running processes are not
    # affected.
    kill_process_by_name "/FeProject/bin/fe_watchdog"
    "$START_FE"
  else
    echo "****** Error: Invalid process number '$1'. It must be between 1 and 10. ******"
    echo "****** Error: Invalid process number '$1'. It must be between 1 and 10. ******" >>"$LOGFILE"
    rc=1
  fi
  echo "****** add $2 -- $1******" >>"$LOGFILE"
  return "$rc"
}

#######################################
# Entry point of the detached copy.
# Arguments: $1 - reset | add, $2 - process number, $3 - process type
#######################################
main() {
  local banner rc=0 DT

  acquire_lock || exit 1

  # Announce the run on stdout and in the log.
  banner="****** $(date "+%F %R:%S") start setting Processes after 3 sec ******"
  echo ""
  echo ""
  echo "$banner"
  echo "" >>"$LOGFILE"
  echo "" >>"$LOGFILE"
  echo "$banner" >>"$LOGFILE"

  check_log_file "$LOGFILE"

  # The script is expected to wait 3 seconds before acting.
  sleep 3

  case "$1" in
    reset) handle_reset "$2" "$3" || rc=$? ;;
    add) handle_add "$2" "$3" || rc=$? ;;
    *)
      echo "Invalid option. Usage: $0 {reset|add} {process_number} {process_type}"
      exit 1
      ;;
  esac

  # Record the outcome with a millisecond timestamp.
  DT=$(date "+%F %R:%S.%N")
  echo "" >>"$LOGFILE"
  if [ "$rc" -eq 0 ]; then
    echo "****** ${DT:0:23} set Processes Successfully ******"
    echo "****** ${DT:0:23} set Processes Successfully ******" >>"$LOGFILE"
  else
    echo "****** ${DT:0:23} set Processes Failed ******"
    echo "****** ${DT:0:23} set Processes Failed ******" >>"$LOGFILE"
  fi
  return "$rc"
}

main "$@"