#!/bin/bash

# Locate this script on disk. The configuration file is expected to live in
# the same directory and to be named "config-<script file name>".
SCRIPT_NAME="$(basename "${BASH_SOURCE[0]}")"
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
CONFIG_FILE="${SCRIPT_DIR}/config-${SCRIPT_NAME}"

# Without a configuration file nothing can run: print the list of variables
# the file is expected to set and exit with status 1.
if [ ! -f "$CONFIG_FILE" ]; then
    echo "错误: 配置文件不存在: $CONFIG_FILE"
    cat << 'CONFIG_HELP'
请创建配置文件并设置以下变量:
  - INAIS_HOST_ACCOUNT
  - NODE_ID
  - CONTAINER_NAME
  - CONTAINER_ROOT
  - CONTAINER_INDEX
  - INAIS_NETWORK_MODE
  - USE_RINETD_PORT_FORWARDING
  - MAX_NODE_ID (可选, 默认21)
  - DISABLE_LOCAL_STORAGE_BINDINGS (可选, 设为 true 禁用本地存储挂载)
  - DISABLE_NVIDIA_BINDINGS (可选, 设为 true 禁用NVIDIA挂载)
CONFIG_HELP
    exit 1
fi

# log() is defined here, ahead of its regular definition further down, so the
# "loading configuration" message can already be printed with a timestamp.
log() {
    echo "[$(date '+%Y-%m-%d %H:%M:%S')] $1"
}
log "从配置文件加载配置: $CONFIG_FILE"
source "$CONFIG_FILE"

# Values derived from the variables supplied by the configuration file.
# NOTE(review): $NODE_ID is left unquoted exactly as before so an unset value
# still formats as "00"; consider validating NODE_ID explicitly.
NODE_ID_STR="$(printf '%02d\n' $NODE_ID)"
INAIS_HOSTNAME="IN${NODE_ID_STR}"
SYSTEMD_SECCOMP=0
export NODE_ID_STR INAIS_HOSTNAME SYSTEMD_SECCOMP

# Optional settings: keep the configured value, otherwise fall back to the default.
: "${MAX_NODE_ID:=21}"                          # highest node id
: "${DISABLE_LOCAL_STORAGE_BINDINGS:=false}"    # true disables the local storage bind
export MAX_NODE_ID DISABLE_LOCAL_STORAGE_BINDINGS

# Storage path prefixes.
: "${LOCAL_DATA_PREFIX:=/data}"                 # local data directory prefix
: "${NFS_STORAGE_PREFIX:=/storagenfs/nfs}"      # NFS storage path prefix (node number is appended)
export LOCAL_DATA_PREFIX NFS_STORAGE_PREFIX

# Port mapping range.
: "${PORT_FORWARD_HOST_START:=30600}"           # first port on the host side
: "${PORT_FORWARD_CONTAINER_START:=30000}"      # first port on the container side
: "${PORT_FORWARD_COUNT:=20}"                   # number of consecutive ports
export PORT_FORWARD_HOST_START PORT_FORWARD_CONTAINER_START PORT_FORWARD_COUNT

# Per-container rinetd configuration and pid file locations.
RINETD_CONFIG="/var/run/rinetd.conf.${CONTAINER_NAME}"
RINETD_PID="/var/run/rinetd.pid.${CONTAINER_NAME}"
export RINETD_CONFIG RINETD_PID

# Flag set by the HUP handler and consumed by interruptible_sleep (not exported).
RELOAD_REQUESTED=false

# Print one message line to stdout, prefixed with "[YYYY-MM-DD HH:MM:SS] ".
# Arguments: $1 - message text
log() {
    local stamp
    stamp=$(date '+%Y-%m-%d %H:%M:%S')
    printf '[%s] %s\n' "$stamp" "$1"
}

# Create the docker bridge network "inais0" for this node.
# The subnet is 253.<NODE_ID>.0.0/16 with gateway 253.<NODE_ID>.0.1.
# A failing "docker network create" is only logged; the message covers both a
# real failure and an already existing network.
create_docker_network() {
    local subnet="253.${NODE_ID}.0.0/16"
    local gateway="253.${NODE_ID}.0.1"
    local -a create_cmd=(
        docker network create
        --driver=bridge
        "--subnet=${subnet}" --gateway "${gateway}"
        --opt com.docker.network.bridge.name=inais0
        inais0
    )

    log "创建docker网络 inais0"
    # Output and status are discarded.
    # NOTE(review): presumably this only pokes the docker daemon before the create — confirm.
    docker ps -a > /dev/null 2>&1
    if ! "${create_cmd[@]}" 2>/dev/null; then
        log "网络创建失败或已存在"
    fi
}

# Verify that the inais0 network device exists when docker network mode is used.
# Returns: 0 when nothing had to be done (other mode, or device present),
#          1 when the device was missing and create_docker_network was invoked.
check_network_device() {
    # Only docker mode depends on the inais0 bridge.
    [ "$INAIS_NETWORK_MODE" == "docker" ] || return 0

    if [ -d "/sys/class/net/inais0" ]; then
        return 0
    fi

    log "inais0网络设备不存在，重新创建"
    create_docker_network
    return 1
}

# Ensure the host has a route to every other node's container subnet.
# For each node i in 1..MAX_NODE_ID (except NODE_ID) the route
# 253.<i>.0.0/16 via 172.29.32.<i> is added unless it is already present.
# Globals:  MAX_NODE_ID, NODE_ID (read)
# Returns:  0; a failing "ip route add" is logged and does not stop the loop.
create_route() {
    local existing_routes route gateway i
    existing_routes=$(ip route show)

    for i in $(seq 1 "$MAX_NODE_ID"); do
        if [ "$i" -eq "$NODE_ID" ]; then
            continue
        fi

        route="253.$i.0.0/16"
        gateway="172.29.32.$i"

        # Match the route exactly: dots are escaped and the gateway must be
        # followed by a space or end of line, so that e.g. gateway 172.29.32.1
        # is not considered present because of a 172.29.32.10 entry.
        if grep -Eq "^253\.${i}\.0\.0/16 via 172\.29\.32\.${i}( |\$)" <<< "$existing_routes"; then
            log "Route $route via $gateway already exists, skipping"
        else
            log "Adding route: $route via $gateway"
            if ! ip route add "$route" via "$gateway"; then
                log "Failed to add route: $route via $gateway"
            fi
        fi
    done
    return 0
}

# Report why a single storage path is not ready yet.
# Arguments: $1 - path to check
#            $2 - "true" when the path must be a live mount (NFS), anything else otherwise
# Outputs:   the reason on stdout; prints nothing when the path is ready
_storage_mount_problem() {
    local mount_point="$1"
    local must_be_mount="$2"

    if [ ! -d "$mount_point" ]; then
        printf '%s' "目录不存在"
        return 0
    fi
    [ "$must_be_mount" = true ] || return 0

    # 1: must be a real mount point
    if ! mountpoint -q "$mount_point" 2>/dev/null; then
        printf '%s' "非挂载点"
        return 0
    fi
    # 2: must be listed in /proc/mounts (fixed-string match, the path is not a regex)
    if ! grep -qF " $mount_point " /proc/mounts 2>/dev/null; then
        printf '%s' "未在/proc/mounts中"
        return 0
    fi
    # 3: listing must finish within 5 seconds
    if ! timeout 5 ls "$mount_point" >/dev/null 2>&1; then
        printf '%s' "访问超时或失败"
        return 0
    fi
    # 4: stat must finish within 5 seconds
    if ! timeout 5 stat "$mount_point" >/dev/null 2>&1; then
        printf '%s' "stat失败"
    fi
    return 0
}

# Wait until the local data directory and the NFS paths of all other nodes are ready.
# The local directory only has to exist. Every path starting with
# NFS_STORAGE_PREFIX must additionally be a mount point, appear in /proc/mounts
# and answer ls/stat within 5 seconds. Paths are re-checked once per second.
# Arguments: $1 - timeout in seconds (optional, default 300)
# Globals:   LOCAL_DATA_PREFIX, INAIS_HOST_ACCOUNT, NFS_STORAGE_PREFIX,
#            MAX_NODE_ID, NODE_ID (read)
# Returns:   0 when every path is ready, 1 when the timeout expired first
wait_for_storage_mounts() {
    local TIMEOUT="${1:-300}"
    local start_time end_time current_time elapsed
    local i num mount_point is_nfs problem pending shown
    local check_count=0
    local -a mount_points=()
    local -a pending_mounts=()

    start_time=$(date +%s)
    end_time=$((start_time + TIMEOUT))

    log "等待存储挂载就绪（超时时间 ${TIMEOUT}秒）..."

    # Local data directory first, then one NFS path per remote node.
    mount_points+=("${LOCAL_DATA_PREFIX}/$INAIS_HOST_ACCOUNT")
    for i in $(seq 1 "$MAX_NODE_ID"); do
        if [ "$i" -eq "$NODE_ID" ]; then
            continue
        fi
        printf -v num '%02d' "$i"
        mount_points+=("${NFS_STORAGE_PREFIX}${num}")
    done

    while true; do
        check_count=$((check_count + 1))
        current_time=$(date +%s)
        elapsed=$((current_time - start_time))

        pending_mounts=()
        for mount_point in "${mount_points[@]}"; do
            # Follow the configured prefix instead of a hard-coded /storagenfs/
            # so a customised NFS_STORAGE_PREFIX is still verified as a mount.
            is_nfs=false
            if [[ "$mount_point" == "${NFS_STORAGE_PREFIX}"* ]]; then
                is_nfs=true
            fi
            problem=$(_storage_mount_problem "$mount_point" "$is_nfs")
            if [ -n "$problem" ]; then
                pending_mounts+=("$mount_point ($problem)")
            fi
        done

        if [ "${#pending_mounts[@]}" -eq 0 ]; then
            log "  ✓ 所有存储挂载点已就绪"
            return 0
        fi

        # Progress report every 30 checks, listing at most five pending paths.
        if [ $((check_count % 30)) -eq 0 ]; then
            log "  等待中... 已用时 ${elapsed}秒 (检查次数: $check_count)"
            log "  待挂载: ${#pending_mounts[@]} 个挂载点"
            shown=0
            for pending in "${pending_mounts[@]}"; do
                [ "$shown" -lt 5 ] || break
                log "    - $pending"
                shown=$((shown + 1))
            done
        fi

        if ((current_time >= end_time)); then
            log "  ⚠ 警告：部分存储挂载点在 ${TIMEOUT}秒 内未就绪"
            log "  未就绪的挂载点 (${#pending_mounts[@]} 个):"
            for pending in "${pending_mounts[@]}"; do
                log "    - $pending"
            done
            log "  将继续执行，但某些存储绑定可能失败"
            return 1
        fi

        sleep 1
    done
}

# Check that the container still carries its expected 253.<NODE_ID> addressing
# and trigger configure_container_network when it does not.
# docker mode: host0 inside the container must show "253.<NODE_ID>".
# veth mode:   ve-<name> on the host must show 253.<NODE_ID>.<CONTAINER_INDEX>.1 and
#              host0 in the container must show 253.<NODE_ID>.<CONTAINER_INDEX>.2.
# Returns: 0 when the configuration looks fine (or the mode is neither docker nor veth),
#          1 when the container is not known to machinectl or had to be reconfigured.
check_container_network() {
    local host_side container_side

    machinectl status "$CONTAINER_NAME" >/dev/null 2>&1 || return 1

    case "$INAIS_NETWORK_MODE" in
        docker)
            container_side=$(machinectl shell "$CONTAINER_NAME" /bin/ip addr show dev host0 2>/dev/null \
                | grep "253.${NODE_ID}")
            if [ -n "$container_side" ]; then
                return 0
            fi
            log "容器网络配置异常，重新配置"
            configure_container_network
            return 1
            ;;
        veth)
            host_side=$(ip addr show dev "ve-$CONTAINER_NAME" 2>/dev/null \
                | grep "253.${NODE_ID}.${CONTAINER_INDEX}.1")
            container_side=$(machinectl shell "$CONTAINER_NAME" /bin/ip addr show dev host0 2>/dev/null \
                | grep "253.${NODE_ID}.${CONTAINER_INDEX}.2")
            if [ -n "$host_side" ] && [ -n "$container_side" ]; then
                return 0
            fi
            log "veth网络配置异常，重新配置"
            configure_container_network
            return 1
            ;;
    esac
    return 0
}

# Apply the IP configuration of the container for the active network mode.
# docker mode: host0 in the container gets
#              253.<NODE_ID>.<CONTAINER_INDEX/256+1>.<CONTAINER_INDEX%256>/16 and a
#              default route via 253.<NODE_ID>.0.1.
# veth mode:   ve-<name> on the host gets 253.<NODE_ID>.<CONTAINER_INDEX>.1/24, a
#              MASQUERADE rule for 253.0.0.0/8 is appended if it is not present,
#              host0 in the container gets ...<CONTAINER_INDEX>.2/24 with the host side
#              as default gateway, and network-online.target is started in the container.
# stderr of the individual ip/machinectl commands is discarded.
configure_container_network() {
    local -a in_container=(machinectl shell "$CONTAINER_NAME")
    local node_net="253.${NODE_ID}"
    local host_if="ve-$CONTAINER_NAME"
    local veth_net="${node_net}.${CONTAINER_INDEX}"
    local docker_addr

    case "$INAIS_NETWORK_MODE" in
        docker)
            log "配置docker网络模式"
            docker_addr="${node_net}.$((CONTAINER_INDEX / 256 + 1)).$((CONTAINER_INDEX % 256))"
            "${in_container[@]}" /bin/ip link set up dev host0 2>/dev/null
            "${in_container[@]}" /bin/ip address flush dev host0 2>/dev/null
            "${in_container[@]}" /bin/ip address add "${docker_addr}/16" broadcast "${node_net}.255.255" dev host0 2>/dev/null
            "${in_container[@]}" /bin/ip route del default 2>/dev/null
            "${in_container[@]}" /bin/ip route add default via "${node_net}.0.1" dev host0 2>/dev/null
            ;;
        veth)
            log "配置veth网络模式"
            # Host side of the veth pair.
            ip link set up dev "$host_if" 2>/dev/null
            ip address flush dev "$host_if" 2>/dev/null
            ip address add "${veth_net}.1/24" dev "$host_if" 2>/dev/null

            # Append the NAT rule only when the check (-C) says it is missing.
            iptables -t nat -C POSTROUTING -s 253.0.0.0/8 -j MASQUERADE 2>/dev/null \
                || iptables -t nat -A POSTROUTING -s 253.0.0.0/8 -j MASQUERADE

            # Container side of the veth pair.
            "${in_container[@]}" /bin/ip link set up dev host0 2>/dev/null
            "${in_container[@]}" /bin/ip address flush dev host0 2>/dev/null
            "${in_container[@]}" /bin/ip address add "${veth_net}.2/24" broadcast "${veth_net}.255" dev host0 2>/dev/null
            "${in_container[@]}" /bin/ip route del default 2>/dev/null
            "${in_container[@]}" /bin/ip route add default via "${veth_net}.1" dev host0 2>/dev/null
            "${in_container[@]}" /bin/systemctl start network-online.target 2>/dev/null
            ;;
    esac
}

# Make sure the tmux session "_container-<CONTAINER_NAME>" exists.
# When it is missing the container is (re)created through create_container.
# Returns: 0 when the session exists, 1 when create_container had to be called.
check_tmux_session() {
    local session="_container-$CONTAINER_NAME"

    if tmux has-session -t "$session" 2>/dev/null; then
        return 0
    fi

    log "tmux session不存在，重新创建容器"
    create_container
    return 1
}

# Move a conflicting regular file inside the container tree out of the way
# before a bind mount is placed on its path.
# Arguments: $1 - source path on the host (only its type is inspected)
#            $2 - destination path inside the container
# Globals:   CONTAINER_ROOT (read) - prefix used to locate the destination on the host
# Behaviour: nothing happens unless the source is a regular file, a directory or a
#            character/block device AND the destination is a regular file. An empty
#            destination file is deleted; a non-empty one is renamed to "<dest>.old",
#            replacing any earlier ".old" file.
check_and_clean_bind_target() {
    local src_path="$1"
    local dest_path="$2"
    local target="${CONTAINER_ROOT}${dest_path}"
    local qualifier

    # Nothing to clean when the destination does not exist in the container tree.
    [ -e "$target" ] || return 0

    # The source type only changes the wording of the log messages.
    if [ -f "$src_path" ]; then
        qualifier=""
    elif [ -d "$src_path" ]; then
        qualifier="(源为目录)"
    elif [ -c "$src_path" ] || [ -b "$src_path" ]; then
        qualifier="(源为设备)"
    else
        return 0
    fi

    # Only regular files are treated as conflicts.
    [ -f "$target" ] || return 0

    if [ -s "$target" ]; then
        log "    ⚠ WARNING: 容器内存在非空文件${qualifier} $dest_path，重命名为 ${dest_path}.old"
        rm -f "${target}.old"
        mv "$target" "${target}.old"
    else
        log "    清理容器内空文件${qualifier}: $dest_path"
        rm -f "$target"
    fi
}

# Split one bind entry into source and destination.
# An entry is either "SRC:DEST" (DEST is an absolute path) or a single path that
# is bound onto itself. The split happens at the first ":/", so a colon inside a
# path such as /sys/devices/pci0000:00 is kept as part of the path.
# Arguments: $1 - bind entry
# Outputs:   assigns _bind_src and _bind_dest; callers declare both as local.
_parse_bind_entry() {
    if [[ "$1" == *:/* ]]; then
        _bind_src="${1%%:/*}"
        _bind_dest="/${1#*:/}"
    else
        _bind_src="$1"
        _bind_dest="$1"
    fi
}

# Validate bind entries and append the matching systemd-nspawn options.
# Entries whose source does not exist are skipped with a warning. A symlinked
# source is resolved so the real path is bound onto the requested destination.
# Colons inside either path are written as "\:" for systemd-nspawn.
# Arguments: $1 - option name: --bind (appends to dynamic_binds) or
#                 --bind-ro (appends to dynamic_readonly_binds)
#            $2 - tag shown in the per-entry log line (may be empty)
#            $3 - noun used in the "missing path" warning
#            $4 - label used in the summary line
#            $5.. - bind entries
_add_bind_entries() {
    local option="$1" tag="$2" missing_noun="$3" summary_label="$4"
    shift 4
    local entry _bind_src _bind_dest real_path note spec
    local valid=0

    for entry in "$@"; do
        _parse_bind_entry "$entry"
        if [ ! -e "$_bind_src" ]; then
            log "    ✗ Warning: 跳过不存在的${missing_noun} $_bind_src"
            continue
        fi

        real_path="$_bind_src"
        note="$tag"
        if [ -L "$_bind_src" ]; then
            # -e above followed the link, so the resolved target exists.
            real_path=$(readlink -f "$_bind_src")
            note="${tag:+${tag}，}软链接 -> $real_path:$_bind_dest"
        fi

        spec="${option}=${real_path//:/\\:}:${_bind_dest//:/\\:}"
        if [ "$option" = "--bind-ro" ]; then
            dynamic_readonly_binds+=("$spec")
        else
            dynamic_binds+=("$spec")
        fi
        log "    ✓ ${_bind_src}${note:+ ($note)}"
        valid=$((valid + 1))
    done
    log "  有效的${summary_label}: $valid 个"
}

# Collect NVIDIA related files/symlinks below a directory into static_readonly_binds.
# Arguments: $1 - directory searched recursively with find
#            $2 - file name pattern (find -name)
#            $3 - description used in the log output
# Globals:   static_readonly_binds (appended)
#            _nvidia_seen (associative array declared by create_container; shared by
#                          all scans so overlapping directories add a path only once)
#            scan_nvidia_count (written: number of paths added by this call; a
#                          variable is used because an exit status wraps at 256)
scan_nvidia_files() {
    local search_path="$1"
    local pattern="$2"
    local file_type="$3"
    local item real_path
    scan_nvidia_count=0

    if [ ! -d "$search_path" ]; then
        log "  ✗ $search_path 目录不存在"
        return 0
    fi

    while IFS= read -r -d '' item; do
        [ -z "${_nvidia_seen[$item]:-}" ] || continue
        _nvidia_seen["$item"]=1

        if [ ! -L "$item" ]; then
            static_readonly_binds+=("$item")
            log "    ✓ $item (文件)"
            scan_nvidia_count=$((scan_nvidia_count + 1))
            continue
        fi

        real_path=$(readlink -f "$item")
        if [ ! -e "$real_path" ]; then
            log "    ✗ Warning: 软链接目标不存在 $item -> $real_path"
            continue
        fi

        # Bind the link target itself when it lives in another directory below /usr/lib*.
        if [ -z "${_nvidia_seen[$real_path]:-}" ] \
            && [ "$(dirname "$real_path")" != "$(dirname "$item")" ] \
            && [[ "$real_path" == /usr/lib* ]]; then
            static_readonly_binds+=("$real_path")
            _nvidia_seen["$real_path"]=1
            log "    ✓ $real_path (软链接目标文件)"
            scan_nvidia_count=$((scan_nvidia_count + 1))
        fi

        # The symlink path itself is always bound as well.
        static_readonly_binds+=("$item")
        log "    ✓ $item (软链接 -> $real_path)"
        scan_nvidia_count=$((scan_nvidia_count + 1))
    done < <(find "$search_path" -name "$pattern" \( -type f -o -type l \) -print0 2>/dev/null)

    log "  ✓ 在 $search_path 找到 $scan_nvidia_count 个有效的 $file_type"
    return 0
}

# Build all bind/network/port arguments and start the container with
# systemd-nspawn inside the detached tmux session "_container-<CONTAINER_NAME>".
# Steps: wait for storage, collect per-node storage binds, static binds and
# (unless DISABLE_NVIDIA_BINDINGS=true) NVIDIA device/system/file binds, clean
# conflicting files in the container tree, terminate a running instance, set up
# host routes, launch tmux and finally wait for the container to become ready.
# Returns: the status of wait_for_container_ready, or 1 when tmux could not
#          start the session. Exits the script when CONTAINER_ROOT cannot be entered.
create_container() {
    local i num bind_path storage_kind
    local bind_entry _bind_src _bind_dest actual_src
    local host_end container_end
    local -A _nvidia_seen=()

    log "创建容器 $CONTAINER_NAME"

    log "切换到容器根目录: $CONTAINER_ROOT"
    cd "$CONTAINER_ROOT" || {
        log "错误: 无法进入容器根目录: $CONTAINER_ROOT"
        exit 1
    }

    bind_args=()
    network_args=()

    log "配置网络参数 (模式: $INAIS_NETWORK_MODE)"
    if [ "$INAIS_NETWORK_MODE" == "docker" ]; then
        network_args=("--network-bridge=inais0")
        log "使用 docker 网络模式: inais0"
    elif [ "$INAIS_NETWORK_MODE" == "veth" ]; then
        network_args=("--network-veth")
        log "使用 veth 网络模式"
    fi

    log "开始配置节点存储绑定 (节点范围: 1-$MAX_NODE_ID, 当前节点: $NODE_ID)"

    # A timeout is only a warning; missing paths are skipped below.
    wait_for_storage_mounts

    if [ "$DISABLE_LOCAL_STORAGE_BINDINGS" = true ]; then
        log "⚠ 本地存储挂载已禁用 (DISABLE_LOCAL_STORAGE_BINDINGS=true)"
    fi

    # One /data/in<NN> bind per node: local data for this node, NFS for the others.
    for i in $(seq 1 "$MAX_NODE_ID"); do
        printf -v num '%02d' "$i"
        if [ "$i" -eq "$NODE_ID" ]; then
            if [ "$DISABLE_LOCAL_STORAGE_BINDINGS" = true ]; then
                log "跳过当前节点 $i 的本地存储绑定（已禁用）"
                continue
            fi
            log "配置当前节点 $i (in${num}) 的本地存储绑定"
            storage_kind="本地存储"
            bind_path="${LOCAL_DATA_PREFIX}/$INAIS_HOST_ACCOUNT"
            mkdir -p "$bind_path/home"
        else
            storage_kind="NFS存储"
            bind_path="${NFS_STORAGE_PREFIX}${num}/$INAIS_HOST_ACCOUNT"
            log "配置远程节点 $i (nfs${num}) 的存储绑定"
            mkdir -p "$bind_path/Node${num}-Storage-Ready"
        fi

        if [ -e "$bind_path" ]; then
            bind_args+=("--bind" "$bind_path:/data/in${num}")
            log "  ✓ 绑定${storage_kind}: $bind_path -> /data/in${num}"
        else
            log "  ✗ Warning: 跳过不存在的绑定路径 $bind_path"
        fi
    done

    log "创建本地SSD存储目录"
    mkdir -p /var/lib/machines/_Data/inais

    log "配置静态绑定路径"
    # "SRC:DEST" entries; only sources that exist are bound later on.
    static_binds=(
        "${LOCAL_DATA_PREFIX}/$INAIS_HOST_ACCOUNT/home:/home"
        "${LOCAL_DATA_PREFIX}/$INAIS_HOST_ACCOUNT:/data/this"
        "/var/lib/machines/_Data/inais:/ssd"
        "/mnt/inaisfs/user-group-share/8user_05/Kenvix/Main-MountPoint:/data/shared/direct"
        "/mnt/inaisfs/user-group-share/8user_05/Kenvix/Main-MountPoint:/data/master"
    )

    if [ "$DISABLE_NVIDIA_BINDINGS" = true ]; then
        log "⚠ NVIDIA挂载已禁用 (DISABLE_NVIDIA_BINDINGS=true)"
        nvidia_device_binds=()
        nvidia_system_binds=()
    else
        log "配置 NVIDIA 设备绑定路径"
        # Bound read-write onto the same path inside the container.
        nvidia_device_binds=(
            "/dev/dri"
            "/dev/nvidia0"
            "/dev/nvidia1"
            "/dev/nvidia2"
            "/dev/nvidia3"
            "/dev/nvidia4"
            "/dev/nvidia5"
            "/dev/nvidia6"
            "/dev/nvidia7"
            "/dev/nvidiactl"
            "/dev/nvidia-modeset"
            "/dev/nvidia-uvm"
            "/dev/nvidia-uvm-tools"
            "/dev/nvidia-caps"
            "/sys/module/nvidia"
            "/sys/module/nvidia_modeset"
            "/sys/module/nvidia_drm"
            "/sys/module/nvidia_uvm"
            "/dev/nvidia-nvswitchctl"
        )

        log "配置 NVIDIA 系统路径绑定（只读）"
        # Bound read-only onto the same path inside the container.
        nvidia_system_binds=(
            "/proc/driver/nvidia"
            "/sys/devices/pci0000:00"
            "/sys/class/drm"
        )
    fi

    log "配置只读绑定路径"
    # Read-only binds onto the same path; the NVIDIA scans below append to this list.
    static_readonly_binds=(
        "/usr/lib/firmware/nvidia"
    )

    nvidia_bin_count=0
    nvidia_lib_count=0
    if [ "$DISABLE_NVIDIA_BINDINGS" != true ]; then
        log "动态扫描 NVIDIA 可执行文件"
        scan_nvidia_files "/usr/bin" "nvidia-*" "NVIDIA可执行文件"
        nvidia_bin_count=$((nvidia_bin_count + scan_nvidia_count))
        scan_nvidia_files "/usr/bin" "nvcc*" "CUDA编译器"
        nvidia_bin_count=$((nvidia_bin_count + scan_nvidia_count))
        scan_nvidia_files "/usr/bin" "nsight*" "NVIDIA分析工具"
        nvidia_bin_count=$((nvidia_bin_count + scan_nvidia_count))

        log "动态扫描 NVIDIA 库文件"
        scan_nvidia_files "/usr/lib/x86_64-linux-gnu" "libnvidia*" "NVIDIA库文件"
        nvidia_lib_count=$((nvidia_lib_count + scan_nvidia_count))
        # /usr/lib is searched recursively and therefore revisits the directory
        # above; the shared _nvidia_seen set keeps those paths from being added twice.
        scan_nvidia_files "/usr/lib" "libnvidia*" "NVIDIA库文件"
        nvidia_lib_count=$((nvidia_lib_count + scan_nvidia_count))
        scan_nvidia_files "/usr/lib/x86_64-linux-gnu" "libcuda*" "CUDA库文件"
        nvidia_lib_count=$((nvidia_lib_count + scan_nvidia_count))

        log "NVIDIA文件扫描完成:"
        log "  - 库文件: $nvidia_lib_count 个"
        log "  - 可执行文件: $nvidia_bin_count 个"
        log "  - 总计: $((nvidia_lib_count + nvidia_bin_count)) 个NVIDIA相关文件"
    else
        log "跳过NVIDIA文件扫描 (DISABLE_NVIDIA_BINDINGS=true)"
    fi

    log "处理绑定路径有效性检查"
    dynamic_binds=()
    dynamic_readonly_binds=()

    log "  检查普通绑定路径..."
    _add_bind_entries --bind "" "绑定路径" "普通绑定" "${static_binds[@]}"
    log "  检查 NVIDIA 设备绑定路径..."
    _add_bind_entries --bind "NVIDIA设备" "NVIDIA设备" " NVIDIA 设备绑定" "${nvidia_device_binds[@]}"
    log "  检查 NVIDIA 系统路径绑定..."
    _add_bind_entries --bind-ro "NVIDIA系统路径，只读" "NVIDIA系统路径" " NVIDIA 系统路径绑定" "${nvidia_system_binds[@]}"
    log "  检查只读绑定路径..."
    _add_bind_entries --bind-ro "只读" "只读绑定路径" "只读绑定" "${static_readonly_binds[@]}"

    log "检查并清理容器内的绑定目标路径冲突..."
    for bind_entry in "${static_binds[@]}" "${nvidia_device_binds[@]}" \
        "${nvidia_system_binds[@]}" "${static_readonly_binds[@]}"; do
        _parse_bind_entry "$bind_entry"
        [ -e "$_bind_src" ] || continue
        # The type check is done on the resolved path for symlinked sources.
        actual_src="$_bind_src"
        if [ -L "$_bind_src" ]; then
            actual_src=$(readlink -f "$_bind_src")
        fi
        check_and_clean_bind_target "$actual_src" "$_bind_dest"
    done
    log "  ✓ 容器路径冲突检查完成"

    log "停止现有容器实例"
    if machinectl status "$CONTAINER_NAME" >/dev/null 2>&1; then
        log "  发现现有容器，正在终止..."
        machinectl terminate "$CONTAINER_NAME" 2>/dev/null || true
        log "  等待容器完全停止..."
        sleep 2
        log "  ✓ 容器已停止"
    else
        log "  ✓ 无现有容器实例"
    fi

    log "准备启动容器 (systemd-nspawn)"
    log "  容器目录: $CONTAINER_ROOT"
    log "  主机名: $INAIS_HOSTNAME"
    log "  网络参数: ${network_args[*]}"
    host_end=$((PORT_FORWARD_HOST_START + PORT_FORWARD_COUNT - 1))
    container_end=$((PORT_FORWARD_CONTAINER_START + PORT_FORWARD_COUNT - 1))
    if [ "$USE_RINETD_PORT_FORWARDING" = true ]; then
        log "  端口转发: 使用 rinetd (${PORT_FORWARD_HOST_START}-${host_end} -> 容器${PORT_FORWARD_CONTAINER_START}-${container_end})"
    else
        log "  端口映射: systemd-nspawn 内置 (${PORT_FORWARD_HOST_START}-${host_end} -> ${PORT_FORWARD_CONTAINER_START}-${container_end})"
    fi
    log "  绑定参数数量: 普通=${#dynamic_binds[@]}, 只读=${#dynamic_readonly_binds[@]}, 存储=${#bind_args[@]}"

    # Built-in nspawn port mapping is only used when rinetd forwarding is off.
    port_args=()
    if [ "$USE_RINETD_PORT_FORWARDING" != true ]; then
        for i in $(seq 0 $((PORT_FORWARD_COUNT - 1))); do
            port_args+=("--port=$((PORT_FORWARD_HOST_START + i)):$((PORT_FORWARD_CONTAINER_START + i))")
        done
    fi

    log "配置路由表 ..."
    ip link delete "vb-$CONTAINER_NAME" type bridge 2>/dev/null || true
    create_route

    log "执行 systemd-nspawn 命令..."
    log "Args: ${port_args[*]} ${network_args[*]} ${dynamic_binds[*]} ${dynamic_readonly_binds[*]} ${bind_args[*]}"
    # "tmux new-session -d" returns as soon as the session exists, so it runs in
    # the foreground and its status tells whether the launch was accepted at all.
    if ! tmux new-session -d -s "_container-$CONTAINER_NAME" \
        systemd-nspawn \
        --directory="$CONTAINER_ROOT" \
        "${port_args[@]}" \
        --hostname="$INAIS_HOSTNAME" \
        --capability=all \
        --property=DeviceAllow="/dev/nvidia* rwm" \
        --property=DeviceAllow="/dev/dri* rwm" \
        --property=DeviceAllow="/dev/nvidiactl rwm" \
        --property=DeviceAllow="/dev/nvidia-modeset rwm" \
        --property=DeviceAllow="/dev/nvidia-uvm rwm" \
        --property=DeviceAllow="/dev/nvidia-uvm-tools rwm" \
        --property=DeviceAllow="/dev/nvidia-caps* rwm" \
        "${network_args[@]}" \
        --boot \
        --private-users=no \
        --resolv-conf=off \
        "${dynamic_binds[@]}" \
        "${dynamic_readonly_binds[@]}" \
        "${bind_args[@]}"; then
        log "错误: 无法创建 tmux session _container-$CONTAINER_NAME，容器未启动"
        return 1
    fi
    log "systemd-nspawn 已在 tmux session _container-$CONTAINER_NAME 中启动"

    wait_for_container_ready
}


# Poll once per second (for up to 600 seconds) until the container is known to
# machinectl and accepts "machinectl shell ... /bin/true". As soon as that works
# the container network is configured and, when USE_RINETD_PORT_FORWARDING=true,
# rinetd is started.
# Returns: 0 once the container is ready, 1 when the timeout expired.
wait_for_container_ready() {
    local TIMEOUT=600
    local started deadline now waited report
    local attempts=0

    started=$(date +%s)
    deadline=$((started + TIMEOUT))

    log "等待容器 $CONTAINER_NAME 启动（超时时间 ${TIMEOUT}秒）..."
    while :; do
        attempts=$((attempts + 1))
        now=$(date +%s)
        waited=$((now - started))

        # Progress and "still waiting" messages are only printed on every 10th check.
        report=false
        if [ $((attempts % 10)) -eq 0 ]; then
            report=true
            log "  等待中... 已用时 ${waited}秒 (检查次数: $attempts)"
        fi

        if ! machinectl status "$CONTAINER_NAME" >/dev/null 2>&1; then
            if [ "$report" = true ]; then
                log "  容器状态检查失败，继续等待..."
            fi
        else
            log "  ✓ 容器状态检查通过，测试shell连接..."
            if machinectl shell "$CONTAINER_NAME" /bin/true >/dev/null 2>&1; then
                log "  ✓ 容器shell测试通过"
                log "容器就绪，开始配置网络"
                configure_container_network

                if [ "$USE_RINETD_PORT_FORWARDING" = true ]; then
                    start_rinetd
                fi

                log "✓ 容器创建完成"
                return 0
            fi
            if [ "$report" = true ]; then
                log "  容器已启动但shell未就绪，继续等待..."
            fi
        fi

        if ((now >= deadline)); then
            log "错误：容器未在 ${TIMEOUT}秒 内启动。"
            log "最终状态检查："
            if machinectl status "$CONTAINER_NAME" >/dev/null 2>&1; then
                log "  容器状态: 运行中"
                machinectl status "$CONTAINER_NAME" || true
            else
                log "  容器状态: 未运行"
            fi
            return 1
        fi

        sleep 1
    done
}

# Run one monitoring pass: network device, tmux session and - only when the
# session already existed - the container network and (if enabled) rinetd.
# check_tmux_session recreates the container itself when the session is
# missing, so the remaining checks are skipped for that pass.
perform_full_check() {
    local session_present=true

    log "开始监控检查..."

    check_network_device
    check_tmux_session || session_present=false

    if [ "$session_present" = true ]; then
        check_container_network

        # rinetd is restarted when forwarding is enabled and the status check fails.
        if [ "$USE_RINETD_PORT_FORWARDING" = true ] && ! check_rinetd_status; then
            log "rinetd未运行，重新启动"
            start_rinetd
        fi
    fi

    log "监控检查完成"
}

# Sleep for the given number of seconds in one-second steps, returning early
# when RELOAD_REQUESTED has been set to true (the flag is reset before returning).
# Arguments: $1 - duration in seconds
interruptible_sleep() {
    local duration=$1
    local elapsed

    for ((elapsed = 0; elapsed < duration; elapsed++)); do
        if [ "$RELOAD_REQUESTED" = true ]; then
            RELOAD_REQUESTED=false
            log "收到HUP信号，立即执行检查"
            return 0
        fi
        sleep 1
    done
}

# Daemon entry point: perform the initial checks, then repeat a full monitoring
# pass followed by a 180 second wait, forever. The wait goes through
# interruptible_sleep, which ends early when RELOAD_REQUESTED is set.
main_daemon() {
    log "INAIS容器守护进程启动"

    # Initial check; pause 10 seconds when check_tmux_session reported a missing session.
    check_network_device
    check_tmux_session || sleep 10

    while :; do
        perform_full_check

        log "等待3分钟..."
        interruptible_sleep 180
    done
}

# HUP handler: log the request and raise RELOAD_REQUESTED so that
# interruptible_sleep ends its wait on the next one-second step.
handle_hup() {
    log "收到HUP信号，请求立即重新检查"
    RELOAD_REQUESTED="true"
}

# Stop the container: rinetd first (when enabled), then "machinectl stop" with up
# to 30 seconds of polling, "machinectl terminate" as fallback, and finally the
# tmux session "_container-<CONTAINER_NAME>".
stop_container() {
    local session="_container-$CONTAINER_NAME"
    local timeout=30
    local elapsed

    log "正在停止容器 $CONTAINER_NAME..."

    if [ "$USE_RINETD_PORT_FORWARDING" = true ]; then
        stop_rinetd
    fi

    if ! machinectl status "$CONTAINER_NAME" >/dev/null 2>&1; then
        log "容器已经停止"
    else
        log "发送停止信号给容器..."
        machinectl stop "$CONTAINER_NAME" 2>/dev/null || true
        ip link delete "vb-$CONTAINER_NAME" type bridge || true

        # Poll once per second until the container is gone or the timeout is reached.
        for ((elapsed = 0; elapsed < timeout; elapsed++)); do
            if ! machinectl status "$CONTAINER_NAME" >/dev/null 2>&1; then
                log "容器已优雅停止"
                break
            fi
            sleep 1
        done

        # Still there: terminate it and re-check after 2 seconds.
        if machinectl status "$CONTAINER_NAME" >/dev/null 2>&1; then
            log "容器未在${timeout}秒内停止，强制终止..."
            machinectl terminate "$CONTAINER_NAME" 2>/dev/null || true

            sleep 2
            if machinectl status "$CONTAINER_NAME" >/dev/null 2>&1; then
                log "警告：容器可能仍在运行"
            else
                log "容器已强制终止"
            fi
        fi
    fi

    if tmux has-session -t "$session" 2>/dev/null; then
        log "停止tmux session..."
        tmux kill-session -t "$session" 2>/dev/null || true
    fi
}

# SIGTERM / SIGINT handler: graceful shutdown.
# Runs stop_container to completion, logs the shutdown and ends the script
# with exit status 0 regardless of what stop_container returned.
cleanup() {
    log "收到终止信号，正在清理..."
    stop_container
    log "INAIS容器守护进程已停止"
    exit 0
}

# Forced shutdown handler: no grace period.
# Globals: CONTAINER_NAME, USE_RINETD_PORT_FORWARDING (read)
# Stops rinetd (when enabled), terminates the container if machinectl reports
# it, kills the container's tmux session if present, then exits with status 1.
force_stop() {
    local session="_container-$CONTAINER_NAME"

    log "收到强制停止信号，立即退出"

    if [[ "$USE_RINETD_PORT_FORWARDING" == true ]]; then
        stop_rinetd
    fi

    # Terminate right away instead of asking the container to shut down.
    if machinectl status "$CONTAINER_NAME" >/dev/null 2>&1; then
        log "强制终止容器..."
        machinectl terminate "$CONTAINER_NAME" 2>/dev/null || true
    fi

    if tmux has-session -t "$session" 2>/dev/null; then
        tmux kill-session -t "$session" 2>/dev/null || true
    fi

    exit 1
}

# 创建rinetd配置文件
create_rinetd_config() {
    log "创建rinetd配置文件"
    
    cat > "$RINETD_CONFIG" << EOF
# INAIS Container Port Forwarding Configuration
# Generated automatically by inais daemon

EOF
    
    # 添加端口转发规则
    for i in $(seq 0 $((PORT_FORWARD_COUNT - 1))); do
        local host_port=$((PORT_FORWARD_HOST_START + i))
        local container_port=$((PORT_FORWARD_CONTAINER_START + i))
        local container_ip="253.${NODE_ID}.$((CONTAINER_INDEX/256+1)).$((CONTAINER_INDEX%256))"
        
        echo "0.0.0.0 ${host_port} ${container_ip} ${container_port}" >> "$RINETD_CONFIG"
        echo "0.0.0.0 ${host_port}/udp ${container_ip} ${container_port}/udp" >> "$RINETD_CONFIG"
    done
    
    log "rinetd配置文件已创建：$RINETD_CONFIG"
}

# Start the rinetd instance that serves this container.
# Globals: RINETD_CONFIG, RINETD_PID, PORT_FORWARD_HOST_START,
#          PORT_FORWARD_CONTAINER_START, PORT_FORWARD_COUNT (read)
# Returns: 0 when our rinetd is running afterwards (including when it was
#          already running), 1 when rinetd is not installed, the configuration
#          could not be created, or no running instance can be found after
#          the launch.
start_rinetd() {
    log "启动rinetd服务"

    # rinetd must be installed.
    if ! command -v rinetd >/dev/null 2>&1; then
        log "错误：rinetd未安装，请先安装rinetd"
        return 1
    fi

    # Nothing to do when our instance is already up.
    if check_rinetd_status; then
        log "rinetd已在运行，无需重新启动"
        return 0
    fi

    # Clear out any leftover of ours, then regenerate the configuration.
    stop_rinetd
    create_rinetd_config || return 1

    rinetd -c "$RINETD_CONFIG" &
    local rinetd_pid=$!

    # Give the process a moment to come up.
    sleep 1

    if kill -0 "$rinetd_pid" 2>/dev/null; then
        # rinetd stayed attached to the process we launched.
        echo "$rinetd_pid" > "$RINETD_PID"
    elif check_rinetd_status; then
        # rinetd detaches into the background unless it is told to stay in the
        # foreground, so the PID captured from `$!` belongs to a launcher that
        # has already exited. check_rinetd_status locates the real process and
        # records it in the PID file; read the PID back for the log line.
        rinetd_pid=$(cat "$RINETD_PID" 2>/dev/null)
    else
        log "错误：rinetd启动失败"
        return 1
    fi

    log "rinetd启动成功，PID: $rinetd_pid"
    local host_end=$((PORT_FORWARD_HOST_START + PORT_FORWARD_COUNT - 1))
    local container_end=$((PORT_FORWARD_CONTAINER_START + PORT_FORWARD_COUNT - 1))
    log "端口转发：${PORT_FORWARD_HOST_START}-${host_end} -> 容器${PORT_FORWARD_CONTAINER_START}-${container_end}"
    log "配置文件：$RINETD_CONFIG"
}

# Stop the rinetd instance recorded in our PID file.
# Globals: RINETD_PID, RINETD_CONFIG (read)
# Only a process whose command line names our configuration file is
# signalled. The PID file is removed in every case in which it existed.
# Returns: 0
stop_rinetd() {
    log "停止rinetd服务"

    if [ ! -f "$RINETD_PID" ]; then
        log "没有找到PID文件，可能rinetd未由此脚本启动"
        return 0
    fi

    local pid args
    pid=$(cat "$RINETD_PID" 2>/dev/null)

    if [ -n "$pid" ] && kill -0 "$pid" 2>/dev/null; then
        # A stale PID file can point at an unrelated process that reused the
        # PID. Require the configuration path to appear as a whole word on the
        # command line before sending any signal.
        args=$(ps -p "$pid" -o args= 2>/dev/null)
        if [[ " $args " == *" $RINETD_CONFIG "* ]]; then
            log "停止rinetd进程 (PID: $pid)"
            kill "$pid"
            sleep 2

            # Still alive after the TERM request: force it.
            if kill -0 "$pid" 2>/dev/null; then
                log "强制终止rinetd进程"
                kill -9 "$pid"
            fi
            log "rinetd服务已停止"
        else
            log "PID文件中的进程不是我们的rinetd，清理PID文件"
        fi
    else
        log "PID文件中的进程不存在或已停止"
    fi

    rm -f "$RINETD_PID"
}

# Helper: succeed when the command line of process $1 contains our
# configuration path as a complete word. A substring or regex match is not
# enough: "rinetd.conf.c1" must not be satisfied by "rinetd.conf.c10".
_rinetd_pid_uses_our_config() {
    local args
    args=$(ps -p "$1" -o args= 2>/dev/null) || return 1
    [[ " $args " == *" $RINETD_CONFIG "* ]]
}

# Report whether the rinetd instance for this container is running.
# Globals: RINETD_PID, RINETD_CONFIG (read)
# Side effects: removes a PID file that is stale or points at a foreign
#               process; writes the PID file when a matching rinetd is found
#               without one.
# Returns: 0 when our rinetd is running, 1 otherwise
check_rinetd_status() {
    local pid candidate

    # First trust, but verify, the PID file.
    if [ -f "$RINETD_PID" ]; then
        pid=$(cat "$RINETD_PID" 2>/dev/null)
        if [ -n "$pid" ] && kill -0 "$pid" 2>/dev/null; then
            if _rinetd_pid_uses_our_config "$pid"; then
                return 0
            fi
            log "PID文件中的进程不是我们的rinetd，清理PID文件"
        fi
        # Either the process is gone or it is not ours.
        rm -f "$RINETD_PID"
    fi

    # No usable PID file: look for a rinetd process that was started with our
    # configuration. Candidates are matched by exact process name and then
    # checked one by one, so exactly one PID ends up in the PID file.
    while IFS= read -r candidate; do
        if _rinetd_pid_uses_our_config "$candidate"; then
            log "发现使用我们配置文件的rinetd进程 (PID: $candidate)，更新PID文件"
            echo "$candidate" > "$RINETD_PID"
            return 0
        fi
    done < <(pgrep -x rinetd)

    return 1
}

# Install the signal handlers.
#   HUP        -> handle_hup  (request an immediate re-check)
#   TERM / INT -> cleanup     (graceful shutdown)
#   QUIT       -> force_stop  (forced shutdown)
# SIGKILL can never be caught by a process, so a trap on it never runs; the
# forced-stop handler is bound to SIGQUIT, a catchable signal, instead.
trap handle_hup SIGHUP
trap cleanup SIGTERM SIGINT
trap force_stop SIGQUIT

# Mode dispatch: "daemon" as the first argument starts the monitoring loop;
# anything else prints the usage text (to stdout) and exits with status 1.
if [[ "$1" != "daemon" ]]; then
    cat <<EOF
使用方法: $0 daemon
以daemon模式运行容器监控

功能：
  - 自动监控和管理systemd-nspawn容器
  - 自动配置网络（支持docker bridge和veth模式）
  - 支持两种端口转发方式：
    * rinetd: 外部端口转发工具 (USE_RINETD_PORT_FORWARDING=true)
    * systemd-nspawn内置: 原生端口映射 (USE_RINETD_PORT_FORWARDING=false)
  - 自动挂载NVIDIA设备和库文件（只读）
  - 自动绑定存储和共享目录

配置：
  USE_RINETD_PORT_FORWARDING=$USE_RINETD_PORT_FORWARDING
  DISABLE_NVIDIA_BINDINGS=${DISABLE_NVIDIA_BINDINGS:-false}
  PORT_FORWARD_HOST_START=$PORT_FORWARD_HOST_START
  PORT_FORWARD_CONTAINER_START=$PORT_FORWARD_CONTAINER_START
  PORT_FORWARD_COUNT=$PORT_FORWARD_COUNT

依赖：
  - systemd-nspawn
  - tmux
EOF
    # rinetd is only listed as a dependency when rinetd forwarding is enabled.
    if [[ "$USE_RINETD_PORT_FORWARDING" == true ]]; then
        echo "  - rinetd（用于端口转发）"
    fi
    cat <<EOF

信号支持：
  HUP  - 立即重新执行检查
  TERM - 优雅退出
  INT  - 优雅退出
EOF
    exit 1
fi

main_daemon