I have written some Bash shell functions that can be useful in troubleshooting AAKE problems when the AWI is not available.
This set of functions is intended to be sourced in a Bash shell in a dedicated Linux pod running in the same Kubernetes cluster as the AAKE server processes. The AE server logs must be stored in a PVC or NFS file system, and the pod in which you use these functions must have access to this file system. The path to the logs is specified by a variable at the top of the script.
#!/bin/bash
#
# uc4_functions_k8s.sh
#
# Description
# Functions for UC4 (Automation Engine) administration in Kubernetes (K8S)
#
# Usage
# Source this script in .bashrc, and then use the functions from the Linux shell.
#
# Author
# Michael A. Lowry <michael_lowry@swissre.com> <michael.lowry@gmail.com>
#
# Version Date Description
# 1.0.0 2025.06.30 Adapted previous uc4_functions.sh script for Kubernetes.
# 1.0.1 2025.07.21 Improved identification of OWP and RWP. Other small bug fixes.
# 1.0.2 2025.07.23 Improvements to proc2host, log2proc, and dwp functions.
# 1.0.3 2025.07.23 Added identification of JWP roles. Added + symbol to OWP & RWP.
# 1.0.4 2025.07.23 Added header with server name, version, and uptime.
# Shell options
#   extglob: extended globbing, which enables pattern lists like '@(A|B|C)'.
shopt -s extglob

# Directory that holds the AE server log files (e.g. WPsrv_log_001_00.txt).
UC4_Log_Dir='/usr/server/tmp/log'
# Search logs one by one, from most recent to oldest.
#
# One log generation is examined at a time (00, 01, ... up to max_index); the
# search stops at the first generation for which grep produces any output.
#
# Arguments:
#   $1 proc_type      log-name prefix, e.g. 'WP' or 'CP' (required)
#   $2 search_string  pattern handed to grep (required)
#   $3 grep_options   a single grep option word (default: -E)
#   $4 proc_num       process number or glob used in the log name (default: '???')
#   $5 max_index      highest log generation to search (default: 10)
#   $6 max_age        only logs modified less than this many minutes ago
#                     are searched (default: 28800)
# Globals:  UC4_Log_Dir (read)
# Outputs:  the grep output of the first generation that produced any
# Returns:  1 when fewer than two arguments are given
search_logs () {
  if [[ $# -lt 2 ]]; then echo "Please specify an AE process type and search string."; return 1; fi
  local proc_type=$1
  local search_string=$2
  local grep_options="${3:--E}"
  local proc_num="${4:-???}"
  local max_index="${5:-10}"
  local max_age="${6:-28800}"
  local log_prefix="${proc_type}srv_log_${proc_num}_"
  local log_suffix=".txt"
  local i generation file grep_output

  # 10# forces base 10 so that values such as '08' are not rejected as octal.
  for (( i = 0; i <= 10#$max_index; i++ )); do
    # Log generations are always written with two digits in the file name.
    printf -v generation '%02d' "$i"
    file="${log_prefix}${generation}${log_suffix}"
    # grep is run once per file ('\;') so that no file-name prefix is added
    # to the matching lines.
    grep_output=$(find "$UC4_Log_Dir" -name "$file" -mmin "-$max_age" \
      -exec grep "$grep_options" -- "$search_string" {} \; 2>/dev/null)
    if [[ -n "$grep_output" ]]; then
      printf '%s\n' "$grep_output"
      break
    fi
  done
}
# Identify the Primary Work Process (PWP).
# Takes the last U00003475 line (in sort order) that search_logs returns for
# the WP logs and prints the part of its first quoted string after the '#'.
pwp () {
  local -a query=("WP" "U00003475" "-E" '???' 10 100000)
  search_logs "${query[@]}" \
    | sort \
    | tail -n 1 \
    | cut -d "'" -f 2 \
    | cut -d '#' -f 2
}
# Identify the WP with role O (outputs).
# Takes the last "assumes the role 'O'" line (in sort order) that search_logs
# returns and prints the part of its first quoted string after the '#'.
owp () {
  local -a query=("WP" "U00003344 Server .* assumes the role 'O'" '' '???' '10' '648000')
  search_logs "${query[@]}" \
    | sort \
    | tail -n 1 \
    | cut -d "'" -f 2 \
    | cut -d '#' -f 2
}
# Identify the WP with role R (resource calculations).
# Takes the last "assumes the role 'R'" line (in sort order) that search_logs
# returns and prints the part of its first quoted string after the '#'.
rwp () {
  local -a query=("WP" "U00003344 Server .* assumes the role 'R'" '' '???' '10' '648000')
  search_logs "${query[@]}" \
    | sort \
    | tail -n 1 \
    | cut -d "'" -f 2 \
    | cut -d '#' -f 2
}
# Identify Java work processes (JWPs).
# Lists the generation-00 WP logs that contain message U02000090 (grep -l)
# and converts those log paths to process names.
jwp () {
  local matching_logs
  matching_logs=$(search_logs 'WP' 'U02000090' '-l' '???' '00')
  log2proc "$matching_logs"
}
# Identify Java communications processes (JCPs) -- NOTE: includes REST processes too.
# Lists the generation-00 CP logs that contain message U02000090 (grep -l)
# and converts those log paths to process names.
jcp () {
  local matching_logs
  matching_logs=$(search_logs 'CP' 'U02000090' '-l' '???' '00')
  log2proc "$matching_logs"
}
# Identify REST processes (RESTPs).
# Lists the generation-00 CP logs that contain a U00003400 'REST API' server
# message (grep -l) and converts those log paths to process names.
restp () {
  local matching_logs
  matching_logs=$(search_logs 'CP' 'U00003400 Server .REST API.' '-l' '???' '00')
  log2proc "$matching_logs"
}
# Identify Work processes (WPs).
# Lists the generation-00 WP logs that do NOT contain message U02000090
# (grep -L) and converts those log paths to process names.
aewp () {
  local matching_logs
  matching_logs=$(search_logs 'WP' 'U02000090' '-L' '???' '00')
  log2proc "$matching_logs"
}
# Identify Communications processes (CPs).
# Lists the generation-00 CP logs that do NOT contain message U02000090
# (grep -L) and converts those log paths to process names.
aecp () {
  local matching_logs
  matching_logs=$(search_logs 'CP' 'U02000090' '-L' '???' '00')
  log2proc "$matching_logs"
}
# Identify Dialog work processes (DWPs).
#
# For every process name reported by aewp, the last entry for that process in
# the sorted WP_Messages list decides: the name is printed only when that
# entry's mode is 'DWP'.
#
# Globals:  WP_Messages (read; filled by calling wp_messages when empty)
# Outputs:  one DWP process name per line
dwp () {
  # wp_messages fills WP_Messages but also prints the list; discard the
  # printout so that it does not end up in this function's output.
  if [[ -z "$WP_Messages" ]]; then wp_messages >/dev/null; fi

  local Sorted_Messages Proc_Name
  # Entries are '<timestamp> <name> <mode>'. WP_Messages is not stored in
  # sorted order, so sort it first to make the last entry per process the
  # latest one.
  Sorted_Messages=$(printf '%s\n' "$WP_Messages" | sort)

  aewp | while IFS= read -r Proc_Name; do
    # Skip blank lines; an empty name would otherwise match every entry.
    [[ -n "$Proc_Name" ]] || continue
    printf '%s\n' "$Sorted_Messages" \
      | awk -v name="$Proc_Name" '$2 == name { mode = $3 } END { if (mode == "DWP") print name }'
  done
}
# Identify JWP roles.
#
# Searches the logs of one process for U00045395 messages that mention AUT,
# IDX, PER or UTL, takes the last such line (in sort order) and prints the
# content of its first [...] group with the 'JWP' entry removed.
#
# Arguments: $1 - AE process name, e.g. 'wp5' or 'WP005'
# Outputs:   the role list, e.g. 'AUT'
# Returns:   1 when no name is given or the name cannot be normalized
jwp2role () {
  if [[ $# -lt 1 ]]; then echo "Please provide an AE process name."; return 1; fi
  local Proc_Name Proc_Num
  # Stop on an invalid name: an empty process number would make search_logs
  # fall back to '???' and report the role of some other process.
  Proc_Name=$(normalizeProcName "$1") || return 1
  Proc_Num="${Proc_Name:2}"
  search_logs 'WP' 'U00045395.*(AUT|IDX|PER|UTL)' '-E' "$Proc_Num" '10' 200000 \
    | grep -vE '\[\]|\[JWP\]' \
    | sort \
    | tail -1 \
    | grep -oP '\[\K[^\]]+' \
    | sed -E 's/(,)?JWP(,)?//g'
}
# List every Java work process reported by jwp together with the role(s)
# that jwp2role reports for it, one process per line.
jwp-roles () {
  local name role
  while IFS= read -r name; do
    role=$(jwp2role "$name")
    # $role is deliberately left unquoted: an empty role adds no trailing
    # blank and a multi-word role is joined with single spaces.
    echo "$name" $role
  done < <(jwp)
}
# Take log file paths and extract just the AE process names, e.g., 'WP002'.
#
# Arguments: $1 - newline-separated list of log file paths such as
#                 '/usr/server/tmp/log/WPsrv_log_002_00.txt'
# Outputs:   the sorted process names, one per line; nothing for empty input
log2proc () {
  local Proc_Log Log_Basename Proc_Type Proc_Num Unused
  while IFS= read -r Proc_Log; do
    # An empty argument still yields one (empty) line from the here-string;
    # skip it instead of printing a blank "process name".
    [[ -n "$Proc_Log" ]] || continue
    Log_Basename=${Proc_Log##*/}
    # The process type is the first two characters of the file name ...
    Proc_Type=${Log_Basename:0:2}
    # ... and the process number is its third '_'-separated field.
    IFS=_ read -r Unused Unused Proc_Num Unused <<< "$Log_Basename"
    printf '%s\n' "${Proc_Type}${Proc_Num}"
  done <<< "$1" | sort
}
# Take names like 'wp2' and convert them to a standard format like 'WP002'.
#
# The name is upper-cased; its first two characters must be CP or WP, and a
# one- or two-character number is left-padded with zeros to three characters.
#
# Arguments: $1 - process name in any case, e.g. 'wp2', 'Cp14', 'WP123'
# Outputs:   the normalized name on stdout
# Returns:   1 (with a message on stderr) when the type is not CP or WP
normalizeProcName () {
  local Proc_Name Proc_Type Proc_Num
  # Quote the character classes so the shell cannot expand them as globs.
  Proc_Name=$(printf '%s' "${1-}" | tr '[:lower:]' '[:upper:]')
  Proc_Type="${Proc_Name:0:2}"
  Proc_Num="${Proc_Name:2}"
  # A plain 'CP|WP' pattern list is used so that this function can be
  # defined whether or not extglob is enabled.
  case "$Proc_Type" in
    CP|WP) ;;
    *) echo "ERROR: Process type must be CP or WP." >&2; return 1 ;;
  esac
  case ${#Proc_Num} in
    1) Proc_Num="00$Proc_Num" ;;
    2) Proc_Num="0$Proc_Num" ;;
  esac
  echo "${Proc_Type}${Proc_Num}"
}
# Take AE process names like 'WP002', and generate pathnames to the most recent (00) log.
#
# Arguments: $1 - one process name, or several separated by newlines
# Globals:   UC4_Log_Dir (read)
# Outputs:   one log path per valid name; names that cannot be normalized
#            are skipped
# Returns:   1 when no argument is given or any name was invalid
proc2log () {
  if [[ $# -lt 1 ]]; then echo "Please provide an AE process name."; return 1; fi
  local Raw_Name Proc_Name Proc_Type Proc_Num
  local rc=0
  # 'read' without IFS= trims surrounding blanks from each name.
  while read -r Raw_Name; do
    # Skip invalid names instead of printing a bogus 'srv_log__00.txt' path.
    if ! Proc_Name=$(normalizeProcName "$Raw_Name"); then
      rc=1
      continue
    fi
    Proc_Type="${Proc_Name:0:2}"
    Proc_Num="${Proc_Name:2}"
    printf '%s\n' "${UC4_Log_Dir}/${Proc_Type}srv_log_${Proc_Num}_00.txt"
  done <<< "$1"
  return "$rc"
}
# Take an AE process name, and print a list of all of the logs for this process.
#
# Arguments: $1 - one process name, or several separated by newlines
# Globals:   UC4_Log_Dir (read)
# Outputs:   the paths of all '<type>srv_log_<num>_*.txt' files for each
#            valid name, in version-sort order; nothing when none exist
# Returns:   1 when no argument is given
proc2logs () {
  if [[ $# -lt 1 ]]; then echo "Please provide an AE process name."; return 1; fi
  local Raw_Name Proc_Name Proc_Type Proc_Num Log_File
  local -a Log_Files
  # 'read' without IFS= trims surrounding blanks from each name.
  while read -r Raw_Name; do
    Proc_Name=$(normalizeProcName "$Raw_Name") || continue
    Proc_Type="${Proc_Name:0:2}"
    Proc_Num="${Proc_Name:2}"
    # Let the shell expand the glob rather than parsing 'ls' output; an
    # unmatched glob stays literal and is filtered out by the -e test.
    Log_Files=()
    for Log_File in "${UC4_Log_Dir}/${Proc_Type}srv_log_${Proc_Num}"_*.txt; do
      if [[ -e "$Log_File" ]]; then Log_Files+=("$Log_File"); fi
    done
    if [[ ${#Log_Files[@]} -gt 0 ]]; then
      printf '%s\n' "${Log_Files[@]}" | sort -V
    fi
  done <<< "$1"
}
# Take AE process names like 'WP002', and look up the host/pod name.
#
# Arguments: $1 - one process name, or several separated by newlines
# Outputs:   one line per input name: the host name, or an empty line when
#            the name is invalid or no host name was found
# Returns:   1 when no argument is given
proc2host () {
  if [[ $# -lt 1 ]]; then echo "Please provide an AE process name."; return 1; fi
  local Raw_Name Proc_Name Proc_Type Proc_Num Host_Name
  # 'read' without IFS= trims surrounding blanks from each name.
  while read -r Raw_Name; do
    Host_Name=""
    # Only search for names that normalize; empty type/number arguments
    # would otherwise make search_logs look through unrelated logs.
    if Proc_Name=$(normalizeProcName "$Raw_Name"); then
      Proc_Type="${Proc_Name:0:2}"
      Proc_Num="${Proc_Name:2}"
      # First look in log generations 00-02.
      # Example message: U00003492 Server has been started on Host 'ae-wp-5bb4d7c578-rhhfn'
      Host_Name=$(search_logs "$Proc_Type" 'U00003492' '' "$Proc_Num" '02' 2>/dev/null \
        | cut -d\' -f 2 | grep '[A-Za-z]' | sort | tail -1)
      # If the pod/host name was not found there, look for the host
      # information message using the default search_logs range.
      # Example message: U02000066 Host information: Host name='jwp-54dc97b7cc-mc45m', IP address='10.0.0.1'
      if [[ -z "$Host_Name" ]]; then
        Host_Name=$(search_logs "$Proc_Type" 'U02000066' '' "$Proc_Num" 2>/dev/null \
          | sort -t':' -k 2 | tail -1 | cut -d\' -f 2)
      fi
    fi
    echo "$Host_Name"
  done <<< "$1"
}
# Print the timestamp portion of one log entry (file-name prefix removed).
#   $1 - log entry
#   $2 - number of the ' - '-separated field to print when the entry contains
#        exactly two hyphens; in every other case the first field is printed
_wp_message_timestamp () {
  local Entry=$1 Field=$2
  local Hyphens=${Entry//[^-]/}
  if [[ ${#Hyphens} -ne 2 ]]; then Field=1; fi
  awk -F' - ' -v field="$Field" '{ print $field }' <<< "$Entry"
}

# Collect both WP start messages and WP mode change messages.
# This brute-force approach was devised to identify DWPs.
#
# Each collected entry has the form '<timestamp> <process name> <mode>'.
#
# Globals:  UC4_Log_Dir (read)
#           WP_Messages (written: all entries, newline-separated, unsorted)
# Outputs:  the entries, sorted and de-duplicated; nothing when none found
wp_messages () {
  local IFS=$' \t\n'
  local Log_Line Log_File Log_Base Log_Entry Proc_Num Unused
  local Message_Timestamp WP_Name WP_Mode
  local -a Words
  WP_Messages=""

  # Collect WP start messages. (When WPs start, they are normal WPs by default.)
  # grep -H always prefixes each line with '<file>:', even for a single file.
  while read -r -a Words; do
    [[ ${#Words[@]} -gt 0 ]] || continue
    # Re-join the words with single blanks (whitespace runs are collapsed).
    Log_Line="${Words[*]}"
    # Split '<file>:<entry>' at the first colon so that hyphens, underscores
    # or colons are only ever counted in the part they belong to.
    Log_File=${Log_Line%%:*}
    Log_Entry=${Log_Line#*:}
    Message_Timestamp=$(_wp_message_timestamp "$Log_Entry" 2)
    # The process number is the third '_'-separated field of the file name.
    Log_Base=${Log_File##*/}
    IFS=_ read -r Unused Unused Proc_Num Unused <<< "$Log_Base"
    WP_Name="WP${Proc_Num}"
    WP_Mode="WP"
    WP_Messages+="${WP_Messages:+$'\n'}${Message_Timestamp} ${WP_Name} ${WP_Mode}"
  done < <(grep -H U00003400 "${UC4_Log_Dir}"/WPsrv_log_???_00.txt 2>/dev/null | grep '#WP')

  # Collect WP mode change messages from all log generations.
  while read -r -a Words; do
    [[ ${#Words[@]} -gt 0 ]] || continue
    Log_Line="${Words[*]}"
    Log_Entry=${Log_Line#*:}
    Message_Timestamp=$(_wp_message_timestamp "$Log_Entry" 3)
    # Process name: the text between '#' and the following quote.
    WP_Name=$(awk -F"#|'" '{print $3}' <<< "$Log_Entry")
    # Mode: the third quoted string of the entry.
    WP_Mode=$(cut -d"'" -f 6 <<< "$Log_Entry")
    WP_Messages+="${WP_Messages:+$'\n'}${Message_Timestamp} ${WP_Name} ${WP_Mode}"
  done < <(grep -HE "U00003389 Server.*WP.*DWP|U00003389 Server.*DWP.*WP" \
    "${UC4_Log_Dir}"/WPsrv_log_???_*.txt 2>/dev/null)

  # Write messages to output
  if [[ -n "$WP_Messages" ]]; then
    printf '%s\n' "$WP_Messages" | sort | uniq
  fi
}
# Get a list of all 00-generation logs (the most recent).
#
# Globals:  UC4_Log_Dir (read)
# Outputs:  sorted paths of all '?Psrv_log_???_00.txt' files under the log
#           directory that were modified less than 28800 minutes ago
ae_logs () {
  # The directory is quoted so that paths containing blanks work.
  find "$UC4_Log_Dir" -name '?Psrv_log_???_00.txt' -mmin -28800 | sort
}
# Get a list of AE server processes, based on log file names.
# Feeds the log paths reported by ae_logs to log2proc.
ae_proc () {
  local newest_logs
  newest_logs=$(ae_logs)
  log2proc "$newest_logs"
}
# Derive the AE process type from a pod/host name, based on its name prefix.
#
# Arguments: $1 - one pod name, or several separated by newlines
# Outputs:   one line per pod name: REST, JCP, JWP, WP or CP, or an empty
#            line when the name matches none of the known patterns
# Returns:   1 when no argument is given
pod2type () {
  if [[ $# -lt 1 ]]; then echo "Please provide an AE pod name."; return 1; fi
  local Proc_Host Proc_Type
  while IFS= read -r Proc_Host; do
    Proc_Type=""
    # Order matters: the specific jcp-*/jwp-* prefixes must be tested
    # before the generic *wp-*/*cp-* patterns.
    case "$Proc_Host" in
      jcp-rest*) Proc_Type="REST" ;;
      jcp-ws*)   Proc_Type="JCP" ;;
      jwp-*)     Proc_Type="JWP" ;;
      *wp-*)     Proc_Type="WP" ;;
      *cp-*)     Proc_Type="CP" ;;
    esac
    # Print inside the loop so that every input line gets its own result.
    echo "$Proc_Type"
  done <<< "$1"
}
# Display information about running AE server processes, gleaned from logs.
#
# Usage: ae [process name]
#   Without an argument every process reported by ae_proc is listed; with one
#   argument only that process is shown.
#
# Globals:  CP WP PWP OWP RWP DWP JWP JCP RESTP (read; filled when empty).
#           They cache '@(A|B|...)' pattern lists between calls; use aereset
#           to clear them.
# Outputs:  a server name/version header followed by one table row per
#           process: name, type/role, host name, newest log file
# Returns:  1 on a usage error, an invalid name, or a missing log file
ae () {
  local ae_counter=0
  local Proc_List Proc_Name Proc_Type Host_Name Proc_Log
  local PWP_Name PWP_Log Server_Info Server_Name Server_Version
  local JWP_Role log_date today d1 d2 diff

  if [[ $# -eq 0 ]]; then
    Proc_List="$(ae_proc)"
  elif [[ $# -eq 1 ]]; then
    # normalizeProcName reports invalid names on stderr.
    Proc_List=$(normalizeProcName "$1") || return 1
    if [[ ! -f $(proc2log "$Proc_List") ]]; then
      echo "There is no log for AE process ${Proc_List}. Is it running?"
      return 1
    fi
  else
    echo "Usage: ae [process name]"
    return 1
  fi

  # Header: server name and version, read from the newest log of the PWP.
  PWP_Name=$(pwp)
  if [[ -n "$PWP_Name" ]]; then
    PWP_Log=$(proc2log "$PWP_Name")
    Server_Info=$(grep -E 'U00003380|U00003400' "$PWP_Log")
    # Join multiple matching lines into one before cutting out the fields.
    Server_Info=${Server_Info//$'\n'/ }
  fi
  Server_Name=$(cut -d"'" -f2 <<< "$Server_Info" | cut -d'#' -f1)
  Server_Version=$(cut -d"'" -f4 <<< "$Server_Info")
  printf "%-20s %-32s %s\n" "Server" "Version"
  printf "%-20s %-32s %s\n" "$Server_Name" "$Server_Version"
  echo

  # Create extended globbing lists of processes of each type.
  if [[ -z "$CP" ]]; then CP="@($(aecp | paste -sd'|' -))"; fi
  if [[ -z "$WP" ]]; then WP="@($(aewp | paste -sd'|' -))"; fi
  if [[ -z "$PWP" ]]; then PWP="@($(pwp | paste -sd'|' -))"; fi
  if [[ -z "$OWP" ]]; then OWP="@($(owp | paste -sd'|' -))"; fi
  if [[ -z "$RWP" ]]; then RWP="@($(rwp | paste -sd'|' -))"; fi
  if [[ -z "$DWP" ]]; then DWP="@($(dwp | paste -sd'|' -))"; fi
  if [[ -z "$JWP" ]]; then JWP="@($(jwp | paste -sd'|' -))"; fi
  if [[ -z "$JCP" ]]; then JCP="@($(jcp | paste -sd'|' -))"; fi
  if [[ -z "$RESTP" ]]; then RESTP="@($(restp | paste -sd'|' -))"; fi

  # Proc_List is deliberately unquoted: it is a whitespace-separated list.
  for Proc_Name in $Proc_List; do
    (( ae_counter++ ))
    Proc_Type=""
    Proc_Log=$(proc2log "$Proc_Name")
    Host_Name=$(proc2host "$Proc_Name")
    if [[ -n "$Host_Name" ]]; then
      Proc_Type=$(pod2type "$Host_Name")
    else
      Host_Name="UNKNOWN"
    fi
    # Also covers a host name that pod2type does not recognise.
    if [[ -z "$Proc_Type" ]]; then Proc_Type="UNKNOWN"; fi

    # Backup method of determining process type based on messages logged at process start.
    if [[ "$Proc_Type" == 'UNKNOWN' ]]; then
      # The patterns are unquoted so that they are matched as extglob lists.
      # RESTP is tested before JCP because the JCP list includes the REST
      # processes too.
      case $Proc_Name in
        $CP) Proc_Type="CP" ;;
        $WP) Proc_Type="WP" ;;
        $JWP) Proc_Type="JWP" ;;
        $RESTP) Proc_Type="REST" ;;
        $JCP) Proc_Type="JCP" ;;
      esac
    fi

    # If the process type is still unknown, this is probably because the process is not running.
    if [[ "$Proc_Type" == 'UNKNOWN' && $# -eq 1 ]]; then
      echo "Could not retrieve information about AE process ${Proc_Name}. Is it running?"
      # The last log line is expected to start with 'YYYYMMDD/...'.
      log_date=$(tail -1 "$Proc_Log" | cut -d' ' -f1 | cut -d'/' -f1)
      today=$(date +%Y%m%d)
      # Both dates are evaluated as UTC midnights, so the difference is a
      # whole number of days regardless of DST changes.
      if d1=$(date -u -d "${log_date:0:4}-${log_date:4:2}-${log_date:6:2}" +%s 2>/dev/null) \
        && d2=$(date -u -d "${today:0:4}-${today:4:2}-${today:6:2}" +%s 2>/dev/null); then
        diff=$(( (d2 - d1) / 86400 ))
        echo "The log file is $diff days old."
      fi
    fi

    # Handle special processes like PWP, OWP, RWP, and DWP.
    case $Proc_Name in
      $PWP) Proc_Type="PWP*" ;;
      $OWP) Proc_Type="OWP+" ;;
      $RWP) Proc_Type="RWP+" ;;
      $DWP) Proc_Type="DWP" ;;
    esac

    # Handle JWP roles
    if [[ "$Proc_Type" == 'JWP' ]]; then
      JWP_Role="$(jwp2role "$Proc_Name")"
      if [[ -n "$JWP_Role" ]]; then
        Proc_Type="JWP-$JWP_Role"
      fi
    fi

    # Print header
    if [[ $ae_counter -eq 1 ]]; then
      printf "%-8s %-11s %-42s %s\n" "AE Proc" "Type/Role" "Host name" "Log file"
    fi
    # Print output
    printf "%-8s %-11s %-42s %s\n" "$Proc_Name" "$Proc_Type" "$Host_Name" "$Proc_Log"
  done
}
# Reset lists cached in shell environment variables.
# Every cache variable is set to the empty string (not unset).
aereset () {
  local cache_name
  for cache_name in CP WP PWP OWP RWP DWP JWP JCP RESTP WP_Messages; do
    printf -v "$cache_name" '%s' ''
  done
}
Here is an example of the output of the ae function.
root@debug-tools:/# ae
Server Version
UC4_TEST 24.4.0+hf.3.build.1748423189162
AE Proc Type/Role Host name Log file
CP001 CP ae-cp-b9b5848d4-kvwf8 /usr/server/tmp/log/CPsrv_log_001_00.txt
CP002 JCP jcp-ws-75c9484747-sblwr /usr/server/tmp/log/CPsrv_log_002_00.txt
CP003 JCP jcp-ws-75c9484747-vq9pr /usr/server/tmp/log/CPsrv_log_003_00.txt
CP004 JCP jcp-ws-75c9484747-rnsrb /usr/server/tmp/log/CPsrv_log_004_00.txt
CP005 REST jcp-rest-76cd7bd477-kvn2g /usr/server/tmp/log/CPsrv_log_005_00.txt
CP006 JCP jcp-ws-75c9484747-gwtp8 /usr/server/tmp/log/CPsrv_log_006_00.txt
CP007 JCP jcp-ws-75c9484747-kfq68 /usr/server/tmp/log/CPsrv_log_007_00.txt
CP008 JCP jcp-ws-75c9484747-bdc8m /usr/server/tmp/log/CPsrv_log_008_00.txt
CP009 JCP jcp-ws-75c9484747-xv2wh /usr/server/tmp/log/CPsrv_log_009_00.txt
CP010 JCP jcp-ws-75c9484747-xlbm7 /usr/server/tmp/log/CPsrv_log_010_00.txt
CP011 JCP jcp-ws-75c9484747-jmhdq /usr/server/tmp/log/CPsrv_log_011_00.txt
CP013 JCP jcp-ws-75c9484747-hsgwm /usr/server/tmp/log/CPsrv_log_013_00.txt
CP014 CP ae-cp-b9b5848d4-g29sq /usr/server/tmp/log/CPsrv_log_014_00.txt
CP015 REST jcp-rest-76cd7bd477-4hz4p /usr/server/tmp/log/CPsrv_log_015_00.txt
WP001 DWP ae-wp-5bb4d7c578-hs2q5 /usr/server/tmp/log/WPsrv_log_001_00.txt
WP002 DWP ae-wp-5bb4d7c578-hctnd /usr/server/tmp/log/WPsrv_log_002_00.txt
WP003 DWP ae-wp-5bb4d7c578-8n74x /usr/server/tmp/log/WPsrv_log_003_00.txt
WP004 DWP ae-wp-5bb4d7c578-5h2lv /usr/server/tmp/log/WPsrv_log_004_00.txt
WP005 JWP-AUT jwp-54dc97b7cc-wl6jt /usr/server/tmp/log/WPsrv_log_005_00.txt
WP006 DWP ae-wp-5bb4d7c578-tswbm /usr/server/tmp/log/WPsrv_log_006_00.txt
WP007 DWP ae-wp-5bb4d7c578-9q64w /usr/server/tmp/log/WPsrv_log_007_00.txt
WP008 DWP ae-wp-5bb4d7c578-tddgn /usr/server/tmp/log/WPsrv_log_008_00.txt
WP009 JWP-IDX jwp-54dc97b7cc-w5b9x /usr/server/tmp/log/WPsrv_log_009_00.txt
WP010 DWP ae-wp-5bb4d7c578-vhsks /usr/server/tmp/log/WPsrv_log_010_00.txt
WP011 JWP-IDX jwp-54dc97b7cc-vhxf8 /usr/server/tmp/log/WPsrv_log_011_00.txt
WP012 JWP-AUT jwp-54dc97b7cc-fhl89 /usr/server/tmp/log/WPsrv_log_012_00.txt
WP013 DWP ae-wp-5bb4d7c578-mpcp6 /usr/server/tmp/log/WPsrv_log_013_00.txt
WP014 DWP ae-wp-5bb4d7c578-2gvtx /usr/server/tmp/log/WPsrv_log_014_00.txt
WP015 DWP ae-wp-5bb4d7c578-6xf7c /usr/server/tmp/log/WPsrv_log_015_00.txt
WP016 DWP ae-wp-5bb4d7c578-xz5t2 /usr/server/tmp/log/WPsrv_log_016_00.txt
WP017 DWP ae-wp-5bb4d7c578-hk8mx /usr/server/tmp/log/WPsrv_log_017_00.txt
WP018 DWP ae-wp-5bb4d7c578-8mnq5 /usr/server/tmp/log/WPsrv_log_018_00.txt
WP019 DWP ae-wp-5bb4d7c578-4t5h4 /usr/server/tmp/log/WPsrv_log_019_00.txt
WP020 DWP ae-wp-5bb4d7c578-b49p4 /usr/server/tmp/log/WPsrv_log_020_00.txt
WP021 PWP* ae-wp-5bb4d7c578-c66kd /usr/server/tmp/log/WPsrv_log_021_00.txt
WP022 JWP-UTL jwp-54dc97b7cc-kpgzs /usr/server/tmp/log/WPsrv_log_022_00.txt
WP023 JWP-IDX jwp-54dc97b7cc-v25qk /usr/server/tmp/log/WPsrv_log_023_00.txt
WP024 JWP-IDX jwp-54dc97b7cc-lsprl /usr/server/tmp/log/WPsrv_log_024_00.txt
WP025 JWP-AUT jwp-54dc97b7cc-nhtsl /usr/server/tmp/log/WPsrv_log_025_00.txt
WP026 JWP-AUT jwp-54dc97b7cc-hm8g7 /usr/server/tmp/log/WPsrv_log_026_00.txt
WP027 DWP ae-wp-5bb4d7c578-9wqx5 /usr/server/tmp/log/WPsrv_log_027_00.txt
WP028 JWP-PER jwp-54dc97b7cc-p2p97 /usr/server/tmp/log/WPsrv_log_028_00.txt
WP029 RWP+ ae-wp-5bb4d7c578-44wsg /usr/server/tmp/log/WPsrv_log_029_00.txt
WP030 DWP ae-wp-5bb4d7c578-xz7tb /usr/server/tmp/log/WPsrv_log_030_00.txt
WP031 DWP ae-wp-5bb4d7c578-xh69s /usr/server/tmp/log/WPsrv_log_031_00.txt
WP032 DWP ae-wp-5bb4d7c578-s56bd /usr/server/tmp/log/WPsrv_log_032_00.txt
WP033 WP ae-wp-5bb4d7c578-75zqs /usr/server/tmp/log/WPsrv_log_033_00.txt
WP034 DWP ae-wp-5bb4d7c578-g2jsp /usr/server/tmp/log/WPsrv_log_034_00.txt
WP035 DWP ae-wp-5bb4d7c578-9xhxt /usr/server/tmp/log/WPsrv_log_035_00.txt
WP036 DWP ae-wp-5bb4d7c578-v44st /usr/server/tmp/log/WPsrv_log_036_00.txt
WP037 DWP ae-wp-5bb4d7c578-sjr24 /usr/server/tmp/log/WPsrv_log_037_00.txt
WP038 OWP+ ae-wp-5bb4d7c578-hhf96 /usr/server/tmp/log/WPsrv_log_038_00.txt
WP039 DWP ae-wp-5bb4d7c578-c4b29 /usr/server/tmp/log/WPsrv_log_039_00.txt
WP040 DWP ae-wp-5bb4d7c578-9g9zk /usr/server/tmp/log/WPsrv_log_040_00.txt
These are based on my older Bash functions to identify AE server processes.
If some processes display without complete information, you may need to tweak the parameters of the search_logs function in the various places where it is called, so that it searches older (or higher-generation) logs. The right settings will depend on the particular system and its level of activity.
The current method I'm using to distinguish DWPs from non-dialog WPs is very inefficient, and is the reason the ae function takes a long time to run the first time. When logging changes, WPs do not print a message indicating whether they are running in normal or dialog mode. Because of this, it's usually necessary to look through lots of old logs to find this information. If anyone knows of a more efficient approach, I would be glad to learn about it.
Enjoy!