-
Notifications
You must be signed in to change notification settings - Fork 40
Expand file tree
/
Copy pathrun_multiple_simpleqa.sh
More file actions
executable file
·266 lines (229 loc) · 10.6 KB
/
run_multiple_simpleqa.sh
File metadata and controls
executable file
·266 lines (229 loc) · 10.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
#!/bin/bash
#==============================================================================
# SimpleQA Multiple Run Script
# Run SimpleQA with multiple configurations and generate plots
#==============================================================================
# Benchmark entry point. The path is relative, so this script has to be
# launched from the KGoT repository root.
PYTHON_SCRIPT='benchmarks/simpleqa.py'

#------------------------------------------------------------------------------
# Configuration
#------------------------------------------------------------------------------

# SimpleQA dataset files to process; append one entry per JSON file.
simpleqa_files=()
simpleqa_files+=('benchmarks/datasets/SimpleQA/dummy.json')

# How many times the complete list of dataset files is executed.
num_runs='1'
# Pick the getopt binary used for long-option parsing.
# Preference order:
#   1. Homebrew's gnu-getopt, but only when `brew` actually reports a prefix
#      (otherwise the candidate path would degenerate to a bare "/bin/getopt").
#   2. The first `getopt` found on PATH.
GETOPT_BIN=""
if brew_prefix="$(brew --prefix gnu-getopt 2>/dev/null)" && [ -n "$brew_prefix" ]; then
  GETOPT_BIN="${brew_prefix}/bin/getopt"
fi
unset brew_prefix

if [ -n "$GETOPT_BIN" ] && [ -x "$GETOPT_BIN" ]; then
  GETOPT="$GETOPT_BIN"
else
  GETOPT="$(command -v getopt)"
fi

# Diagnose unusable choices early; these are warnings only so that --help
# keeps working even without a suitable getopt.
if [ -z "$GETOPT" ]; then
  echo "Warning: no getopt executable found; option parsing will fail." >&2
else
  # Enhanced (util-linux / GNU) getopt identifies itself by printing nothing
  # and exiting with status 4 when called with -T.
  "$GETOPT" -T >/dev/null 2>&1
  if [ $? -ne 4 ]; then
    echo "Warning: $GETOPT is not an enhanced getopt; long options may not be parsed correctly." >&2
  fi
fi
echo "Using getopt: $GETOPT"
#------------------------------------------------------------------------------
# Handle arguments
#------------------------------------------------------------------------------
# Print the usage text and stop when the first argument asks for help.
case "$1" in
  --help|-h)
    cat <<'USAGE'
Usage: ./run_multiple_simpleqa.sh [OPTIONS]

Optional arguments:
 --log_folder_base Directory where logs will be stored (default: logs/[DB_CHOICE]_[CONTROLLER_CHOICE]_[TOOL_CHOICE])
 --attachment_folder Path to SimpleQA problems attachments folder (default: None)
 --config_llm_path Path to LLM configuration file (default: kgot/config_llms.json)
 --logger_level Logging level (default: 20 [INFO])
 --logger_file_mode Log file mode (default: a)

 --neo4j_uri Docker URI for Neo4j (default: bolt://localhost:7687)
 --neo4j_username Neo4j username (default: neo4j)
 --neo4j_password Neo4j password (default: password)
 --python_executor_uri URI for Python tool executor (default: http://localhost:16000/run)
 --rdf4j_read_uri URI for RDF4J read endpoint (default: http://localhost:8080/rdf4j-server/repositories/kgot)
 --rdf4j_write_uri URI for RDF4J write endpoint (default: http://localhost:8080/rdf4j-server/repositories/kgot/statements)

 --max_iterations Max iterations for KGoT (default: 7)
 --num_next_steps_decision Number of next steps decision (default: 5)
 --max_retrieve_query_retry Max retries for retrieve query (default: 3)
 --max_cypher_fixing_retry Max retries for Cypher fixing (default: 3)
 --max_final_solution_parsing Max retries for final solution parsing (default: 3)
 --max_tool_retries Max retries for tools (default: 6)
 --max_llm_retries Max retries for LLM (default: 6)

 --llm_planning_model LLM planning model (default: gpt-4o-mini)
 --llm_planning_temperature LLM planning temperature (default: 0.0)
 --llm_execution_model LLM execution model (default: gpt-4o-mini)
 --llm_execution_temperature LLM execution temperature (default: 0.0)

 --controller_choice Controller choice (options: queryRetrieve, directRetrieve; default: queryRetrieve)
 --db_choice Database choice (options: neo4j, networkX; default: neo4j)
 --tool_choice Tool choice (default: tools_v2_3)
 --gaia_formatter Use GAIA formatter
 --disable_grader Disable the answer grader

USAGE
    exit 0
    ;;
esac
# Fallback values applied when an option is not given on the command line.
# They are meant to match the Python script's defaults (log_folder_base and
# gaia_file are the exceptions).
DB_CHOICE_DEFAULT='neo4j'
CONTROLLER_CHOICE_DEFAULT='queryRetrieve'
TOOL_CHOICE_DEFAULT='tools_v2_3'
MAX_ITERATIONS_DEFAULT='7'
LLM_EXECUTION_MODEL_DEFAULT='gpt-4o-mini'
LLM_EXECUTION_TEMPERATURE_DEFAULT='0.0'
NEO4J_URI_DEFAULT='bolt://localhost:7687'
PYTHON_EXECUTOR_URI_DEFAULT='http://localhost:16000/run'
RDF4J_READ_URI_DEFAULT='http://localhost:8080/rdf4j-server/repositories/kgot'
RDF4J_WRITE_URI_DEFAULT="${RDF4J_READ_URI_DEFAULT}/statements"

# Values captured from the command line; an empty string means "not provided".
LOG_FOLDER_BASE=
DB_CHOICE=
CONTROLLER_CHOICE=
TOOL_CHOICE=
MAX_ITERATIONS=
LLM_EXECUTION_MODEL=
LLM_EXECUTION_TEMPERATURE=
NEO4J_URI=
PYTHON_EXECUTOR_URI=
RDF4J_READ_URI=
RDF4J_WRITE_URI=

# Boolean switches; they stay off unless the matching flag is passed.
GAIA_FORMATTER='false'
DISABLE_GRADER='false'
# Parse CLI arguments.
#
# Long options understood by this wrapper. A trailing ':' means the option
# takes a value; gaia_formatter and disable_grader are plain switches (the
# usage text documents them without a value), so they must NOT carry a ':'.
LONG_OPTS=(
  log_folder_base: attachment_folder: config_llm_path: logger_level: logger_file_mode:
  neo4j_uri: neo4j_username: neo4j_password: python_executor_uri:
  rdf4j_read_uri: rdf4j_write_uri:
  max_iterations: num_next_steps_decision: max_retrieve_query_retry: max_cypher_fixing_retry:
  max_final_solution_parsing: max_tool_retries: max_llm_retries:
  llm_planning_model: llm_planning_temperature: llm_execution_model: llm_execution_temperature:
  controller_choice: db_choice: tool_choice:
  gaia_formatter disable_grader
)
# Join the list with commas, as expected by getopt's --long.
LONG_SPEC="$(IFS=,; printf '%s' "${LONG_OPTS[*]}")"

# Fall back to plain `getopt` from PATH if GETOPT was not resolved earlier.
if ! OPTS="$("${GETOPT:-getopt}" -o "" --long "$LONG_SPEC" \
  -n 'run_multiple_simpleqa.sh' -- "$@")"; then
  echo "Failed to parse options." >&2
  exit 1
fi

# getopt emits a shell-quoted, normalised argument list; eval is the
# documented way to load it back into the positional parameters.
eval set -- "$OPTS"

# Options this wrapper needs itself are stored in variables; every other
# option/value pair is collected in ARGS and forwarded to the Python script.
ARGS=()
# Bounded by $# so a malformed list can never make the loop spin forever.
while (( $# > 0 )); do
  case "$1" in
    --log_folder_base) LOG_FOLDER_BASE="$2"; shift 2 ;;
    --controller_choice) CONTROLLER_CHOICE="$2"; shift 2 ;;
    --db_choice) DB_CHOICE="$2"; shift 2 ;;
    --tool_choice) TOOL_CHOICE="$2"; shift 2 ;;
    --max_iterations) MAX_ITERATIONS="$2"; shift 2 ;;
    --neo4j_uri) NEO4J_URI="$2"; shift 2 ;;
    --python_executor_uri) PYTHON_EXECUTOR_URI="$2"; shift 2 ;;
    --rdf4j_read_uri) RDF4J_READ_URI="$2"; shift 2 ;;
    --rdf4j_write_uri) RDF4J_WRITE_URI="$2"; shift 2 ;;
    --llm_execution_model) LLM_EXECUTION_MODEL="$2"; shift 2 ;;
    --llm_execution_temperature) LLM_EXECUTION_TEMPERATURE="$2"; shift 2 ;;
    --gaia_formatter) GAIA_FORMATTER=true; shift ;;
    --disable_grader) DISABLE_GRADER=true; shift ;;
    --) shift; break ;;
    *)
      # Pass-through option: forward it only when it comes with a non-empty
      # value that does not itself look like an option.
      if [[ -n "${2-}" && "$2" != --* ]]; then
        ARGS+=("$1" "$2")
        shift 2
      else
        shift
      fi
      ;;
  esac
done

# Forward the switches to the Python script when they were requested.
if [ "$GAIA_FORMATTER" = true ]; then
  ARGS+=("--gaia_formatter")
fi
if [ "$DISABLE_GRADER" = true ]; then
  ARGS+=("--disable_grader")
fi
# Any option that is still empty after parsing falls back to its default.
MAX_ITERATIONS="${MAX_ITERATIONS:-$MAX_ITERATIONS_DEFAULT}"
CONTROLLER_CHOICE="${CONTROLLER_CHOICE:-$CONTROLLER_CHOICE_DEFAULT}"
DB_CHOICE="${DB_CHOICE:-$DB_CHOICE_DEFAULT}"
TOOL_CHOICE="${TOOL_CHOICE:-$TOOL_CHOICE_DEFAULT}"
NEO4J_URI="${NEO4J_URI:-$NEO4J_URI_DEFAULT}"
PYTHON_EXECUTOR_URI="${PYTHON_EXECUTOR_URI:-$PYTHON_EXECUTOR_URI_DEFAULT}"
RDF4J_READ_URI="${RDF4J_READ_URI:-$RDF4J_READ_URI_DEFAULT}"
RDF4J_WRITE_URI="${RDF4J_WRITE_URI:-$RDF4J_WRITE_URI_DEFAULT}"
LLM_EXECUTION_MODEL="${LLM_EXECUTION_MODEL:-$LLM_EXECUTION_MODEL_DEFAULT}"
LLM_EXECUTION_TEMPERATURE="${LLM_EXECUTION_TEMPERATURE:-$LLM_EXECUTION_TEMPERATURE_DEFAULT}"

# Derive the default log folder from the resolved choices. When ZERO_SHOT is
# "true" the name is built from the execution model and temperature instead.
# NOTE(review): ZERO_SHOT is not assigned anywhere in the visible script, so
# the zero-shot name is only used if the variable comes from the environment.
if [ "$ZERO_SHOT" = true ]; then
  LOG_FOLDER_BASE_DEFAULT="logs/${LLM_EXECUTION_MODEL}_${LLM_EXECUTION_TEMPERATURE}_zero_shot"
else
  LOG_FOLDER_BASE_DEFAULT="logs/${DB_CHOICE}_${CONTROLLER_CHOICE}_${TOOL_CHOICE}"
fi

# An explicitly supplied --log_folder_base always wins over the derived name.
LOG_FOLDER_BASE="${LOG_FOLDER_BASE:-$LOG_FOLDER_BASE_DEFAULT}"

# Summarise the effective configuration, followed by a blank line.
printf '%s\n' \
  "KGoT Run Configuration:" \
  " log_folder_base: $LOG_FOLDER_BASE" \
  " controller_choice: $CONTROLLER_CHOICE" \
  " db_choice: $DB_CHOICE" \
  " tool_choice: $TOOL_CHOICE" \
  " gaia_formatter: $GAIA_FORMATTER" \
  ""
#------------------------------------------------------------------------------
# Main Script
#------------------------------------------------------------------------------
# Run the benchmark num_runs times. Each run processes every file listed in
# simpleqa_files and then generates the plots for that run.
for ((run = 1; run <= num_runs; run++)); do
  echo "Iteration: $run/$num_runs"

  # With several runs each one gets its own run_<n> sub-folder; a single run
  # writes directly into the base log folder.
  if [[ $num_runs -gt 1 ]]; then
    run_log_folder="${LOG_FOLDER_BASE}/run_${run}"
  else
    run_log_folder="$LOG_FOLDER_BASE"
  fi

  # One category (file name without .json) and one log folder per dataset
  # file, stored at the same index as the file itself.
  log_folders=()
  categories=()
  for i in "${!simpleqa_files[@]}"; do
    category=$(basename "${simpleqa_files[$i]}" .json)
    categories+=("$category")
    log_folders+=("${run_log_folder}/${category}")
  done

  # Walk the dataset files and their log folders in lockstep.
  for i in "${!simpleqa_files[@]}"; do
    simpleqa_file=${simpleqa_files[$i]}
    log_folder=${log_folders[$i]}

    echo "Running with SimpleQA file: $simpleqa_file and log folder: $log_folder and it is the [$i-th/${#simpleqa_files[@]}] elements"
    echo "Output will be saved into the log folder in the cmd_log.log file"
    echo

    # Build the command as an argv array rather than a string, so that values
    # containing whitespace or glob characters reach Python as single,
    # unmodified arguments.
    cmd=(
      "$PYTHON_SCRIPT"
      --log_folder_base "$log_folder"
      --file "$simpleqa_file"
      --neo4j_uri "$NEO4J_URI"
      --rdf4j_read_uri "$RDF4J_READ_URI"
      --rdf4j_write_uri "$RDF4J_WRITE_URI"
      --python_executor_uri "$PYTHON_EXECUTOR_URI"
      --controller_choice "$CONTROLLER_CHOICE"
      --db_choice "$DB_CHOICE"
      --tool_choice "$TOOL_CHOICE"
      --max_iterations "$MAX_ITERATIONS"
      --llm_execution_model "$LLM_EXECUTION_MODEL"
      --llm_execution_temperature "$LLM_EXECUTION_TEMPERATURE"
      # Pass-through options collected while parsing the command line.
      "${ARGS[@]}"
    )

    echo "Running script:"
    echo "${cmd[*]}"
    echo

    # Change the Python version if needed and run the script. A failing file
    # is reported but does not stop the remaining files or the plotting.
    python3 "${cmd[@]}" ||
      echo "Warning: $PYTHON_SCRIPT failed for $simpleqa_file (exit status $?)" >&2
  done

  # Create plots for this run
  python3 benchmarks/plotters/plot_maker.py --root_directory "$run_log_folder" --categories "${categories[@]}" --max_iterations "$MAX_ITERATIONS" --benchmark "simpleqa"
done
#------------------------------------------------------------------------------
# Post-Processing
#------------------------------------------------------------------------------
# Move the snapshots to the log folder, unless ZERO_SHOT is "true".
# ZERO_SHOT is not assigned anywhere in this script, so an unset value is
# treated as "false"; otherwise this step would never run.
if [ "${ZERO_SHOT:-false}" = false ]; then
  snapshot_src="kgot/knowledge_graph/_snapshots/${LOG_FOLDER_BASE}"
  snapshot_dst="${LOG_FOLDER_BASE}/snapshots"
  # Only move when there is something to move; an empty base folder would
  # otherwise point at the whole snapshot root.
  if [ -n "$LOG_FOLDER_BASE" ] && [ -e "$snapshot_src" ]; then
    mv -- "$snapshot_src" "$snapshot_dst" ||
      echo "Warning: could not move snapshots from $snapshot_src to $snapshot_dst" >&2
  else
    echo "No snapshots found at $snapshot_src; nothing to move." >&2
  fi
fi