Skip to content

Commit 31e742a

Browse files
jmsteurlvbirgelen
authored and committed
Adding snapshot selection support for audit event and CG problems
1 parent e4be154 commit 31e742a

File tree

5 files changed

+247
-17
lines changed

5 files changed

+247
-17
lines changed

security/security-design/shared-assets/oci-security-health-check-forensics/README.md

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,21 @@
1-
# SHOW_OCI CSV Query Tool
1+
# OCI Security Health Check Forensics Tool
22

3-
The SHOW_OCI Query Tool is designed to load and analyze data from Oracle Cloud Infrastructure (OCI) environments using SQL. This tool enables users to import CSV files containing OCI resource information (e.g., compute instances, users, compartments) and perform SQL queries on the data.
3+
Last updated: 11 June 2025
4+
5+
The OCI Security Health Check Forensics Tool (the tool) is designed to load and analyze data from Oracle Cloud Infrastructure (OCI) environments. This tool enables users to import CSV files containing OCI resource information (e.g., compute instances, users, compartments) and perform SQL queries on the data. This data is used to investigate configuration issues etc.
6+
7+
The tool can also digest audit events and cloud guard problems. These resources can be loaded with different snapshots from a certain date with a number of days prior to that date.
8+
9+
This data can be used to investigate anomalies.
410

511
## Features
612
- Automatic OCI data fetching using showoci integration
713
- **Audit events** and **Cloud Guard problems** fetching with parallel processing
814
- Advanced filtering capabilities for age-based and compartment analysis
9-
- - Load CSV files with OCI data from multiple tenancies
15+
- Interactive tenancy selection from combined OCI configuration files
16+
- Load CSV files with OCI data from multiple tenancies
1017
- Execute SQL queries on the loaded data using DuckDB backend. Stay tuned for autonomous DB support.
1118
- Support for `SHOW TABLES` and `DESCRIBE table_name` commands
12-
- Interactive tenancy selection from combined OCI configuration files
1319
- Command history and help system
1420
- Batch query execution from YAML files
1521

@@ -281,4 +287,12 @@ The tool supports parallel fetching for large datasets:
281287
- Date-based filtering with flexible column support
282288
- Compartment hierarchy analysis and visualization
283289
- Support for complex nested data structures
284-
- Chainable filter operations on query results
290+
- Chainable filter operations on query results
291+
292+
# License
293+
294+
Copyright (c) 2025 Oracle and/or its affiliates.
295+
296+
Licensed under the Universal Permissive License (UPL), Version 1.0.
297+
298+
See [LICENSE](https://github.com/oracle-devrel/technology-engineering/blob/main/LICENSE) for more details.

security/security-design/shared-assets/oci-security-health-check-forensics/classes/commands/control_commands.py

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,16 +13,21 @@
1313
from classes.query_selector import QuerySelector
1414
from classes.output_formatter import OutputFormatter
1515
from classes.commands.filter_commands import AgeFilterCommand, CompartmentFilterCommand
16+
import json
17+
import pandas as pd
18+
import os
1619

1720
class SetQueriesCommand(Command):
1821
"""
1922
Usage: set queries [<directory>]
2023
Launches an interactive YAML-file picker and loads the selected queries.
24+
If the YAML file contains a snapshot_type, prompts for snapshot file selection.
2125
"""
2226
description = """Loads queries from a YAML file for batch execution.
2327
Usage: set queries [directory]
2428
- If directory is not specified, uses default query directory
2529
- Opens an interactive file picker to select the YAML file
30+
- If YAML contains snapshot_type, prompts to select a snapshot file
2631
- Loads selected queries into the execution queue"""
2732

2833
def execute(self, args: str):
@@ -35,9 +40,101 @@ def execute(self, args: str):
3540
return
3641

3742
qs = QuerySelector(yaml_path)
43+
44+
# Check if snapshot file is needed
45+
if qs.snapshot_type:
46+
print(f"\nThis query file requires {qs.snapshot_type} snapshot data.")
47+
48+
# Get current snapshot directory
49+
snapshot_dir = self.ctx.query_executor.current_snapshot_dir
50+
if not snapshot_dir:
51+
print("Error: No active tenancy snapshot. Use 'set tenancy' first.")
52+
return
53+
54+
# Let user select snapshot file
55+
snapshot_file = qs.select_snapshot_file(snapshot_dir)
56+
if not snapshot_file:
57+
print("No snapshot file selected. Query loading cancelled.")
58+
return
59+
60+
# Load the snapshot file into DuckDB
61+
table_name = self._load_snapshot_to_duckdb(snapshot_file, qs.snapshot_type)
62+
if table_name:
63+
qs.set_snapshot_table(table_name)
64+
print(f"✓ Loaded snapshot data into table: {table_name}")
65+
else:
66+
print("Failed to load snapshot data. Query loading cancelled.")
67+
return
68+
69+
# Select queries (with possible snapshot substitution)
3870
qs.select_queries()
3971
self.ctx.query_selector = qs
4072
print(f"Loaded queries from '{yaml_path}' into queue.")
73+
74+
if qs.snapshot_type:
75+
print(f"Queries will use snapshot table: {qs.snapshot_table}")
76+
77+
def _load_snapshot_to_duckdb(self, json_file, snapshot_type):
    """Load a JSON snapshot file into DuckDB and return the created table name.

    Args:
        json_file: Path to the snapshot JSON file (audit events or Cloud Guard
            problems) produced by the fetch commands.
        snapshot_type: Either "audit" or "cloudguard"; any other value gets a
            generic table name derived from the filename.

    Returns:
        The DuckDB table name on success, or None when the file holds no data
        or loading fails.
    """
    try:
        # Derive a stable, SQL-safe table name from the snapshot filename.
        # NOTE(review): '-' is now mapped to '_' for both snapshot types; the
        # audit branch previously stripped dashes entirely, which was
        # inconsistent with the cloudguard branch.
        filename = os.path.basename(json_file)
        if snapshot_type == "audit":
            suffix = filename.replace('audit_events_', '').replace('.json', '').replace('-', '_')
            table_name = f"audit_events_{suffix}"
        elif snapshot_type == "cloudguard":
            suffix = filename.replace('cloudguard_problems_', '').replace('.json', '').replace('-', '_')
            table_name = f"cloudguard_problems_{suffix}"
        else:
            table_name = filename.replace('.json', '').replace('-', '_')

        print(f"Loading {filename} into table {table_name}...")

        with open(json_file, 'r', encoding='utf-8') as f:
            data = json.load(f)

        if not data:
            print("Warning: JSON file contains no data")
            return None

        # Reuse an existing table so re-selecting the same snapshot is cheap.
        existing_tables = self.ctx.query_executor.show_tables()
        if table_name in existing_tables:
            print(f"Table '{table_name}' already exists, using existing table.")
            return table_name

        # Flatten each nested JSON record into a flat column dict.
        flattened = []
        for item in data:
            flat_item = {}
            self._flatten_dict(item, flat_item)
            flattened.append(flat_item)

        df = pd.DataFrame(flattened)

        # Register the DataFrame under a temporary name, materialize it as a
        # real table, then drop the registration so the registered view does
        # not shadow (or collide with) the catalog table of the same name.
        temp_view = f"_tmp_{table_name}"
        self.ctx.query_executor.conn.register(temp_view, df)
        self.ctx.query_executor.conn.execute(f"CREATE TABLE {table_name} AS SELECT * FROM {temp_view}")
        self.ctx.query_executor.conn.unregister(temp_view)
        print(f"Created table '{table_name}' with {len(df)} rows and {len(df.columns)} columns")

        return table_name

    except Exception as e:
        print(f"Error loading snapshot into DuckDB: {e}")
        return None
125+
126+
def _flatten_dict(self, d, flat_dict, prefix=''):
127+
"""Recursively flatten nested dictionaries and handle lists"""
128+
for k, v in d.items():
129+
key = f"{prefix}{k}" if prefix else k
130+
key = key.replace(' ', '_').replace('-', '_').replace('.', '_')
131+
132+
if isinstance(v, dict):
133+
self._flatten_dict(v, flat_dict, f"{key}_")
134+
elif isinstance(v, list):
135+
flat_dict[key] = json.dumps(v) if v else None
136+
else:
137+
flat_dict[key] = v
41138

42139
class SetTenancyCommand(Command):
43140
"""

security/security-design/shared-assets/oci-security-health-check-forensics/classes/query_selector.py

Lines changed: 73 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,16 @@
1111
import yaml
1212
import questionary
1313
import queue
14+
import os
15+
import glob
1416

1517
class QuerySelector:
1618
def __init__(self, yaml_file=None):
1719
"""Initialize QuerySelector with an optional YAML file path and a FIFO queue."""
1820
self.yaml_file = yaml_file
1921
self.query_queue = queue.Queue() # Always initialize an empty FIFO queue
22+
self.snapshot_type = None
23+
self.snapshot_table = None
2024

2125
if yaml_file:
2226
self.queries = self.load_queries()
@@ -25,15 +29,74 @@ def __init__(self, yaml_file=None):
2529
self.queries = [] # Empty query list if no file is provided
2630

2731
def load_queries(self):
    """Read the YAML query file and return its query list.

    Side effect: sets self.snapshot_type from the file's optional
    "snapshot_type" key (None when the key is absent), which later triggers
    interactive snapshot-file selection.

    Returns:
        The list under the "queries" key, or [] when the file cannot be
        opened or parsed.
    """
    try:
        with open(self.yaml_file, "r") as handle:
            content = yaml.safe_load(handle)
        # Remember whether these queries need a snapshot table substituted in.
        self.snapshot_type = content.get("snapshot_type", None)
        return content.get("queries", [])
    except Exception as exc:
        # Best-effort loader: report the problem and fall back to no queries.
        print(f"Error loading YAML file: {exc}")
        return []
3642

43+
def select_snapshot_file(self, snapshot_dir):
    """Interactively pick a snapshot JSON file matching self.snapshot_type.

    Args:
        snapshot_dir: Directory containing the active tenancy's snapshot files.

    Returns:
        The selected file path, or None when no snapshot_type is set, the
        type is unknown, no files match, or the user cancels the prompt.
    """
    if not self.snapshot_type:
        return None

    # Determine file pattern based on snapshot type.
    if self.snapshot_type == "audit":
        pattern = os.path.join(snapshot_dir, "audit_events_*_*.json")
    elif self.snapshot_type == "cloudguard":
        pattern = os.path.join(snapshot_dir, "cloudguard_problems_*_*.json")
    else:
        print(f"Unknown snapshot type: {self.snapshot_type}")
        return None

    # Find matching files.
    files = glob.glob(pattern)
    if not files:
        print(f"No {self.snapshot_type} snapshot files found in {snapshot_dir}")
        return None

    # Newest snapshots first; label each choice with its filename and size.
    # (Fixes the choice label, which previously did not show the filename.)
    file_choices = []
    for file_path in sorted(files, key=os.path.getmtime, reverse=True):
        filename = os.path.basename(file_path)
        file_size = self._format_file_size(os.stat(file_path).st_size)
        file_choices.append({
            'name': f"{filename} ({file_size})",
            'value': file_path
        })

    # questionary returns the chosen entry's 'value' (the full path),
    # or None when the user cancels.
    return questionary.select(
        f"Select a {self.snapshot_type} snapshot file for queries:",
        choices=file_choices
    ).ask()
84+
85+
def _format_file_size(self, size_bytes):
86+
"""Format file size in human readable format."""
87+
if size_bytes == 0:
88+
return "0 B"
89+
size_names = ["B", "KB", "MB", "GB"]
90+
import math
91+
i = int(math.floor(math.log(size_bytes, 1024)))
92+
p = math.pow(1024, i)
93+
s = round(size_bytes / p, 1)
94+
return f"{s} {size_names[i]}"
95+
96+
def set_snapshot_table(self, table_name):
    """Remember which DuckDB table should replace the {snapshot_data} placeholder in queries."""
    self.snapshot_table = table_name
99+
37100
def select_queries(self):
38101
"""Displays a list of query descriptions, allowing multiple selections, and pushes each item separately onto FIFO queue."""
39102
if not self.queries:
@@ -52,9 +115,15 @@ def select_queries(self):
52115
for query in self.queries:
53116
if query["description"] == choice:
54117
self.query_queue.put(("Description", query["description"]))
55-
self.query_queue.put(("SQL", query["sql"]))
118+
119+
# Substitute snapshot_table in SQL if needed
120+
sql = query["sql"]
121+
if self.snapshot_table and "{snapshot_data}" in sql:
122+
sql = sql.replace("{snapshot_data}", self.snapshot_table)
123+
124+
self.query_queue.put(("SQL", sql))
56125
if query.get("filter") != None:
57-
self.query_queue.put(("Filter", query.get("filter", "None"))) # Return filter as-is
126+
self.query_queue.put(("Filter", query.get("filter", "None")))
58127
break # Stop after adding matching query
59128

60129
def dequeue_item(self):
@@ -63,6 +132,4 @@ def dequeue_item(self):
63132
return self.query_queue.get()
64133
else:
65134
print("Queue is empty.")
66-
return None
67-
68-
135+
return None
Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,26 @@
1-
# Replace the table names for audit logs and cloudguard events
1+
snapshot_type: audit
2+
23
queries:
34
- description: "[FORENSIC]: Fetch distinct set of event types from the fetched audit logs window."
4-
sql: "SELECT DISTINCT event_type, source, data_event_name, data_compartment_name, data_identity_principal_name FROM audit_events_15042025_10"
5+
sql: "SELECT DISTINCT event_type, source, data_event_name, data_compartment_name, data_identity_principal_name FROM {snapshot_data}"
56

67
- description: "[FORENSIC] Get all the event_types etc and order them by principal_name for IdentityControlPlane"
7-
sql: "SELECT data_identity_principal_name, data_identity_ip_address, event_type, source, data_compartment_name, data_event_name FROM audit_events_15042025_10 where source = 'IdentityControlPlane' GROUP BY data_identity_principal_name, data_identity_ip_address, event_type, source, data_compartment_name, data_event_name ORDER BY data_identity_principal_name"
8+
sql: "SELECT data_identity_principal_name, data_identity_ip_address, event_type, source, data_compartment_name, data_event_name FROM {snapshot_data} where source = 'IdentityControlPlane' GROUP BY data_identity_principal_name, data_identity_ip_address, event_type, source, data_compartment_name, data_event_name ORDER BY data_identity_principal_name"
89

910
- description: "[FORENSIC] Get all the event_types etc and order them by principal_name for ConsoleSignIn"
10-
sql: "SELECT data_identity_principal_name, data_identity_ip_address, event_type, source, data_compartment_name, data_event_name FROM audit_events_15042025_10 where source = 'IdentitySignOn' GROUP BY data_identity_principal_name, data_identity_ip_address, event_type, source, data_compartment_name, data_event_name ORDER BY data_identity_principal_name"
11+
sql: "SELECT data_identity_principal_name, data_identity_ip_address, event_type, source, data_compartment_name, data_event_name FROM {snapshot_data} where source = 'IdentitySignOn' GROUP BY data_identity_principal_name, data_identity_ip_address, event_type, source, data_compartment_name, data_event_name ORDER BY data_identity_principal_name"
12+
13+
- description: "[FORENSIC] Find all administrative actions in the last period"
14+
sql: "SELECT event_time, data_event_name, data_identity_principal_name, data_resource_name FROM {snapshot_data} WHERE data_event_name LIKE '%Admin%' OR data_event_name LIKE '%Create%' OR data_event_name LIKE '%Delete%' OR data_event_name LIKE '%Update%' ORDER BY event_time DESC"
15+
16+
- description: "[FORENSIC] Show all unique users who performed actions"
17+
sql: "SELECT DISTINCT data_identity_principal_name, COUNT(*) as action_count FROM {snapshot_data} GROUP BY data_identity_principal_name ORDER BY action_count DESC"
18+
19+
- description: "[FORENSIC] Find all failed authentication attempts"
20+
sql: "SELECT event_time, data_identity_principal_name, data_event_name, data_response_response_time FROM {snapshot_data} WHERE data_event_name LIKE '%Failed%' OR data_response_status != 'SUCCEEDED' ORDER BY event_time DESC"
21+
22+
- description: "[FORENSIC] Show resource deletions"
23+
sql: "SELECT event_time, data_user_name, data_resource_name, data_event_name FROM {snapshot_data} WHERE data_event_name LIKE '%Delete%' ORDER BY event_time DESC"
24+
25+
- description: "[FORENSIC] Find policy changes"
26+
sql: "SELECT event_time, data_user_name, data_resource_name, data_event_name FROM {snapshot_data} WHERE event_type = 'Policy' OR event_type LIKE '%Policy%' ORDER BY event_time DESC"
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,40 @@
1-
# Replace the table names for audit logs and cloudguard events
1+
# queries/FORENSIC_CloudGuard.yaml
2+
snapshot_type: cloudguard
3+
24
queries:
35
- description: "[FORENSIC] Get all the CG problems sorted by resource_name"
4-
sql: "select resource_name, detector_rule_id, risk_level, labels, time_first_detected, time_last_detected, lifecycle_state, lifecycle_detail, detector_id from cloudguard_problems_10052025_12 ORDER BY resource_name"
6+
sql: >
7+
SELECT resource_name, detector_rule_id, risk_level, labels, time_first_detected, time_last_detected, lifecycle_state, lifecycle_detail, detector_id
8+
FROM {snapshot_data}
9+
ORDER BY resource_name
10+
11+
- description: "[FORENSIC] Show all high-risk Cloud Guard problems"
12+
sql: >
13+
SELECT resource_name, detector_rule_id, risk_level, labels, time_first_detected, time_last_detected, lifecycle_state
14+
FROM {snapshot_data}
15+
WHERE risk_level = 'HIGH'
16+
ORDER BY time_last_detected DESC
17+
18+
- description: "[FORENSIC] Find problems by detector type"
19+
sql: >
20+
SELECT detector_id, COUNT(*) as problem_count
21+
FROM {snapshot_data}
22+
GROUP BY detector_id
23+
ORDER BY problem_count DESC
24+
25+
- description: "[FORENSIC] Show active problems (not resolved)"
26+
sql: >
27+
SELECT resource_name, detector_rule_id, risk_level, lifecycle_state, lifecycle_detail
28+
FROM {snapshot_data}
29+
WHERE lifecycle_state != 'RESOLVED'
30+
ORDER BY time_last_detected DESC
31+
32+
- description: "[FORENSIC] Find problems in specific compartments"
33+
sql: >
34+
SELECT ic.name as compartment_name, ic.path as compartment_path, COUNT(*) as problem_count
35+
FROM {snapshot_data} cp
36+
LEFT JOIN identity_compartments ic
37+
ON cp.compartment_id = ic.id
38+
GROUP BY cp.compartment_id, ic.name, ic.path
39+
ORDER BY problem_count DESC
40+

0 commit comments

Comments
 (0)