
Commit 5017d64

Merge branch 'main' into dependabot/pip/ai/gen-ai-agents/travel_agent/pillow-11.3.0
2 parents ee254af + b9c1e4d commit 5017d64

File tree: 3 files changed (+160, −101 lines)

Lines changed: 150 additions & 99 deletions
@@ -1,16 +1,57 @@
-import pandas as pd
+"""
+Smart Invoice Extraction
+
+A Streamlit-based invoice data extraction tool leveraging Oracle Cloud Infrastructure (OCI)
+Generative AI models for multimodal (text + image) processing.
+
+This module provides:
+
+- Helper functions:
+    - save_images(images, output_format="JPEG"):
+        Convert PIL Image objects to in-memory byte streams for downstream processing.
+    - encode_image(image_path):
+        Read an image file from disk and return its Base64-encoded string.
+    - save_to_csv(data, file_name="extracted_data.csv"):
+        Persist a list of dicts to a CSV file.
+
+- extractor(image_list):
+    Uses the image of a PDF invoice to identify and extract key header fields, by:
+      • Initializing an LLM (meta.llama-3.2-90b-vision-instruct)
+      • Encoding the image as Base64
+      • Sending a system + human message prompt to extract invoice headers in list format.
+
+- invoiceAnalysisPlus():
+    The main Streamlit application which:
+      • Renders a UI for uploading PDF invoices
+      • Converts PDFs to JPEG images and prepares byte streams
+      • Invokes extractor() to propose candidate fields
+      • Constructs and sends prompts to two OCI LLMs:
+          – A vision model for image-based extraction
+          – A text-only model (cohere.command-r-plus-08-2024) for dynamic prompt generation
+      • Displays results in a DataFrame and saves them to CSV
+
+Usage:
+    Run the module as a standalone script:
+        streamlit run invoice_analysis.py
+
+Author: Ali Ottoman
+"""
+
+import io
 import json
-from langchain.chains.llm import LLMChain
-from langchain_core.prompts import PromptTemplate
+import base64
+import pandas as pd
 import streamlit as st
 from langchain_community.chat_models.oci_generative_ai import ChatOCIGenAI
 from langchain_core.messages import HumanMessage, SystemMessage
-import base64
 from pdf2image import convert_from_bytes
-import io
+


 # Helper function to convert a list of images into byte arrays for further processing
 def save_images(images, output_format="JPEG"):
+    """
+    Convert PIL Image objects to in-memory byte streams for downstream processing.
+    """
     image_list = []
     for image in images:
         img_byte_arr = io.BytesIO()
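Aside from the diff itself: a minimal usage sketch of the helpers documented in the new docstring, illustrative only and not part of this commit. It assumes the module is saved as invoice_analysis.py (the name used in the docstring's Usage section), that poppler is installed for pdf2image, and that a local sample.pdf exists.

import base64
from pdf2image import convert_from_bytes
from invoice_analysis import save_images  # assumed module name, taken from the docstring

# Convert a hypothetical local PDF into PIL images, then into in-memory JPEG byte streams.
with open("sample.pdf", "rb") as f:
    images = convert_from_bytes(f.read(), fmt="jpeg")
image_list = save_images(images)

# Each buffer can then be Base64-encoded for the vision model, as extractor() does below.
encoded_pages = [base64.b64encode(buf.getvalue()).decode("utf-8") for buf in image_list]
print(f"Prepared {len(encoded_pages)} page(s) for extraction")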
@@ -21,17 +62,26 @@ def save_images(images, output_format="JPEG"):

 # Helper function to encode an image to base64 for sending to LLM
 def encode_image(image_path):
+    """
+    Read an image file from disk and return its Base64-encoded string.
+    """
     with open(image_path, "rb") as image_file:
         return base64.b64encode(image_file.read()).decode("utf-8")

 # Save extracted data to a CSV file and show success message in Streamlit
 def save_to_csv(data, file_name="extracted_data.csv"):
+    """
+    Persist a list of dicts to a CSV file.
+    """
     df = pd.DataFrame(data)
     df.to_csv(file_name, index=False)
     st.success(f"Data saved to {file_name}")

 # Extract key headers from the first image of a PDF invoice
 def extractor(image_list):
+    """
+    Uses the image of a PDF invoice to identify and extract key header fields
+    """
     # Replace this with your own compartment ID
     compID = "<YOUR_COMPARTMENT_OCID_HERE>"

@@ -68,155 +118,156 @@ def extractor(image_list):
     return eval(ai_response.content)

 # Main Streamlit app function
-def invoiceAnalysisPlus():
+def invoice_analysis_plus():
+    """
+    The main Streamlit application
+    """
     st.title("Invoice Data Extraction")
-
+
     with st.sidebar:
         st.title("Parameters")
-        # User prompt input
+        # Replace with your own compartment ID
+        compID = "<YOUR_COMPARTMENT_OCID_HERE>"
         user_prompt = st.text_input("Input the elements you are looking to extract here")
         st.caption("Our AI assistant has extracted the following key elements from the invoice. Please select the elements you wish to extract.")

-
     uploaded_file = st.file_uploader("Upload your invoices here:", type=["pdf"])
-
+
     if uploaded_file is not None:
         with st.spinner("Processing..."):
+            # Convert PDF to image list
             if uploaded_file.type == "application/pdf":
                 images = convert_from_bytes(uploaded_file.read(), fmt="jpeg")
             else:
                 images = [convert_from_bytes(uploaded_file.read(), fmt="jpeg")[0]]
-
-            image_list = save_images(images) # Convert to byte arrays
-
+
+            # Save as byte streams
+            image_list = save_images(images)
+
+            # Load both image-based and text-based LLMs
             llm = ChatOCIGenAI(
-                model_id="meta.llama-3.2-90b-vision-instruct",
-                compartment_id="", #TO-DO: Add your compartment ID here
+                model_id="meta.llama-3.2-90b-vision-instruct", # Replace with your model ID
+                compartment_id=compID,
                 model_kwargs={"max_tokens": 2000, "temperature": 0}
             )
             llm_for_prompts = ChatOCIGenAI(
-                model_id="cohere.command-r-plus-08-2024",
-                compartment_id="",#TO-DO: Add your compartment ID here
+                model_id="cohere.command-r-plus-08-2024", # Replace with your model ID
+                compartment_id=compID,
                 model_kwargs={"max_tokens": 2000, "temperature": 0}
             )
-
-            # Options for data types
-            data_types = [ "Text", "Number", "Percentage", "Date"]
-
-            # Lists to store names and their types
+
+            # Select box UI for user to pick elements and their data types
+            data_types = ["Text", "Number", "Percentage", "Date"]
             elements = []
+
             if "availables" not in st.session_state:
                 st.session_state.availables = extractor(image_list)
-            for i in range(3): # Adjust 'n' for the maximum number of selections
-                col1, col2 = st.columns([2, 1]) # Adjust width ratio if needed
-
-                with col1:
-                    # Preserve user selection across reruns
+
+            for i in range(3): # Max 3 fields
+                col1, col2 = st.columns([2, 1])
+                with col1:
                     name = st.selectbox(f"Select an element {i+1}", st.session_state.availables, key=f"name_{i}", index=i)
                 with col2:
                     data_type = st.selectbox(f"Type {i+1}", data_types, key=f"type_{i}")
                 elements.append((name, data_type))

-            if elements is not None:
+            # Generate appropriate prompt based on selected or input fields
+            if elements:
                 system_message_cohere = SystemMessage(
-                content=f"""
-                Based on the following set of elements {elements}, with their respective types ({elements[0][1]}, {elements[1][1]}, {elements[2][1]}), Extract the following details and provide the response only in valid JSON format (no extra explanation or text):
-                - {elements[0][0]}
-                - {elements[1][0]}
-                - {elements[2][0]}
-                Ensure the extracted data is formatted correctly as JSON and include nothing else at all in the response, not even a greeting or closing.
-                For example:
-                {{
-                {elements[0][0]}: "296969",
-                {elements[1][0]}: "296969",
-                {elements[2][0]}: "296969",
-                }}
-                """)
+                    content=f"""
+                    Based on the following set of elements {elements}, with their respective types ({elements[0][1]}, {elements[1][1]}, {elements[2][1]}), Extract the following details and provide the response only in valid JSON format (no extra explanation or text):
+                    - {elements[0][0]}
+                    - {elements[1][0]}
+                    - {elements[2][0]}
+                    Ensure the extracted data is formatted correctly as JSON and include nothing else at all in the response, not even a greeting or closing.
+                    For example:
+                    {{
+                    {elements[0][0]}: "296969",
+                    {elements[1][0]}: "296969",
+                    {elements[2][0]}: "296969",
+                    }}
+                    """)
                 ai_response_cohere = system_message_cohere
             else:
-                # Cohere section for generating the prompt
                 system_message_cohere = SystemMessage(
-                content=f"""
-                Based on the following system prompt, create a new prompt accordingly based on the elements specified in the user prompt here ({user_prompt}).
-
-                This is the system prompt template:
-                "
-                Extract the following details and provide the response only in valid JSON format (no extra explanation or text):
-                - **Debit / Credit Note No.**
-                - **Policy Period**
-                - **Insured**
-                - **Vessel Name**
-                - **Details**
-                - **Currency**
-                - **Gross Premium 100%**
-                - **OIMSL Share**
-                - **Total Deductions**
-                - **Net Premium**
-                - **Premium Schedule**
-                - **Installment Amount**
-
-                Ensure the extracted data is formatted correctly as JSON and include nothing else at all in the response, not even a greeting or closing.
-
-                For example:
-
-                "Debit / Credit Note No.": "296969",
-                "Policy Period": "Feb 20, 2024 to Jul 15, 2025",
-                "Insured": "Stealth Maritime Corp. S.A.",
-                "Vessel Name": "SUPRA DUKE - HULL & MACHINERY", (Make sure this is the entire vessel name only)
-                "Details": "SUPRA DUKE - Original Premium",
-                "Currency": "USD",
-                "Gross Premium 100%": 56973.63,
-                "OIMSL Share": 4557.89,
-                "Total Deductions": 979.92,
-                "Net Premium": 3577.97,
-                "Premium Schedule": ["Apr 20, 2024", "Jun 14, 2024", "Sep 13, 2024", "Dec 14, 2024", "Mar 16, 2025", "Jun 14, 2025"],
-                "Installment Amount": [372.87, 641.02, 641.02, 641.02, 641.02, 641.02]
-
-                )" ensure your response is a system prompt format with an example of what the ouput should look like. Also ensure to mention in your gernerated prompt that no other content whatsover should appear except the JSON
+                    content = f"""
+                    Based on the following system prompt, create a new prompt accordingly based on the elements specified in the user prompt here ({user_prompt}).
+
+                    This is the system prompt template:
+                    "
+                    Extract the following details and provide the response only in valid JSON format (no extra explanation or text):
+                    - **Debit / Credit Note No.**
+                    - **Policy Period**
+                    - **Insured**
+                    - **Vessel Name**
+                    - **Details**
+                    - **Currency**
+                    - **Gross Premium 100%**
+                    - **OIMSL Share**
+                    - **Total Deductions**
+                    - **Net Premium**
+                    - **Premium Schedule**
+                    - **Installment Amount**
+
+                    Ensure the extracted data is formatted correctly as JSON and include nothing else at all in the response, not even a greeting or closing.
+
+                    For example:
+
+                    "Debit / Credit Note No.": "296969",
+                    "Policy Period": "Feb 20, 2024 to Jul 15, 2025",
+                    "Insured": "Stealth Maritime Corp. S.A.",
+                    "Vessel Name": "SUPRA DUKE - HULL & MACHINERY", (Make sure this is the entire vessel name only)
+                    "Details": "SUPRA DUKE - Original Premium",
+                    "Currency": "USD",
+                    "Gross Premium 100%": 56973.63,
+                    "OIMSL Share": 4557.89,
+                    "Total Deductions": 979.92,
+                    "Net Premium": 3577.97,
+                    "Premium Schedule": ["Apr 20, 2024", "Jun 14, 2024", "Sep 13, 2024", "Dec 14, 2024", "Mar 16, 2025", "Jun 14, 2025"],
+                    "Installment Amount": [372.87, 641.02, 641.02, 641.02, 641.02, 641.02]
+
+                    )" ensure your response is a system prompt format with an example of what the ouput should look like. Also ensure to mention in your gernerated prompt that no other content whatsover should appear except the JSON
                     """)
                 ai_response_cohere = llm_for_prompts.invoke(input=[system_message_cohere])
-                print(ai_response_cohere)

+            # Extracted data list
             extracted_data = []
-
+
             with st.spinner("Analyzing invoice..."):
                 for idx, img_byte_arr in enumerate(image_list):
                     try:
-                        # Convert the image to base64 directly from memory
                         encoded_frame = base64.b64encode(img_byte_arr.getvalue()).decode("utf-8")
-                        if elements is not None:
+
+                        if elements:
                             system_message = ai_response_cohere
                         else:
-                            system_message = SystemMessage(
-                                content=ai_response_cohere.content)
+                            system_message = SystemMessage(content=ai_response_cohere.content)
+
                         human_message = HumanMessage(
                             content=[
                                 {"type": "text", "text": "This is my invoice"},
                                 {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{encoded_frame}"}},
                             ]
                         )
+
                         ai_response = llm.invoke(input=[human_message, system_message])
-                        print(ai_response.content)
-                        index = ai_response.content.find('{')
-                        index2 = ai_response.content.find('}')
-                        x = ai_response.content[index:]
-                        x2 = x[:index2+1]
-                        print(x2)
-                        response_dict = json.loads(x2)
-
-                        # Add metadata for tracking
-                        response_dict["File Name"] = uploaded_file.name
-                        response_dict["Page Number"] = idx + 1
+                        json_start = ai_response.content.find('{')
+                        json_end = ai_response.content.find('}', json_start)
+                        json_data = ai_response.content[json_start:json_end + 1]

+                        response_dict = json.loads(json_data)
+                        response_dict["File Name"] = uploaded_file.name
+                        response_dict["Page Number"] = idx + 1
                         extracted_data.append(response_dict)

                     except Exception as e:
                         st.error(f"Error processing page {idx+1}: {str(e)}")
-
+
+            # Display and save results
             if extracted_data:
                 save_to_csv(extracted_data)
                 st.dataframe(pd.DataFrame(extracted_data))

-# Run the chatbot function
+# Run the app
 if __name__ == "__main__":
-    invoiceAnalysisPlus()
+    invoice_analysis_plus()
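The refactored response handling above slices the first {...} span out of the model reply before passing it to json.loads. A standalone sketch of that same logic follows; the helper name and the example reply are illustrative, not part of the commit, and the approach assumes the reply contains a single flat JSON object with no nested braces.

import json

def parse_first_json_object(text: str) -> dict:
    """Extract and parse the first '{...}' span in an LLM reply (flat objects only)."""
    json_start = text.find('{')
    json_end = text.find('}', json_start)
    if json_start == -1 or json_end == -1:
        raise ValueError("No JSON object found in model response")
    return json.loads(text[json_start:json_end + 1])

# Example with a reply that wraps the JSON in extra prose:
reply = 'Here you go: {"Invoice No": "296969", "Currency": "USD"} Thanks!'
print(parse_first_json_object(reply))  # {'Invoice No': '296969', 'Currency': 'USD'}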

cloud-infrastructure/ai-infra-gpu/ai-infrastructure/nemo-megatron-training-oke/README.md

Lines changed: 9 additions & 2 deletions
@@ -8,7 +8,7 @@ on the Oracle Container Engine for Kubernetes (OKE) using
 Reference results from NVIDIA to train Llama 3 can be found on the
 [NGC Catalog](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/dgxc-benchmarking/resources/llama3-dgxc-benchmarking).

-Reviewed: 18.03.2025
+Reviewed: 01.07.2025

 # When to use this asset?

@@ -31,7 +31,14 @@ This guide is loosely based on the
 [to the instructions](https://github.com/oracle-quickstart/oci-hpc-oke/tree/main#instructions-for-deploying-an-oke-cluster-with-gpus-and-rdma-connectivity),
 importing one of the images and creating a GPU partition with BM.GPU.H100.8 nodes.

-The configuration here assumes a minimum of 16 BM.GPU.H100.8 nodes.
+The configuration here assumes a minimum of 1 BM.GPU.H100.8 node for
+training with 8B parameters, and a minimum of 8 BM.GPU.H100.8 nodes for 70B
+parameters.
+
+If another shape is used, the NCCL and MPI parameters in the Kubernetes
+[configuration map](./files/training/templates/mpi.yaml) should be adapted
+using the same parameter values as the
+[performance testing scripts](https://github.com/oracle-quickstart/oci-hpc-oke/tree/main/manifests/nccl-tests).

 - Ensure that the follwing setting is selected under the "OKE Cluster" section:

cloud-infrastructure/compute-including-hpc/compute-software/README.md

Lines changed: 1 addition & 0 deletions
@@ -20,6 +20,7 @@ This page contains information and useful links regarding Compute services that
 - [Youtube Video: Using a Network Bridge with KVM VMs on Oracle Linux](https://www.youtube.com/watch?v=CXBTBxFoSKI&t=120s)
 - [Youtube Video: Manage OCI instances directly from VirtualBox 7](https://www.youtube.com/watch?v=uFEN4Di-WDE)

+- [Olygo Github: OCI_Compute_Function_Reserved_Pip_Allocator](https://github.com/Olygo/OCI-FN_reserved_pip_allocator)
 - [Olygo Github: OCI Compute Capacity Report using CloudShell](https://github.com/Olygo/OCI_ComputeCapacityReport)
 - [Olygo Github: Learn how to troubleshoot Linux and Windows instances using OCI Console Connection](https://github.com/Olygo/OCI_Console-Connections)
 - [Olygo Github: OCI function that forces IMDSv2 on compute instances](https://github.com/Olygo/OCI-FN_IMDS-Watcher)
