async def document_analysis(filename: str) -> list:
    """
    Document Understanding

    Render every page of a PDF to a JPEG image, pass each image to
    ``parse_page_with_gpt`` concurrently, and collect the results.

    Args:
        filename: pdf filename str

    Returns:
        A list with one entry per page, in page order, holding the value
        ``parse_page_with_gpt`` returned for that page (a string).
    """
    pdf = pdfium.PdfDocument(filename)
    images = []
    try:
        page_count = len(pdf)
        print("Retrieved PDF ", page_count)
        for i in range(page_count):
            print("Iter count ", i)
            page = pdf[i]
            print("Got the page ", page)
            # scale multiplies the pixels rendered per PDF canvas unit, so 8
            # yields a large, high-resolution bitmap for each page.
            image = page.render(scale=8).to_pil()
            buffered = BytesIO()
            image.save(buffered, format="JPEG")
            images.append(base64.b64encode(buffered.getvalue()).decode("utf-8"))
            # The page is fully encoded at this point; release its handle now
            # instead of keeping every page open until the end.
            page.close()
    finally:
        # Release the document even if rendering or encoding fails, and
        # before the (potentially slow) model calls below start.
        pdf.close()
    # gather() returns results in the same order as the awaitables it was
    # given, so the output lines up with the page order.
    text_of_pages = await asyncio.gather(
        *(parse_page_with_gpt(image) for image in images)
    )
    print("Text of pages got")
    return list(text_of_pages)
async def parse_page_with_gpt(base64_image: str) -> str:
    """
    Ask the chat model to turn one page image into text.

    Args:
        base64_image: base64-encoded JPEG data, embedded into a
            ``data:image/jpeg;base64,...`` URL for the request.

    Returns:
        The message content of the first choice in the response, or an
        empty string when that content is empty or ``None``.
    """
    system_message = {
        "role": "system",
        "content": "\nYou are a helpful assistant that extracts information from images.\n",
    }
    instruction_part = {
        "type": "text",
        "text": "Extract information from image into text",
    }
    image_part = {
        "type": "image_url",
        "image_url": {
            "url": f"data:image/jpeg;base64,{base64_image}",
            "detail": "auto",
        },
    }
    user_message = {"role": "user", "content": [instruction_part, image_part]}

    completion = await clienta.chat.completions.create(
        model=MODEL,
        messages=[system_message, user_message],
        temperature=0,
        max_tokens=4096,
    )
    extracted = completion.choices[0].message.content
    if not extracted:
        return ""
    return extracted
No comments:
Post a Comment