# Start the model server first (run in a shell): vllm serve nanonets/Nanonets-OCR2-3B
import base64
import os

from openai import OpenAI
# Client for the local OpenAI-compatible endpoint (presumably the server
# started with `vllm serve`; confirm host/port against your deployment).
#
# The OpenAI client raises at construction time when no API key is available,
# so fall back to a placeholder when OPENAI_API_KEY is not set. A key that is
# present in the environment is still used as before.
# NOTE(review): the placeholder only works if the server does not enforce a
# key -- confirm against how the server was launched.
client = OpenAI(
    base_url="http://localhost:8000/v1",
    api_key=os.environ.get("OPENAI_API_KEY", "EMPTY"),
)

# Model identifier sent with every chat-completion request.
model = "nanonets/Nanonets-OCR2-3B"
def encode_image(image_path) -> str:
    """Return the contents of the file at *image_path* as a base64 string.

    The file is read in binary mode, base64-encoded, and the resulting bytes
    are decoded as UTF-8 so the value can be embedded in text (for example in
    a ``data:`` URL).

    Args:
        image_path: Path of the file to read; passed straight to ``open``.

    Returns:
        The base64 encoding of the file's bytes, as ``str``.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode("utf-8")
def infer(
    img_base64,
    *,
    prompt="Extract the text from the above document as if you were reading it naturally.",
    mime_type="image/png",
    max_tokens=15000,
):
    """Send one base64-encoded image to the chat-completions endpoint.

    Builds a single user message containing the image (as a ``data:`` URL)
    followed by a text instruction, and submits it through the module-level
    ``client`` using the module-level ``model``.

    Args:
        img_base64: Base64 text of the image bytes, e.g. the value returned
            by ``encode_image``.
        prompt: Instruction text placed after the image in the message.
            Defaults to the original extraction prompt.
        mime_type: Media type written into the ``data:`` URL. Defaults to
            ``"image/png"``; pass e.g. ``"image/jpeg"`` for other formats.
        max_tokens: Upper bound on generated tokens passed to the request.

    Returns:
        The ``content`` of the first choice's message in the response.
    """
    image_part = {
        "type": "image_url",
        "image_url": {"url": f"data:{mime_type};base64,{img_base64}"},
    }
    text_part = {
        "type": "text",
        "text": prompt,
    }
    response = client.chat.completions.create(
        model=model,
        messages=[
            {
                "role": "user",
                # Image first, then the instruction that refers to it.
                "content": [image_part, text_part],
            }
        ],
        # temperature 0.0 is kept from the original request settings.
        temperature=0.0,
        max_tokens=max_tokens,
    )
    return response.choices[0].message.content
No comments:
Post a Comment