Tuesday, March 17, 2026

How to serve a local OCR model and run inference

vllm serve nanonets/Nanonets-OCR2-3B

from openai import OpenAI

import base64


# OpenAI-compatible client pointed at the local vLLM server (default port 8000).
client = OpenAI(base_url="http://localhost:8000/v1")

# Must match the model name passed to `vllm serve`.
model = "nanonets/Nanonets-OCR2-3B"


def encode_image(image_path):
    """Return the contents of the file at *image_path* as a base64 string.

    The file is read in binary mode and the base64 bytes are decoded to a
    UTF-8 ``str`` so the result can be embedded in a data URL.
    """
    with open(image_path, "rb") as fh:
        raw_bytes = fh.read()
    return base64.b64encode(raw_bytes).decode("utf-8")


def infer(img_base64):
    """Run OCR on a base64-encoded PNG via the local vLLM server.

    Sends a single user message containing the image (as a data URL) plus an
    instruction prompt, and returns the model's extracted text.

    :param img_base64: base64-encoded PNG image data (no data-URL prefix).
    :return: the extracted text from the first completion choice.
    """
    image_part = {
        "type": "image_url",
        "image_url": {"url": f"data:image/png;base64,{img_base64}"},
    }
    text_part = {
        "type": "text",
        "text": "Extract the text from the above document as if you were reading it naturally.",
    }
    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": [image_part, text_part]}],
        temperature=0.0,  # deterministic decoding for OCR
        max_tokens=15000,
    )
    return response.choices[0].message.content



No comments:

Post a Comment