Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ private function buildGenerationConfig(AIChatRequest $request, ?ResponseFormatIn
);
}

if ($responseMimeType instanceof ResponseMimeType || $responseSchema instanceof Schema) {
if ($responseMimeType instanceof ResponseMimeType) {
return new GenerationConfig(
responseMimeType: $responseMimeType,
responseSchema: $responseSchema,
Expand Down
67 changes: 67 additions & 0 deletions packages/mistral/examples/ocr-blocks.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
<?php

declare(strict_types=1);

/*
* This file is part of the Modelflow AI package.
*
* (c) Johannes Wachter <johannes@sulu.io>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/

use ModelflowAi\Mistral\Mistral;
use ModelflowAi\Mistral\Model;
use Symfony\Component\Dotenv\Dotenv;

// Usage: php ocr-blocks.php [document] [pages]
//   document  https URL or local file path (defaults to a sample arXiv PDF)
//   pages     comma-separated, zero-based page indexes (defaults to "0")

require_once __DIR__ . '/../vendor/autoload.php';

// Load environment configuration from the .env file next to this script.
(new Dotenv())->bootEnv(__DIR__ . '/.env');

// Fail fast with a clear message when the key is missing or blank; an empty
// value would otherwise only surface later as an API authentication error.
$apiKey = $_ENV['MISTRAL_API_KEY'] ?? null;
if (!\is_string($apiKey) || '' === \trim($apiKey)) {
    throw new RuntimeException('The MISTRAL_API_KEY environment variable is required.');
}

$client = Mistral::client($apiKey);
$document = $argv[1] ?? 'https://arxiv.org/pdf/2201.04234';

// The Mistral OCR API requires the document to be an https URL or a base64
// data URI. A local file path must be read and encoded before sending.
if (\is_file($document)) {
    $contents = \file_get_contents($document);
    if (false === $contents) {
        throw new RuntimeException(\sprintf('Unable to read file "%s".', $document));
    }

    // Fall back to PDF when the MIME type cannot be determined.
    $mimeType = \mime_content_type($document) ?: 'application/pdf';
    $documentUrl = \sprintf('data:%s;base64,%s', $mimeType, \base64_encode($contents));
} else {
    // Anything that is not an existing local file is passed through unchanged.
    $documentUrl = $document;
}

// Comma-separated, zero-based page indexes (e.g. "0,1,2"). Defaults to the first page.
$pages = $argv[2] ?? '0';

$response = $client->ocr()->process([
    'model' => Model::OCR_4->value,
    'document' => [
        'type' => 'document_url',
        'document_url' => $documentUrl,
    ],
    'pages' => $pages,
    'include_blocks' => true,
]);

// Print one line per block, prefixed with its type; newlines inside a block
// are flattened to spaces so each block stays on a single output line.
foreach ($response->pages as $page) {
    echo \sprintf("=== Page %d ===\n", $page->index);

    foreach ($page->blocks as $block) {
        echo \sprintf(
            "[%s] %s\n",
            $block->type,
            \str_replace("\n", ' ', $block->content),
        );
    }
}
74 changes: 74 additions & 0 deletions packages/mistral/examples/ocr.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
<?php

declare(strict_types=1);

/*
* This file is part of the Modelflow AI package.
*
* (c) Johannes Wachter <johannes@sulu.io>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/

use ModelflowAi\Mistral\Mistral;
use ModelflowAi\Mistral\Model;
use Symfony\Component\Dotenv\Dotenv;

// Usage: php ocr.php [document] [pages]
//   document  https URL or local file path (defaults to a sample arXiv PDF)
//   pages     comma-separated, zero-based page indexes (defaults to "0")

require_once __DIR__ . '/../vendor/autoload.php';

// Load environment configuration from the .env file next to this script.
(new Dotenv())->bootEnv(__DIR__ . '/.env');

// Fail fast with a clear message when the key is missing or blank; an empty
// value would otherwise only surface later as an API authentication error.
$apiKey = $_ENV['MISTRAL_API_KEY'] ?? null;
if (!\is_string($apiKey) || '' === \trim($apiKey)) {
    throw new RuntimeException('The MISTRAL_API_KEY environment variable is required.');
}

$client = Mistral::client($apiKey);
$document = $argv[1] ?? 'https://arxiv.org/pdf/2201.04234';

// The Mistral OCR API requires the document to be an https URL or a base64
// data URI. A local file path must be read and encoded before sending.
if (\is_file($document)) {
    $contents = \file_get_contents($document);
    if (false === $contents) {
        throw new RuntimeException(\sprintf('Unable to read file "%s".', $document));
    }

    // Fall back to PDF when the MIME type cannot be determined.
    $mimeType = \mime_content_type($document) ?: 'application/pdf';
    $documentUrl = \sprintf('data:%s;base64,%s', $mimeType, \base64_encode($contents));
} else {
    // Anything that is not an existing local file is passed through unchanged.
    $documentUrl = $document;
}

// Comma-separated, zero-based page indexes (e.g. "0,1,2"). Defaults to the first page.
$pages = $argv[2] ?? '0';

$response = $client->ocr()->process([
    'model' => Model::OCR->value,
    'document' => [
        'type' => 'document_url',
        'document_url' => $documentUrl,
    ],
    'pages' => $pages,
    // Pull header/footer into separate fields so $page->markdown is the main content only.
    'extract_header' => true,
    'extract_footer' => true,
]);

// Print each page as up to three sections; header and footer are only
// printed when the response actually carries them.
foreach ($response->pages as $page) {
    echo \sprintf("=== Page %d ===\n", $page->index);

    if (null !== $page->header) {
        echo '--- Header ---' . \PHP_EOL;
        echo $page->header . \PHP_EOL;
    }

    echo '--- Content ---' . \PHP_EOL;
    echo $page->markdown . \PHP_EOL;

    if (null !== $page->footer) {
        echo '--- Footer ---' . \PHP_EOL;
        echo $page->footer . \PHP_EOL;
    }
}
12 changes: 12 additions & 0 deletions packages/mistral/src/Client.php
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
use ModelflowAi\Mistral\Resources\ChatInterface;
use ModelflowAi\Mistral\Resources\Embeddings;
use ModelflowAi\Mistral\Resources\EmbeddingsInterface;
use ModelflowAi\Mistral\Resources\Ocr;
use ModelflowAi\Mistral\Resources\OcrInterface;

final readonly class Client implements ClientInterface
{
Expand Down Expand Up @@ -45,4 +47,14 @@ public function embeddings(): EmbeddingsInterface
{
return new Embeddings($this->transport);
}

/**
 * Gives access to the OCR resource, which extracts text and structured
 * content from documents and images.
 *
 * Every call builds a new Ocr resource around this client's transport.
 *
 * @see https://docs.mistral.ai/api/endpoint/ocr
 */
public function ocr(): OcrInterface
{
    $resource = new Ocr($this->transport);

    return $resource;
}
}
8 changes: 8 additions & 0 deletions packages/mistral/src/ClientInterface.php
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

use ModelflowAi\Mistral\Resources\ChatInterface;
use ModelflowAi\Mistral\Resources\EmbeddingsInterface;
use ModelflowAi\Mistral\Resources\OcrInterface;

interface ClientInterface
{
Expand All @@ -31,4 +32,11 @@ public function chat(): ChatInterface;
* @see https://docs.mistral.ai/api/#operation/createEmbedding
*/
public function embeddings(): EmbeddingsInterface;

/**
* Extract text and structured content from documents and images.
*
* @see https://docs.mistral.ai/api/endpoint/ocr
*
* @return OcrInterface the resource exposing the OCR "process" operation
*/
public function ocr(): OcrInterface;
}
2 changes: 2 additions & 0 deletions packages/mistral/src/Model.php
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ enum Model: string
case LARGE = 'mistral-large-latest';
case NEMO = 'open-mistral-nemo';
case EMBED = 'mistral-embed';
case OCR = 'mistral-ocr-latest';
case OCR_4 = 'mistral-ocr-4-0';
case PIXTRAL_LARGE = 'pixtral-large-latest';

public function jsonSupported(): bool
Expand Down
140 changes: 140 additions & 0 deletions packages/mistral/src/Resources/Ocr.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
<?php

declare(strict_types=1);

/*
* This file is part of the Modelflow AI package.
*
* (c) Johannes Wachter <johannes@sulu.io>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/

namespace ModelflowAi\Mistral\Resources;

use ModelflowAi\ApiClient\Transport\Payload;
use ModelflowAi\ApiClient\Transport\TransportInterface;
use ModelflowAi\Mistral\Model;
use ModelflowAi\Mistral\Responses\Ocr\ProcessResponse;
use Webmozart\Assert\Assert;

final readonly class Ocr implements OcrInterface
{
/**
* @param TransportInterface $transport transport used to send the OCR request
*/
public function __construct(
private TransportInterface $transport,
) {
}

/**
 * Validates the parameters, fills in defaults and sends the request to the
 * "ocr" endpoint.
 *
 * Defaults applied when the value is not set: "model" becomes
 * Model::OCR and "document.type" is derived from the keys present in
 * "document".
 *
 * @throws \InvalidArgumentException when the document type cannot be detected;
 *                                   the assertions in validateParameters() also
 *                                   raise on invalid input
 */
public function process(array $parameters): ProcessResponse
{
    $this->validateParameters($parameters);

    if (!isset($parameters['model'])) {
        $parameters['model'] = Model::OCR->value;
    }

    if (!isset($parameters['document']['type'])) {
        $parameters['document']['type'] = $this->detectDocumentType($parameters['document']);
    }

    $result = $this->transport->requestObject(
        Payload::create('ocr', $parameters),
    );

    return ProcessResponse::from($result->data, $result->meta);
}
Comment thread
wachterjohannes marked this conversation as resolved.

/**
 * Asserts that the request parameters have the expected types and values.
 *
 * "document" is mandatory and must carry the source key matching its type
 * ("document_url", "image_url" or "file_id"). Every other parameter is only
 * checked when it is set; the checks run in a fixed order so the first
 * invalid parameter is the one reported.
 *
 * @param array<string, mixed> $parameters
 */
private function validateParameters(array $parameters): void
{
    $model = $parameters['model'] ?? null;
    if (null !== $model) {
        Assert::string($model);
    }

    Assert::keyExists($parameters, 'document');
    $document = $parameters['document'];
    Assert::isArray($document);

    // An explicit type wins; otherwise it is inferred from the keys present.
    $type = $document['type'] ?? $this->detectDocumentType($document);
    Assert::string($type);
    Assert::inArray($type, ['document_url', 'image_url', 'file']);

    // Each document type requires exactly one string-valued source key.
    $sourceKey = match ($type) {
        'document_url' => 'document_url',
        'image_url' => 'image_url',
        'file' => 'file_id',
    };
    Assert::keyExists($document, $sourceKey);
    Assert::string($document[$sourceKey]);

    if ('document_url' === $type && isset($document['document_name'])) {
        Assert::string($document['document_name']);
    }

    // "pages" may be a list of integers or a string.
    $pages = $parameters['pages'] ?? null;
    if (\is_array($pages)) {
        Assert::allInteger($pages);
    } elseif (null !== $pages) {
        Assert::string($pages);
    }

    foreach (['include_blocks', 'include_image_base64'] as $flag) {
        if (isset($parameters[$flag])) {
            Assert::boolean($parameters[$flag]);
        }
    }

    foreach (['image_limit', 'image_min_size'] as $imageOption) {
        if (isset($parameters[$imageOption])) {
            Assert::integer($parameters[$imageOption]);
        }
    }

    foreach (['bbox_annotation_format', 'document_annotation_format'] as $annotationFormat) {
        if (isset($parameters[$annotationFormat])) {
            Assert::isArray($parameters[$annotationFormat]);
        }
    }

    if (isset($parameters['document_annotation_prompt'])) {
        Assert::string($parameters['document_annotation_prompt']);
    }

    $tableFormat = $parameters['table_format'] ?? null;
    if (null !== $tableFormat) {
        Assert::string($tableFormat);
        Assert::inArray($tableFormat, ['markdown', 'html']);
    }

    foreach (['extract_header', 'extract_footer'] as $extractFlag) {
        if (isset($parameters[$extractFlag])) {
            Assert::boolean($parameters[$extractFlag]);
        }
    }

    $granularity = $parameters['confidence_scores_granularity'] ?? null;
    if (null !== $granularity) {
        Assert::string($granularity);
        Assert::inArray($granularity, ['word', 'page']);
    }
}

/**
 * Infers the document type from the first source key that is set.
 *
 * Keys are probed in the order "document_url", "image_url", "file_id";
 * a "file_id" maps to the type "file".
 *
 * @param array<mixed> $document
 *
 * @throws \InvalidArgumentException when none of the known keys is set
 */
private function detectDocumentType(array $document): string
{
    $typeBySourceKey = [
        'document_url' => 'document_url',
        'image_url' => 'image_url',
        'file_id' => 'file',
    ];

    foreach ($typeBySourceKey as $sourceKey => $type) {
        if (isset($document[$sourceKey])) {
            return $type;
        }
    }

    throw new \InvalidArgumentException('The document type could not be detected.');
}
}
39 changes: 39 additions & 0 deletions packages/mistral/src/Resources/OcrInterface.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
<?php

declare(strict_types=1);

/*
* This file is part of the Modelflow AI package.
*
* (c) Johannes Wachter <johannes@sulu.io>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/

namespace ModelflowAi\Mistral\Resources;

use ModelflowAi\Mistral\Responses\Ocr\ProcessResponse;

/**
* Contract for the OCR resource of the Mistral client.
*/
interface OcrInterface
{
/**
* Runs OCR with the given request parameters and returns the parsed response.
*
* Only "document" is required; all other keys are optional.
* NOTE(review): the shape of "document" is not spelled out here — presumably it
* carries one of "document_url", "image_url" or "file_id" plus an optional
* "type"; confirm against the implementation.
*
* @param array{
* model?: string|null,
* document: array<string, mixed>,
* pages?: string|int[]|null,
* include_blocks?: bool|null,
* include_image_base64?: bool|null,
* image_limit?: int|null,
* image_min_size?: int|null,
* bbox_annotation_format?: array<string, mixed>|null,
* document_annotation_format?: array<string, mixed>|null,
* document_annotation_prompt?: string|null,
* table_format?: 'markdown'|'html'|null,
* extract_header?: bool,
* extract_footer?: bool,
* confidence_scores_granularity?: 'word'|'page'|null,
* } $parameters
*/
public function process(array $parameters): ProcessResponse;
}
Loading
Loading