# OpenAPI 3.1 description of the llama-parse HTTP service.
openapi: '3.1.0'

info:
  title: llama-parse
  version: 'v1'
  description: >-
    PDF and document parsing via LlamaCloud API with sync, async, and
    queue-based processing

servers:
  # NOTE(review): "your-subdomain" looks like a placeholder host; confirm the
  # deployed URL before publishing this document.
  - url: 'https://llama-parse.your-subdomain.workers.dev'

# Document-level default security. Each list entry is an alternative, so a
# request has to satisfy only one of the three header-key schemes. An
# operation may override this default with its own `security` value.
security:
  - ApiKeyAuth: []
  - WorkflowAuth: []
  - OrchestratorAuth: []

components:
  # Three API-key schemes, all carried in request headers. They differ only
  # in the header name.
  securitySchemes:
    # Key supplied in the X-API-Key header.
    ApiKeyAuth:
      name: X-API-Key
      in: header
      type: apiKey
    # Key supplied in the X-Workflow-Key header.
    WorkflowAuth:
      name: X-Workflow-Key
      in: header
      type: apiKey
    # Key supplied in the X-Orchestrator-Key header.
    OrchestratorAuth:
      name: X-Orchestrator-Key
      in: header
      type: apiKey

  schemas:
    # Parse configuration options. There is no `required` list, so any subset
    # of these properties is a valid ParseConfig. The schema is referenced by
    # the response schemas (parseConfig / config) and is the request body of
    # PUT /v1/config.
    ParseConfig:
      type: object
      properties:
        disable_cache: {type: boolean}
        target_pages: {type: string}
        max_pages: {type: integer}
        language: {type: string}
        custom_prompt: {type: string}
        extract_printed_page_number: {type: boolean}
        annotate_links: {type: boolean}
        inline_images: {type: boolean}
        output_tables_as_markdown: {type: boolean}
        merge_continued_tables: {type: boolean}
        compact_markdown_tables: {type: boolean}
        markdown_table_multiline_separator: {type: boolean}
        do_not_unroll_columns: {type: boolean}
        preserve_layout_alignment: {type: boolean}
        preserve_very_small_text: {type: boolean}
        aggressive_table_extraction: {type: boolean}
        extract_layout: {type: boolean}
        high_res_ocr: {type: boolean}
        outlined_table_extraction: {type: boolean}
        specialized_chart_parsing: {type: boolean}

    # Names the parsing provider. `name` is mandatory and the enum currently
    # permits a single value.
    ProviderInfo:
      type: object
      required: [name]
      properties:
        name:
          type: string
          enum: [llama-cloud]

    # Body returned with HTTP 200 by POST /v1/parse.
    SyncParseResponse:
      type: object
      # text, markdown and parseConfig are the only optional properties.
      required: [ok, mode, fileName, mimeType, outputFormat, provider]
      properties:
        ok: {type: boolean}
        mode:
          type: string
          enum: [sync]
        fileName: {type: string}
        mimeType: {type: string}
        outputFormat:
          type: string
          enum: [text, markdown, both]
        # NOTE(review): presumably each of these is present only when
        # outputFormat selects it; the schema does not enforce that.
        text: {type: string}
        markdown: {type: string}
        parseConfig:
          $ref: '#/components/schemas/ParseConfig'
        provider:
          $ref: '#/components/schemas/ProviderInfo'

    # Body returned with HTTP 202 by POST /v1/parse/jobs. Every property is
    # required.
    AsyncJobResponse:
      type: object
      required: [ok, mode, jobId, status, statusUrl]
      properties:
        ok: {type: boolean}
        mode:
          type: string
          enum: [async]
        jobId:
          type: string
          format: uuid
        # `queued` is the only status this schema allows.
        status:
          type: string
          enum: [queued]
        statusUrl:
          type: string
          format: uri

    # Body returned with HTTP 200 by GET /v1/parse/jobs/{jobId}.
    JobStatusResponse:
      type: object
      # Only these three are guaranteed; every other property is optional.
      required: [ok, jobId, status]
      properties:
        ok: {type: boolean}
        jobId:
          type: string
          format: uuid
        status:
          type: string
          enum: [queued, processing, completed, failed]
        fileName: {type: string}
        mimeType: {type: string}
        outputFormat:
          type: string
          enum: [text, markdown, both]
        text: {type: string}
        markdown: {type: string}
        parseConfig:
          $ref: '#/components/schemas/ParseConfig'
        provider:
          $ref: '#/components/schemas/ProviderInfo'
        # NOTE(review): error / errorCode presumably accompany the `failed`
        # status; the schema itself does not tie them together. Confirm.
        error: {type: string}
        errorCode:
          type: string
          enum: [PARSE_TIMEOUT, PARSE_FAILED]
        attempts: {type: integer}
        createdAt:
          type: string
          format: date-time
        updatedAt:
          type: string
          format: date-time

    # Body returned with HTTP 200 by GET /v1/config and PUT /v1/config.
    ConfigResponse:
      type: object
      # `env` is the only top-level property that may be omitted.
      required: [ok, config, secrets]
      properties:
        ok: {type: boolean}
        config:
          $ref: '#/components/schemas/ParseConfig'
        # One boolean per secret name.
        # NOTE(review): presumably "is this secret configured" rather than
        # the secret value itself — confirm against the handler.
        secrets:
          type: object
          properties:
            LLAMA_CLOUD_API_KEY: {type: boolean}
            UNIVERSAL_API_KEY: {type: boolean}
            WORKFLOW_INTERNAL_API_KEY: {type: boolean}
            ORCHESTRATOR_API_KEY: {type: boolean}
        # Every value in this object is typed as a string.
        env:
          type: object
          properties:
            LLAMA_CLOUD_BASE_URL: {type: string}
            LLAMA_CLOUD_ORGANIZATION_ID: {type: string}
            LLAMA_CLOUD_PROJECT_ID: {type: string}
            LLAMA_PARSE_CLIENT_NAME: {type: string}
            LLAMA_PARSE_TIER: {type: string}
            LLAMA_PARSE_VERSION: {type: string}
            LLAMA_PARSE_POLL_SECONDS: {type: string}
            LLAMA_PARSE_SYNC_MAX_POLLS: {type: string}
            LLAMA_PARSE_QUEUE_POLL_BATCH: {type: string}
            LLAMA_PARSE_MAX_QUEUE_ATTEMPTS: {type: string}

    # Body returned with HTTP 504 by POST /v1/parse. Every property is
    # required and `ok` is pinned to false.
    SyncTimeoutResponse:
      type: object
      required: [ok, mode, error, pollsAttempted, maxPolls]
      properties:
        ok:
          type: boolean
          enum: [false]
        mode:
          type: string
          enum: [sync]
        error: {type: string}
        pollsAttempted: {type: integer}
        maxPolls: {type: integer}

    # Shared error body used by the 400 / 401 / 404 responses: `ok` is pinned
    # to false and `error` carries a string.
    ErrorResponse:
      type: object
      required: [ok, error]
      properties:
        ok:
          type: boolean
          enum: [false]
        error: {type: string}

paths:
  # Health-check endpoint. Its response body is fully pinned by single-value
  # enums.
  /health:
    get:
      summary: Health check
      operationId: healthCheck
      # An empty list overrides the document-level security requirement, so
      # this operation needs no API key.
      security: []
      responses:
        '200':
          description: Service is healthy
          content:
            application/json:
              schema:
                type: object
                required: [ok, service, version]
                properties:
                  ok:
                    type: boolean
                    enum: [true]
                  service:
                    type: string
                    enum: [llama-parse]
                  version:
                    type: string
                    enum: [v1]

  # Synchronous parse: the document is uploaded as multipart/form-data and
  # the result comes back in the same response. Responds 200 with the parsed
  # output, or 504 when the sync parse times out.
  /v1/parse:
    post:
      operationId: syncParse
      summary: Synchronously parse a document
      requestBody:
        required: true
        content:
          multipart/form-data:
            schema:
              type: object
              properties:
                file:
                  description: Document to parse
                  type: string
                  # `format: binary` is the OpenAPI 3.0 spelling, kept so
                  # older tooling still shows a file input. OpenAPI 3.1
                  # treats `format` as an annotation only, so the binary
                  # nature of the part is also stated via contentMediaType.
                  format: binary
                  contentMediaType: application/octet-stream
                output_format:
                  type: string
                  enum:
                    - text
                    - markdown
                    - both
                  default: both
                parse_config:
                  type: string
                  description: JSON string of parse config overrides
                  # Still a plain string on the wire. These annotations say
                  # that the string holds JSON and which schema the decoded
                  # value is expected to follow.
                  # NOTE(review): assumes the overrides use the ParseConfig
                  # shape — confirm against the handler.
                  contentMediaType: application/json
                  contentSchema:
                    $ref: '#/components/schemas/ParseConfig'
              required:
                - file
      # Responses are listed in ascending status-code order.
      responses:
        '200':
          description: Document parsed successfully
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/SyncParseResponse'
        '400':
          description: Bad request
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'
        '401':
          description: Unauthorized
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'
        '504':
          description: Sync parse timed out
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/SyncTimeoutResponse'

  # Asynchronous parse: the document is uploaded as multipart/form-data and
  # the service answers 202 with a job id and a status URL instead of the
  # parsed output.
  /v1/parse/jobs:
    post:
      operationId: createAsyncJob
      summary: Create an async parse job
      requestBody:
        required: true
        content:
          multipart/form-data:
            schema:
              type: object
              properties:
                file:
                  description: Document to parse
                  type: string
                  # `format: binary` is the OpenAPI 3.0 spelling, kept so
                  # older tooling still shows a file input. OpenAPI 3.1
                  # treats `format` as an annotation only, so the binary
                  # nature of the part is also stated via contentMediaType.
                  format: binary
                  contentMediaType: application/octet-stream
                output_format:
                  type: string
                  enum:
                    - text
                    - markdown
                    - both
                  default: both
                parse_config:
                  type: string
                  description: JSON string of parse config overrides
                  # Still a plain string on the wire. These annotations say
                  # that the string holds JSON and which schema the decoded
                  # value is expected to follow.
                  # NOTE(review): assumes the overrides use the ParseConfig
                  # shape — confirm against the handler.
                  contentMediaType: application/json
                  contentSchema:
                    $ref: '#/components/schemas/ParseConfig'
              required:
                - file
      responses:
        '202':
          description: Async job created
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/AsyncJobResponse'
        '400':
          description: Bad request
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'
        '401':
          description: Unauthorized
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'

  # Look up one async parse job by its UUID.
  /v1/parse/jobs/{jobId}:
    get:
      summary: Get async job status
      operationId: getJobStatus
      parameters:
        # Fills the {jobId} segment of the path template.
        - in: path
          name: jobId
          required: true
          schema:
            type: string
            format: uuid
      # Responses are listed in ascending status-code order.
      responses:
        '200':
          description: Job status
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/JobStatusResponse'
        '401':
          description: Unauthorized
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'
        '404':
          description: Job not found
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'

  # Read (GET) or update (PUT) the configuration. Both operations answer 200
  # with the same ConfigResponse body.
  /v1/config:
    get:
      summary: Get effective config and secret status
      operationId: getConfig
      responses:
        '200':
          description: Current configuration
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ConfigResponse'
        '401':
          description: Unauthorized
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'

    put:
      summary: Update stored config (deep merge)
      operationId: updateConfig
      # The body is a ParseConfig, which has no required properties, so a
      # partial object is schema-valid.
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/ParseConfig'
      responses:
        '200':
          description: Config updated
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ConfigResponse'
        '400':
          description: Bad request
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'
        '401':
          description: Unauthorized
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'
