# OpenAPI 3.1 description of the document-classification HTTP API.
openapi: 3.1.0
info:
  title: document-classification
  version: v1
  # Folded scalar: the lines below are joined with single spaces into one
  # sentence, with no trailing newline.
  description: >-
    Classifies documents against a Fibery artifact catalog using hybrid
    lexical+semantic search, reranking, and LLM-based agent review.
  contact:
    name: AdviceOS

# Base URL that the entries under `paths` are resolved against.
# NOTE(review): "your-subdomain" looks like a placeholder — confirm the real
# workers.dev subdomain before publishing this document.
servers:
  - description: Production
    url: https://document-classification.your-subdomain.workers.dev

# Document-wide default: no authentication requirement. Operations that need
# credentials declare their own `security` list, which replaces this default.
security: []

# Tags group operations in generated documentation. Each name is referenced
# from the `tags` list of the operations under `paths`.
tags:
  - name: Health
    description: Service health check.
  - name: Classification
    description: Synchronous and asynchronous document classification.
  - name: Configuration
    description: Models and thresholds configuration.

paths:
  # Health check. The operation declares no `security` of its own, so the
  # document-level default applies.
  /health:
    get:
      summary: Health check
      operationId: getHealth
      tags:
        - Health
      responses:
        "200":
          description: Service is healthy
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/HealthResponse"

  # Synchronous classification: the result is returned in the 200 response.
  /v1/classify:
    post:
      summary: Classify a document synchronously
      operationId: classifyDocument
      tags:
        - Classification
      # The entries below are alternatives: a request has to satisfy only one
      # of the three API-key schemes.
      security:
        - ApiKeyAuth: []
        - WorkflowAuth: []
        - OrchestratorAuth: []
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: "#/components/schemas/ClassifyRequest"
      responses:
        # Success
        "200":
          description: Classification result
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ClassificationResult"
        # Failures all share the ErrorResponse body.
        "400":
          description: Invalid request body
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ErrorResponse"
        "401":
          description: Unauthorized
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ErrorResponse"
        "500":
          description: Internal server error
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ErrorResponse"

  # Asynchronous classification: accepts the same request body as the
  # synchronous endpoint and answers 202 with an AsyncJobResponse.
  /v1/classify/jobs:
    post:
      summary: Create an asynchronous classification job
      operationId: createClassifyJob
      tags:
        - Classification
      # The entries below are alternatives: a request has to satisfy only one
      # of the three API-key schemes.
      security:
        - ApiKeyAuth: []
        - WorkflowAuth: []
        - OrchestratorAuth: []
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: "#/components/schemas/ClassifyRequest"
      responses:
        # Success
        "202":
          description: Job accepted and queued
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/AsyncJobResponse"
        # Failures share the ErrorResponse body.
        "400":
          description: Invalid request body
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ErrorResponse"
        "401":
          description: Unauthorized
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ErrorResponse"

  # Polling endpoint for jobs created through POST /v1/classify/jobs.
  /v1/classify/jobs/{classificationId}:
    get:
      operationId: getClassifyJobStatus
      summary: Get the status of an async classification job
      tags: [Classification]
      # The entries below are alternatives: a request has to satisfy only one
      # of the three API-key schemes.
      security:
        - ApiKeyAuth: []
        - WorkflowAuth: []
        - OrchestratorAuth: []
      parameters:
        - name: classificationId
          in: path
          required: true
          description: >-
            Identifier of the classification job, as returned in the
            `classificationId` field of the job-creation response.
          schema:
            type: string
          # Same value as the AsyncJobResponse example, so the examples line up.
          example: clf_def456
      responses:
        "200":
          description: Job status
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/JobStatusResponse"
        "401":
          description: Unauthorized
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ErrorResponse"
        "404":
          description: Job not found
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ErrorResponse"

  # Read (GET) and, eventually, update (PUT) the models and thresholds
  # configuration.
  /v1/config:
    get:
      operationId: getConfig
      summary: Get current models and thresholds configuration
      tags: [Configuration]
      # The entries below are alternatives: a request has to satisfy only one
      # of the three API-key schemes.
      security:
        - ApiKeyAuth: []
        - WorkflowAuth: []
        - OrchestratorAuth: []
      responses:
        "200":
          description: Current configuration
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ConfigResponse"
        "401":
          description: Unauthorized
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ErrorResponse"
    put:
      operationId: updateConfig
      summary: Update configuration (not yet implemented)
      tags: [Configuration]
      security:
        - ApiKeyAuth: []
        - WorkflowAuth: []
        - OrchestratorAuth: []
      responses:
        # Documented for consistency with every other operation that declares
        # the same security requirements.
        "401":
          description: Unauthorized
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ErrorResponse"
        "501":
          description: Not implemented
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/ErrorResponse"

components:
  # Three API-key schemes, each carried in its own request header. Secured
  # operations list all three as alternatives.
  securitySchemes:
    ApiKeyAuth:
      type: apiKey
      name: X-Universal-Api-Key
      in: header
    WorkflowAuth:
      type: apiKey
      name: X-Workflow-Internal-Api-Key
      in: header
    OrchestratorAuth:
      type: apiKey
      name: X-Orchestrator-Api-Key
      in: header

  schemas:
    # Body of the 200 response from GET /health; all three fields are required.
    HealthResponse:
      type: object
      properties:
        ok:
          type: boolean
        service:
          type: string
        version:
          type: string
      required:
        - ok
        - service
        - version
      example:
        ok: true
        service: document-classification
        version: v1

    # Request body shared by POST /v1/classify and POST /v1/classify/jobs.
    # Only `documentText` is required.
    ClassifyRequest:
      type: object
      properties:
        documentText:
          description: Full text of the document to classify
          type: string
        filename:
          description: Original filename for additional context
          type: string
      required:
        - documentText
      example:
        documentText: "Quarterly business review for Q3 2025..."
        filename: "Q3-2025-review.pdf"

    # Outcome of a classification run. Returned directly by POST /v1/classify
    # and embedded as `result` in JobStatusResponse.
    #
    # Nullable fields use a type array that includes "null": this document is
    # OpenAPI 3.1, where the 3.0-only `nullable` keyword is not part of the
    # Schema Object.
    ClassificationResult:
      type: object
      required: [source, decision, confidence, topCandidates, models]
      properties:
        source:
          type: string
          enum: [sync, async]
          description: Whether the result came from a synchronous or asynchronous call
        decision:
          type: string
          enum: [match, needs_user_input, no_match]
          description: Classification decision
        artifactId:
          type: [string, "null"]
          description: ID of the matched Fibery artifact (null if no match)
        artifactName:
          type: [string, "null"]
          description: Name of the matched Fibery artifact (null if no match)
        confidence:
          type: number
          format: double
          minimum: 0
          maximum: 1
          description: Confidence score of the classification
        rationale:
          type: [string, "null"]
          description: Explanation of the decision
        alternatives:
          type: array
          items:
            $ref: "#/components/schemas/Candidate"
          description: Alternative artifact candidates that were considered
        diagnostics:
          # Free-form object (any keys allowed), or null.
          type: [object, "null"]
          additionalProperties: true
          description: Pipeline diagnostic information
        analysis:
          $ref: "#/components/schemas/DocumentAnalysis"
        models:
          $ref: "#/components/schemas/ModelsUsed"
        topCandidates:
          type: array
          items:
            $ref: "#/components/schemas/Candidate"
          description: Ranked list of top candidate artifacts

    # One candidate artifact, used in both `topCandidates` and `alternatives`
    # of ClassificationResult.
    Candidate:
      type: object
      required: [artifactId, artifactName, score]
      properties:
        artifactId:
          type: string
        artifactName:
          type: string
        score:
          type: number
          format: double
          minimum: 0
          maximum: 1
        rerankScore:
          # OpenAPI 3.1 expresses nullability through the type array; the
          # 3.0-only `nullable` keyword is not part of the Schema Object.
          type: [number, "null"]
          format: double

    # Analysis extracted from the document; referenced by
    # ClassificationResult.analysis. The whole value may be null.
    #
    # OpenAPI 3.1 expresses nullability through the type array; the 3.0-only
    # `nullable` keyword is not part of the Schema Object.
    DocumentAnalysis:
      type: [object, "null"]
      properties:
        summary:
          type: string
        title:
          type: string
        owner:
          type: [string, "null"]
        keywords:
          type: array
          items:
            type: string

    # Model identifiers used for each stage; referenced by
    # ClassificationResult.models.
    ModelsUsed:
      type: object
      properties:
        analysis:
          type: string
          description: Model used for document analysis
        embedding:
          type: string
          description: Model used for semantic embeddings
        rerank:
          type: string
          description: Model used for candidate reranking
        decision:
          # OpenAPI 3.1 expresses nullability through the type array; the
          # 3.0-only `nullable` keyword is not part of the Schema Object.
          type: [string, "null"]
          description: Model used for agent review decision

    # Body of the 202 response from POST /v1/classify/jobs. `status` has a
    # single allowed value, `queued`.
    AsyncJobResponse:
      type: object
      properties:
        ok:
          type: boolean
        jobId:
          type: string
        classificationId:
          type: string
        status:
          type: string
          enum:
            - queued
        statusUrl:
          description: Relative URL to poll for job status
          type: string
      required:
        - ok
        - jobId
        - classificationId
        - status
        - statusUrl
      example:
        ok: true
        jobId: "job_abc123"
        classificationId: "clf_def456"
        status: queued
        statusUrl: "/v1/classify/jobs/clf_def456"

    # Body of the 200 response from GET /v1/classify/jobs/{classificationId}.
    #
    # OpenAPI 3.1 expresses nullability through a "null" type; the 3.0-only
    # `nullable` keyword is not part of the Schema Object.
    JobStatusResponse:
      type: object
      required: [classificationId, status]
      properties:
        classificationId:
          type: string
        status:
          type: string
          enum: [queued, processing, completed, failed]
        result:
          description: Populated when status is completed
          # Either a full ClassificationResult or null. The two branches are
          # mutually exclusive because the referenced schema is not null-typed.
          oneOf:
            - $ref: "#/components/schemas/ClassificationResult"
            - type: "null"
        error:
          type: [string, "null"]
          description: Error message when status is failed
        createdAt:
          type: string
          format: date-time
        updatedAt:
          type: string
          format: date-time

    # Body of the 200 response from GET /v1/config: configured model
    # identifiers plus numeric thresholds.
    ConfigResponse:
      type: object
      required: [models, thresholds]
      properties:
        models:
          type: object
          properties:
            analysis:
              type: string
            embedding:
              type: string
            rerank:
              type: string
            decision:
              # Must admit null: the example below sets `decision: null`.
              # OpenAPI 3.1 expresses this through the type array; the 3.0-only
              # `nullable` keyword is not part of the Schema Object.
              type: [string, "null"]
        thresholds:
          type: object
          properties:
            strongMatch:
              type: number
              format: double
            highConfAutoAccept:
              type: number
              format: double
            closeMatchMargin:
              type: number
              format: double
      example:
        models:
          analysis: "openai/gpt-4o-mini"
          embedding: "openai/text-embedding-3-small"
          rerank: "cohere/rerank-english-v3.0"
          decision: null
        thresholds:
          strongMatch: 0.76
          highConfAutoAccept: 0.9
          closeMatchMargin: 0.08

    # Error body shared by every non-2xx response in this document.
    ErrorResponse:
      type: object
      properties:
        ok:
          example: false
          type: boolean
        error:
          type: string
      required:
        - ok
        - error
