Jobs
Advanced job management and querying
Jobs represent document processing tasks in the DocExtract API. This guide covers advanced job management, querying, filtering, and monitoring capabilities beyond the basic document submission workflow.
Job Lifecycle
stateDiagram-v2
[*] --> queued: Document submitted
queued --> processing: Worker picks up job
processing --> completed: Extraction successful
processing --> failed: Extraction failed
completed --> [*]
failed --> [*]
Status Transitions
| From | To | Trigger |
|---|---|---|
| queued | processing | Worker starts processing |
| processing | completed | Extraction successful |
| processing | failed | Error occurred |
Jobs cannot be cancelled once submitted. They will either complete or fail.
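Because completed and failed are the only terminal states, a status check only needs to distinguish the four values above. A minimal sketch (the getJobStatus name is illustrative; the endpoint and status field match the job object used elsewhere in this guide):
type JobStatus = 'queued' | 'processing' | 'completed' | 'failed';

async function getJobStatus(jobId: string): Promise<JobStatus> {
  const response = await fetch(
    `https://api.adteco.com/v1/documents/${jobId}`,
    { headers: { 'Authorization': 'Bearer sk_live_your_api_key' } }
  );
  const job = await response.json();
  // queued and processing are transient; completed and failed are terminal
  return job.status;
}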
Advanced Querying
Filter by Multiple Criteria
interface JobQuery {
extractorId?: string;
status?: 'queued' | 'processing' | 'completed' | 'failed';
createdAfter?: string;
createdBefore?: string;
minConfidence?: number;
limit?: number;
offset?: number;
}
async function queryJobs(query: JobQuery) {
const params = new URLSearchParams();
if (query.extractorId) params.append('extractor_id', query.extractorId);
if (query.status) params.append('status', query.status);
if (query.createdAfter) params.append('created_after', query.createdAfter);
if (query.createdBefore) params.append('created_before', query.createdBefore);
if (query.minConfidence !== undefined) {
params.append('min_confidence', query.minConfidence.toString());
}
if (query.limit) params.append('limit', query.limit.toString());
if (query.offset) params.append('offset', query.offset.toString());
const response = await fetch(
`https://api.adteco.com/v1/documents?${params}`,
{
headers: {
'Authorization': 'Bearer sk_live_your_api_key',
},
}
);
return response.json();
}
// Example: Get completed jobs from last 24 hours with high confidence
const yesterday = new Date(Date.now() - 24 * 60 * 60 * 1000).toISOString();
const results = await queryJobs({
status: 'completed',
createdAfter: yesterday,
minConfidence: 0.90,
limit: 100,
});
import requests
from datetime import datetime, timedelta
from typing import Optional
def query_jobs(
extractor_id: Optional[str] = None,
status: Optional[str] = None,
created_after: Optional[str] = None,
created_before: Optional[str] = None,
min_confidence: Optional[float] = None,
limit: int = 50,
offset: int = 0,
):
params = {
'limit': limit,
'offset': offset,
}
if extractor_id:
params['extractor_id'] = extractor_id
if status:
params['status'] = status
if created_after:
params['created_after'] = created_after
if created_before:
params['created_before'] = created_before
if min_confidence is not None:
params['min_confidence'] = min_confidence
response = requests.get(
'https://api.adteco.com/v1/documents',
headers={'Authorization': 'Bearer sk_live_your_api_key'},
params=params,
)
return response.json()
# Example: Get completed jobs from last 24 hours with high confidence
yesterday = (datetime.now() - timedelta(days=1)).isoformat()
results = query_jobs(
status='completed',
created_after=yesterday,
min_confidence=0.90,
limit=100,
)
Pagination
Handle large result sets with pagination:
async function getAllJobs(extractorId: string) {
const allJobs = [];
let offset = 0;
const limit = 100;
while (true) {
const response = await queryJobs({
extractorId,
limit,
offset,
});
allJobs.push(...response.jobs);
// Check if there are more results
if (response.jobs.length < limit || offset + limit >= response.total) {
break;
}
offset += limit;
}
return allJobs;
}
Job Statistics
Get aggregated statistics about your document processing jobs.
curl -X GET "https://api.adteco.com/v1/documents/stats?period=30d" \
-H "Authorization: Bearer sk_live_your_api_key"const response = await fetch(
'https://api.adteco.com/v1/documents/stats?period=30d',
{
headers: {
'Authorization': 'Bearer sk_live_your_api_key',
},
}
);
const stats = await response.json();
console.log('Total jobs:', stats.total_jobs);
console.log('Success rate:', stats.success_rate);
console.log('Avg processing time:', stats.avg_processing_time_ms);
response = requests.get(
'https://api.adteco.com/v1/documents/stats',
headers={'Authorization': 'Bearer sk_live_your_api_key'},
params={'period': '30d'},
)
stats = response.json()
print(f"Total jobs: {stats['total_jobs']}")
print(f"Success rate: {stats['success_rate']}")
print(f"Avg processing time: {stats['avg_processing_time_ms']}ms")Response
{
"period": "30d",
"total_jobs": 1543,
"completed_jobs": 1489,
"failed_jobs": 54,
"success_rate": 0.965,
"avg_processing_time_ms": 4231,
"avg_confidence": 0.91,
"total_credits_used": 3086,
"by_extractor": [
{
"extractor_id": "ext_abc123...",
"extractor_name": "Invoice Extractor",
"job_count": 892,
"success_rate": 0.98
}
],
"by_status": {
"completed": 1489,
"failed": 54,
"queued": 0,
"processing": 0
}
}
Monitoring and Alerts
Track Processing Time
Monitor processing times to detect performance issues:
async function monitorProcessingTimes() {
const recentJobs = await queryJobs({
status: 'completed',
createdAfter: new Date(Date.now() - 60 * 60 * 1000).toISOString(), // Last hour
limit: 100,
});
  const processingTimes = recentJobs.jobs.map(j => j.processing_time_ms);
  // Avoid NaN / -Infinity when no jobs completed in the window
  if (processingTimes.length === 0) return { avgTime: 0, maxTime: 0 };
const avgTime = processingTimes.reduce((a, b) => a + b, 0) / processingTimes.length;
const maxTime = Math.max(...processingTimes);
// Alert if processing is slow
if (avgTime > 10000 || maxTime > 30000) {
console.warn('Processing times are higher than normal');
// Send alert to monitoring service
}
return { avgTime, maxTime };
}
Track Failure Rate
Monitor job failures to detect quality issues.
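The monitor below calls getJobStats, which is not defined in this guide. A minimal sketch, assuming it simply wraps the /v1/documents/stats endpoint from the Job Statistics section:
async function getJobStats(period: string) {
  const response = await fetch(
    `https://api.adteco.com/v1/documents/stats?period=${period}`,
    { headers: { 'Authorization': 'Bearer sk_live_your_api_key' } }
  );
  return response.json();
}
With that helper in place, the failure-rate monitor looks like this: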
async function monitorFailureRate() {
const stats = await getJobStats('24h');
const failureRate = stats.failed_jobs / stats.total_jobs;
// Alert if failure rate exceeds threshold
if (failureRate > 0.05) { // 5% threshold
console.error(`High failure rate detected: ${(failureRate * 100).toFixed(1)}%`);
// Get recent failures to analyze
const failures = await queryJobs({
status: 'failed',
createdAfter: new Date(Date.now() - 24 * 60 * 60 * 1000).toISOString(),
limit: 50,
});
// Group by error code
const errorCounts = failures.jobs.reduce((acc, job) => {
const code = job.error_details?.code || 'unknown';
acc[code] = (acc[code] || 0) + 1;
return acc;
}, {});
console.log('Error breakdown:', errorCounts);
// Send alert with error analysis
}
return { failureRate, totalJobs: stats.total_jobs };
}
Track Confidence Scores
Monitor extraction confidence to ensure quality:
async function monitorConfidence() {
const recentJobs = await queryJobs({
status: 'completed',
createdAfter: new Date(Date.now() - 24 * 60 * 60 * 1000).toISOString(),
limit: 200,
});
// Calculate average confidence per field
const fieldConfidences: Record<string, number[]> = {};
recentJobs.jobs.forEach(job => {
Object.entries(job.confidence).forEach(([field, score]) => {
if (!fieldConfidences[field]) {
fieldConfidences[field] = [];
}
fieldConfidences[field].push(score as number);
});
});
// Alert on low confidence fields
Object.entries(fieldConfidences).forEach(([field, scores]) => {
const avgConfidence = scores.reduce((a, b) => a + b, 0) / scores.length;
if (avgConfidence < 0.80) {
console.warn(
`Low confidence for field "${field}": ${(avgConfidence * 100).toFixed(1)}%`
);
// Consider updating field description in extractor
}
});
return fieldConfidences;
}
Bulk Operations
Reprocess Failed Jobs
Retry failed jobs with an updated extractor:
async function reprocessFailedJobs(extractorId: string) {
// Get all failed jobs for this extractor
const failedJobs = await queryJobs({
extractorId,
status: 'failed',
limit: 100,
});
console.log(`Found ${failedJobs.jobs.length} failed jobs to reprocess`);
// Reprocess each job
const reprocessResults = [];
for (const job of failedJobs.jobs) {
try {
// Download original document
const docResponse = await fetch(job.document_url);
const docBlob = await docResponse.blob();
const docBuffer = Buffer.from(await docBlob.arrayBuffer());
const base64Doc = docBuffer.toString('base64');
// Submit for reprocessing
const newJob = await fetch('https://api.adteco.com/v1/documents', {
method: 'POST',
headers: {
'Authorization': 'Bearer sk_live_your_api_key',
'Content-Type': 'application/json',
},
body: JSON.stringify({
extractor_id: extractorId,
document: base64Doc,
mime_type: job.mime_type,
metadata: {
...job.metadata,
reprocessed_from: job.id,
},
}),
});
reprocessResults.push({
originalJobId: job.id,
newJobId: (await newJob.json()).id,
});
} catch (error) {
console.error(`Failed to reprocess job ${job.id}:`, error);
}
}
return reprocessResults;
}
Export Job Results
Export extracted data to CSV or JSON:
import fs from 'fs';
import { createObjectCsvWriter } from 'csv-writer';
async function exportJobResults(extractorId: string, format: 'csv' | 'json') {
const jobs = await getAllJobs(extractorId);
if (format === 'json') {
// Export as JSON
const exportData = jobs.map(job => ({
job_id: job.id,
status: job.status,
created_at: job.created_at,
completed_at: job.completed_at,
processing_time_ms: job.processing_time_ms,
cost_credits: job.cost_credits,
...job.extracted_data,
}));
fs.writeFileSync(
'export.json',
JSON.stringify(exportData, null, 2)
);
} else {
// Export as CSV
const completedJobs = jobs.filter(j => j.status === 'completed');
if (completedJobs.length === 0) {
console.log('No completed jobs to export');
return;
}
// Get all unique field names
const fieldNames = new Set<string>();
completedJobs.forEach(job => {
Object.keys(job.extracted_data).forEach(field => fieldNames.add(field));
});
const csvWriter = createObjectCsvWriter({
path: 'export.csv',
header: [
{ id: 'job_id', title: 'Job ID' },
{ id: 'created_at', title: 'Created At' },
{ id: 'processing_time_ms', title: 'Processing Time (ms)' },
...Array.from(fieldNames).map(field => ({
id: field,
title: field,
})),
],
});
const records = completedJobs.map(job => ({
job_id: job.id,
created_at: job.created_at,
processing_time_ms: job.processing_time_ms,
...job.extracted_data,
}));
await csvWriter.writeRecords(records);
console.log(`Exported ${records.length} jobs to export.csv`);
}
}
Job Metadata
Leverage metadata for tracking and organization:
// Submit with tracking metadata
const job = await fetch('https://api.adteco.com/v1/documents', {
method: 'POST',
headers: {
'Authorization': 'Bearer sk_live_your_api_key',
'Content-Type': 'application/json',
},
body: JSON.stringify({
extractor_id: 'ext_abc123...',
document: base64Document,
mime_type: 'application/pdf',
metadata: {
customer_id: 'cust_123',
customer_name: 'Acme Corp',
invoice_type: 'recurring',
source: 'email',
source_email: 'invoices@acme.com',
department: 'accounting',
priority: 'high',
processed_by: 'user_456',
batch_id: 'batch_202411_001',
},
}),
});
// Query by metadata (if your org has metadata search enabled; the JobQuery interface above would need a metadata field to support this)
const results = await queryJobs({
metadata: {
customer_id: 'cust_123',
department: 'accounting',
},
limit: 100,
});
Performance Optimization
Parallel Processing
Process multiple documents concurrently:
async function processBatch(
extractorId: string,
documents: Array<{ path: string; metadata?: any }>
) {
// Submit all documents in parallel
const submissions = documents.map(async ({ path, metadata }) => {
const buffer = fs.readFileSync(path);
const base64Doc = buffer.toString('base64');
return fetch('https://api.adteco.com/v1/documents', {
method: 'POST',
headers: {
'Authorization': 'Bearer sk_live_your_api_key',
'Content-Type': 'application/json',
},
body: JSON.stringify({
extractor_id: extractorId,
document: base64Doc,
mime_type: 'application/pdf',
metadata,
}),
}).then(r => r.json());
});
const jobs = await Promise.all(submissions);
console.log(`Submitted ${jobs.length} jobs`);
return jobs;
}
// Process with rate limiting
async function processBatchWithRateLimit(
extractorId: string,
documents: string[],
maxConcurrent: number = 10
) {
const results = [];
for (let i = 0; i < documents.length; i += maxConcurrent) {
const batch = documents.slice(i, i + maxConcurrent);
const batchResults = await processBatch(
extractorId,
batch.map(path => ({ path }))
);
results.push(...batchResults);
// Small delay between batches
if (i + maxConcurrent < documents.length) {
await new Promise(resolve => setTimeout(resolve, 1000));
}
}
return results;
}
Caching Results
Cache job results to avoid reprocessing:
import Redis from 'ioredis';
const redis = new Redis();
async function getJobWithCache(jobId: string) {
// Check cache first
const cached = await redis.get(`job:${jobId}`);
if (cached) {
return JSON.parse(cached);
}
// Fetch from API
const response = await fetch(
`https://api.adteco.com/v1/documents/${jobId}`,
{
headers: {
'Authorization': 'Bearer sk_live_your_api_key',
},
}
);
const job = await response.json();
// Cache completed jobs (expire after 7 days)
if (job.status === 'completed') {
await redis.setex(
`job:${jobId}`,
7 * 24 * 60 * 60,
JSON.stringify(job)
);
}
return job;
}
Best Practices
Job Retention
Jobs are retained for 90 days by default. Archive important results:
async function archiveCompletedJobs() {
const thirtyDaysAgo = new Date(Date.now() - 30 * 24 * 60 * 60 * 1000).toISOString();
const oldJobs = await queryJobs({
status: 'completed',
createdBefore: thirtyDaysAgo,
limit: 100,
});
// Store in your database
for (const job of oldJobs.jobs) {
await db.jobs.create({
docextract_job_id: job.id,
extractor_id: job.extractor_id,
extracted_data: job.extracted_data,
confidence: job.confidence,
processing_time_ms: job.processing_time_ms,
cost_credits: job.cost_credits,
created_at: job.created_at,
completed_at: job.completed_at,
});
}
console.log(`Archived ${oldJobs.jobs.length} jobs`);
}
Error Recovery
Implement robust error recovery:
async function robustProcessDocument(extractorId: string, documentPath: string) {
const maxRetries = 3;
let attempt = 0;
while (attempt < maxRetries) {
try {
const buffer = fs.readFileSync(documentPath);
const base64Doc = buffer.toString('base64');
const response = await fetch('https://api.adteco.com/v1/documents', {
method: 'POST',
headers: {
'Authorization': 'Bearer sk_live_your_api_key',
'Content-Type': 'application/json',
},
body: JSON.stringify({
extractor_id: extractorId,
document: base64Doc,
mime_type: 'application/pdf',
}),
});
      if (!response.ok) {
        const err: any = new Error(`HTTP ${response.status}: ${await response.text()}`);
        // Tag rate-limit responses (HTTP 429) so the retry logic below can back off
        err.code = response.status === 429 ? 'rate_limit_exceeded' : 'http_error';
        throw err;
      }
const job = await response.json();
const results = await waitForResults(job.id);
return results;
} catch (error) {
attempt++;
if (error.code === 'rate_limit_exceeded') {
// Exponential backoff for rate limits
const delay = Math.pow(2, attempt) * 1000;
console.log(`Rate limited. Retrying in ${delay}ms...`);
await new Promise(resolve => setTimeout(resolve, delay));
} else if (attempt >= maxRetries) {
console.error(`Failed after ${maxRetries} attempts:`, error);
throw error;
} else {
console.log(`Attempt ${attempt} failed, retrying...`);
await new Promise(resolve => setTimeout(resolve, 1000));
}
}
}
  throw new Error(`Failed to process ${documentPath} after ${maxRetries} attempts`);
}
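The error-recovery example calls waitForResults, which is not defined earlier in this guide. A minimal polling sketch, assuming GET /v1/documents/{id} returns the job with the status and error_details fields used elsewhere in this guide:
async function waitForResults(jobId: string, pollIntervalMs = 2000, timeoutMs = 120000) {
  const deadline = Date.now() + timeoutMs;
  while (Date.now() < deadline) {
    const response = await fetch(
      `https://api.adteco.com/v1/documents/${jobId}`,
      { headers: { 'Authorization': 'Bearer sk_live_your_api_key' } }
    );
    const job = await response.json();
    if (job.status === 'completed') return job;
    if (job.status === 'failed') {
      throw new Error(`Job ${jobId} failed: ${job.error_details?.code || 'unknown'}`);
    }
    // Still queued or processing; wait before polling again
    await new Promise(resolve => setTimeout(resolve, pollIntervalMs));
  }
  throw new Error(`Timed out waiting for job ${jobId}`);
}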