
Jobs

Advanced job management and querying

Jobs represent document processing tasks in the DocExtract API. This guide covers advanced job management, querying, filtering, and monitoring capabilities beyond the basic document submission workflow.

Job Lifecycle

stateDiagram-v2
    [*] --> queued: Document submitted
    queued --> processing: Worker picks up job
    processing --> completed: Extraction successful
    processing --> failed: Extraction failed
    completed --> [*]
    failed --> [*]

Status Transitions

From         To          Trigger
queued       processing  Worker starts processing
processing   completed   Extraction successful
processing   failed      Error occurred

Jobs cannot be cancelled once submitted. They will either complete or fail.
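
Because every job ends in completed or failed, clients typically poll the job endpoint until a terminal state is reached. Below is a minimal polling sketch, using the same GET /v1/documents/{job_id} call shown in the caching example later in this guide; the waitForResults helper referenced in the error-recovery section assumes something like this:

async function waitForResults(jobId: string, pollIntervalMs = 2000) {
  while (true) {
    const response = await fetch(
      `https://api.adteco.com/v1/documents/${jobId}`,
      {
        headers: {
          'Authorization': 'Bearer sk_live_your_api_key',
        },
      }
    );
    const job = await response.json();

    // queued and processing are the only non-terminal states
    if (job.status === 'completed' || job.status === 'failed') {
      return job;
    }

    await new Promise(resolve => setTimeout(resolve, pollIntervalMs));
  }
}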

Advanced Querying

Filter by Multiple Criteria

TypeScript:

interface JobQuery {
  extractorId?: string;
  status?: 'queued' | 'processing' | 'completed' | 'failed';
  createdAfter?: string;
  createdBefore?: string;
  minConfidence?: number;
  limit?: number;
  offset?: number;
}

async function queryJobs(query: JobQuery) {
  const params = new URLSearchParams();

  if (query.extractorId) params.append('extractor_id', query.extractorId);
  if (query.status) params.append('status', query.status);
  if (query.createdAfter) params.append('created_after', query.createdAfter);
  if (query.createdBefore) params.append('created_before', query.createdBefore);
  if (query.minConfidence !== undefined) {
    params.append('min_confidence', query.minConfidence.toString());
  }
  if (query.limit) params.append('limit', query.limit.toString());
  if (query.offset) params.append('offset', query.offset.toString());

  const response = await fetch(
    `https://api.adteco.com/v1/documents?${params}`,
    {
      headers: {
        'Authorization': 'Bearer sk_live_your_api_key',
      },
    }
  );

  return response.json();
}

// Example: Get completed jobs from last 24 hours with high confidence
const yesterday = new Date(Date.now() - 24 * 60 * 60 * 1000).toISOString();
const results = await queryJobs({
  status: 'completed',
  createdAfter: yesterday,
  minConfidence: 0.90,
  limit: 100,
});

Python:

import requests
from datetime import datetime, timedelta, timezone
from typing import Optional

def query_jobs(
    extractor_id: Optional[str] = None,
    status: Optional[str] = None,
    created_after: Optional[str] = None,
    created_before: Optional[str] = None,
    min_confidence: Optional[float] = None,
    limit: int = 50,
    offset: int = 0,
):
    params = {
        'limit': limit,
        'offset': offset,
    }

    if extractor_id:
        params['extractor_id'] = extractor_id
    if status:
        params['status'] = status
    if created_after:
        params['created_after'] = created_after
    if created_before:
        params['created_before'] = created_before
    if min_confidence is not None:
        params['min_confidence'] = min_confidence

    response = requests.get(
        'https://api.adteco.com/v1/documents',
        headers={'Authorization': 'Bearer sk_live_your_api_key'},
        params=params,
    )

    return response.json()

# Example: Get completed jobs from last 24 hours with high confidence
yesterday = (datetime.now(timezone.utc) - timedelta(days=1)).isoformat()
results = query_jobs(
    status='completed',
    created_after=yesterday,
    min_confidence=0.90,
    limit=100,
)

Pagination

Handle large result sets with pagination:

async function getAllJobs(extractorId: string) {
  const allJobs = [];
  let offset = 0;
  const limit = 100;

  while (true) {
    const response = await queryJobs({
      extractorId,
      limit,
      offset,
    });

    allJobs.push(...response.jobs);

    // Check if there are more results
    if (response.jobs.length < limit || offset + limit >= response.total) {
      break;
    }

    offset += limit;
  }

  return allJobs;
}
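
For example, pulling every job for a single extractor (the ID below is illustrative):

const jobs = await getAllJobs('ext_abc123...');
console.log(`Fetched ${jobs.length} jobs`);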

Job Statistics

Get aggregated statistics about your document processing jobs.

cURL:

curl -X GET "https://api.adteco.com/v1/documents/stats?period=30d" \
  -H "Authorization: Bearer sk_live_your_api_key"

TypeScript:

const response = await fetch(
  'https://api.adteco.com/v1/documents/stats?period=30d',
  {
    headers: {
      'Authorization': 'Bearer sk_live_your_api_key',
    },
  }
);

const stats = await response.json();
console.log('Total jobs:', stats.total_jobs);
console.log('Success rate:', stats.success_rate);
console.log('Avg processing time:', stats.avg_processing_time_ms);

Python:

import requests

response = requests.get(
    'https://api.adteco.com/v1/documents/stats',
    headers={'Authorization': 'Bearer sk_live_your_api_key'},
    params={'period': '30d'},
)

stats = response.json()
print(f"Total jobs: {stats['total_jobs']}")
print(f"Success rate: {stats['success_rate']}")
print(f"Avg processing time: {stats['avg_processing_time_ms']}ms")

Response

{
  "period": "30d",
  "total_jobs": 1543,
  "completed_jobs": 1489,
  "failed_jobs": 54,
  "success_rate": 0.965,
  "avg_processing_time_ms": 4231,
  "avg_confidence": 0.91,
  "total_credits_used": 3086,
  "by_extractor": [
    {
      "extractor_id": "ext_abc123...",
      "extractor_name": "Invoice Extractor",
      "job_count": 892,
      "success_rate": 0.98
    }
  ],
  "by_status": {
    "completed": 1489,
    "failed": 54,
    "queued": 0,
    "processing": 0
  }
}
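
Note that success_rate is completed_jobs divided by total_jobs: here 1489 / 1543 ≈ 0.965.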

Monitoring and Alerts

Track Processing Time

Monitor processing times to detect performance issues:

async function monitorProcessingTimes() {
  const recentJobs = await queryJobs({
    status: 'completed',
    createdAfter: new Date(Date.now() - 60 * 60 * 1000).toISOString(), // Last hour
    limit: 100,
  });

  const processingTimes = recentJobs.jobs.map(j => j.processing_time_ms);

  // Avoid NaN/-Infinity when no jobs completed in the window
  if (processingTimes.length === 0) {
    return { avgTime: 0, maxTime: 0 };
  }

  const avgTime = processingTimes.reduce((a, b) => a + b, 0) / processingTimes.length;
  const maxTime = Math.max(...processingTimes);

  // Alert if processing is slow
  if (avgTime > 10000 || maxTime > 30000) {
    console.warn('Processing times are higher than normal');
    // Send alert to monitoring service
  }

  return { avgTime, maxTime };
}

Track Failure Rate

Monitor job failures to detect quality issues:

async function monitorFailureRate() {
  const stats = await getJobStats('24h'); // helper defined below

  const failureRate = stats.total_jobs > 0 ? stats.failed_jobs / stats.total_jobs : 0;

  // Alert if failure rate exceeds threshold
  if (failureRate > 0.05) { // 5% threshold
    console.error(`High failure rate detected: ${(failureRate * 100).toFixed(1)}%`);

    // Get recent failures to analyze
    const failures = await queryJobs({
      status: 'failed',
      createdAfter: new Date(Date.now() - 24 * 60 * 60 * 1000).toISOString(),
      limit: 50,
    });

    // Group by error code
    const errorCounts = failures.jobs.reduce((acc, job) => {
      const code = job.error_details?.code || 'unknown';
      acc[code] = (acc[code] || 0) + 1;
      return acc;
    }, {});

    console.log('Error breakdown:', errorCounts);
    // Send alert with error analysis
  }

  return { failureRate, totalJobs: stats.total_jobs };
}
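
The getJobStats helper is not defined in the examples above; here is a minimal sketch that wraps the Job Statistics endpoint, assuming the period parameter also accepts shorter windows such as 24h:

async function getJobStats(period: string) {
  const response = await fetch(
    `https://api.adteco.com/v1/documents/stats?period=${period}`,
    {
      headers: {
        'Authorization': 'Bearer sk_live_your_api_key',
      },
    }
  );
  return response.json();
}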

Track Confidence Scores

Monitor extraction confidence to ensure quality:

async function monitorConfidence() {
  const recentJobs = await queryJobs({
    status: 'completed',
    createdAfter: new Date(Date.now() - 24 * 60 * 60 * 1000).toISOString(),
    limit: 200,
  });

  // Calculate average confidence per field
  const fieldConfidences: Record<string, number[]> = {};

  recentJobs.jobs.forEach(job => {
    Object.entries(job.confidence).forEach(([field, score]) => {
      if (!fieldConfidences[field]) {
        fieldConfidences[field] = [];
      }
      fieldConfidences[field].push(score as number);
    });
  });

  // Alert on low confidence fields
  Object.entries(fieldConfidences).forEach(([field, scores]) => {
    const avgConfidence = scores.reduce((a, b) => a + b, 0) / scores.length;

    if (avgConfidence < 0.80) {
      console.warn(
        `Low confidence for field "${field}": ${(avgConfidence * 100).toFixed(1)}%`
      );
      // Consider updating field description in extractor
    }
  });

  return fieldConfidences;
}

Bulk Operations

Reprocess Failed Jobs

Retry failed jobs with an updated extractor:

async function reprocessFailedJobs(extractorId: string) {
  // Get all failed jobs for this extractor
  const failedJobs = await queryJobs({
    extractorId,
    status: 'failed',
    limit: 100,
  });

  console.log(`Found ${failedJobs.jobs.length} failed jobs to reprocess`);

  // Reprocess each job
  const reprocessResults = [];

  for (const job of failedJobs.jobs) {
    try {
      // Download original document
      const docResponse = await fetch(job.document_url);
      const docBlob = await docResponse.blob();
      const docBuffer = Buffer.from(await docBlob.arrayBuffer());
      const base64Doc = docBuffer.toString('base64');

      // Submit for reprocessing
      const newJob = await fetch('https://api.adteco.com/v1/documents', {
        method: 'POST',
        headers: {
          'Authorization': 'Bearer sk_live_your_api_key',
          'Content-Type': 'application/json',
        },
        body: JSON.stringify({
          extractor_id: extractorId,
          document: base64Doc,
          mime_type: job.mime_type,
          metadata: {
            ...job.metadata,
            reprocessed_from: job.id,
          },
        }),
      });

      reprocessResults.push({
        originalJobId: job.id,
        newJobId: (await newJob.json()).id,
      });
    } catch (error) {
      console.error(`Failed to reprocess job ${job.id}:`, error);
    }
  }

  return reprocessResults;
}
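
The loop above reprocesses jobs sequentially; for large backlogs, combine it with the rate-limited batching pattern shown under Performance Optimization below.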

Export Job Results

Export extracted data to CSV or JSON:

import fs from 'fs';
import { createObjectCsvWriter } from 'csv-writer';

async function exportJobResults(extractorId: string, format: 'csv' | 'json') {
  const jobs = await getAllJobs(extractorId);

  if (format === 'json') {
    // Export as JSON
    const exportData = jobs.map(job => ({
      job_id: job.id,
      status: job.status,
      created_at: job.created_at,
      completed_at: job.completed_at,
      processing_time_ms: job.processing_time_ms,
      cost_credits: job.cost_credits,
      ...job.extracted_data,
    }));

    fs.writeFileSync(
      'export.json',
      JSON.stringify(exportData, null, 2)
    );
  } else {
    // Export as CSV
    const completedJobs = jobs.filter(j => j.status === 'completed');

    if (completedJobs.length === 0) {
      console.log('No completed jobs to export');
      return;
    }

    // Get all unique field names
    const fieldNames = new Set<string>();
    completedJobs.forEach(job => {
      Object.keys(job.extracted_data).forEach(field => fieldNames.add(field));
    });

    const csvWriter = createObjectCsvWriter({
      path: 'export.csv',
      header: [
        { id: 'job_id', title: 'Job ID' },
        { id: 'created_at', title: 'Created At' },
        { id: 'processing_time_ms', title: 'Processing Time (ms)' },
        ...Array.from(fieldNames).map(field => ({
          id: field,
          title: field,
        })),
      ],
    });

    const records = completedJobs.map(job => ({
      job_id: job.id,
      created_at: job.created_at,
      processing_time_ms: job.processing_time_ms,
      ...job.extracted_data,
    }));

    await csvWriter.writeRecords(records);
    console.log(`Exported ${records.length} jobs to export.csv`);
  }
}

Job Metadata

Leverage metadata for tracking and organization:

// Submit with tracking metadata
const job = await fetch('https://api.adteco.com/v1/documents', {
  method: 'POST',
  headers: {
    'Authorization': 'Bearer sk_live_your_api_key',
    'Content-Type': 'application/json',
  },
  body: JSON.stringify({
    extractor_id: 'ext_abc123...',
    document: base64Document,
    mime_type: 'application/pdf',
    metadata: {
      customer_id: 'cust_123',
      customer_name: 'Acme Corp',
      invoice_type: 'recurring',
      source: 'email',
      source_email: 'invoices@acme.com',
      department: 'accounting',
      priority: 'high',
      processed_by: 'user_456',
      batch_id: 'batch_202411_001',
    },
  }),
});

// Query by metadata (if your org has metadata search enabled; extend the
// JobQuery interface and queryJobs above to pass these filters through)
const results = await queryJobs({
  metadata: {
    customer_id: 'cust_123',
    department: 'accounting',
  },
  limit: 100,
});

Performance Optimization

Parallel Processing

Process multiple documents concurrently:

import fs from 'fs';

async function processBatch(
  extractorId: string,
  documents: Array<{ path: string; metadata?: any }>
) {
  // Submit all documents in parallel
  const submissions = documents.map(async ({ path, metadata }) => {
    const buffer = fs.readFileSync(path);
    const base64Doc = buffer.toString('base64');

    return fetch('https://api.adteco.com/v1/documents', {
      method: 'POST',
      headers: {
        'Authorization': 'Bearer sk_live_your_api_key',
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({
        extractor_id: extractorId,
        document: base64Doc,
        mime_type: 'application/pdf',
        metadata,
      }),
    }).then(r => r.json());
  });

  const jobs = await Promise.all(submissions);
  console.log(`Submitted ${jobs.length} jobs`);

  return jobs;
}

// Process with rate limiting
async function processBatchWithRateLimit(
  extractorId: string,
  documents: string[],
  maxConcurrent: number = 10
) {
  const results = [];

  for (let i = 0; i < documents.length; i += maxConcurrent) {
    const batch = documents.slice(i, i + maxConcurrent);
    const batchResults = await processBatch(
      extractorId,
      batch.map(path => ({ path }))
    );
    results.push(...batchResults);

    // Small delay between batches
    if (i + maxConcurrent < documents.length) {
      await new Promise(resolve => setTimeout(resolve, 1000));
    }
  }

  return results;
}

Caching Results

Cache job results to avoid reprocessing:

import Redis from 'ioredis';

const redis = new Redis();

async function getJobWithCache(jobId: string) {
  // Check cache first
  const cached = await redis.get(`job:${jobId}`);
  if (cached) {
    return JSON.parse(cached);
  }

  // Fetch from API
  const response = await fetch(
    `https://api.adteco.com/v1/documents/${jobId}`,
    {
      headers: {
        'Authorization': 'Bearer sk_live_your_api_key',
      },
    }
  );

  const job = await response.json();

  // Cache completed jobs (expire after 7 days)
  if (job.status === 'completed') {
    await redis.setex(
      `job:${jobId}`,
      7 * 24 * 60 * 60,
      JSON.stringify(job)
    );
  }

  return job;
}

Best Practices

Job Retention

Jobs are retained for 90 days by default. Archive important results:

async function archiveCompletedJobs() {
  const thirtyDaysAgo = new Date(Date.now() - 30 * 24 * 60 * 60 * 1000).toISOString();

  const oldJobs = await queryJobs({
    status: 'completed',
    createdBefore: thirtyDaysAgo,
    limit: 100,
  });

  // Store in your database (db stands in for your application's own data layer)
  for (const job of oldJobs.jobs) {
    await db.jobs.create({
      docextract_job_id: job.id,
      extractor_id: job.extractor_id,
      extracted_data: job.extracted_data,
      confidence: job.confidence,
      processing_time_ms: job.processing_time_ms,
      cost_credits: job.cost_credits,
      created_at: job.created_at,
      completed_at: job.completed_at,
    });
  }

  console.log(`Archived ${oldJobs.jobs.length} jobs`);
}

Error Recovery

Implement robust error recovery:

import fs from 'fs';

async function robustProcessDocument(extractorId: string, documentPath: string) {
  const maxRetries = 3;
  let attempt = 0;

  while (attempt < maxRetries) {
    try {
      const buffer = fs.readFileSync(documentPath);
      const base64Doc = buffer.toString('base64');

      const response = await fetch('https://api.adteco.com/v1/documents', {
        method: 'POST',
        headers: {
          'Authorization': 'Bearer sk_live_your_api_key',
          'Content-Type': 'application/json',
        },
        body: JSON.stringify({
          extractor_id: extractorId,
          document: base64Doc,
          mime_type: 'application/pdf',
        }),
      });

      if (!response.ok) {
        const err: any = new Error(`HTTP ${response.status}: ${await response.text()}`);
        // Tag rate-limit responses so the retry logic below can back off
        if (response.status === 429) err.code = 'rate_limit_exceeded';
        throw err;
      }

      const job = await response.json();
      const results = await waitForResults(job.id); // polling helper from the Job Lifecycle section

      return results;
    } catch (error: any) {
      attempt++;

      if (error.code === 'rate_limit_exceeded') {
        // Exponential backoff for rate limits
        const delay = Math.pow(2, attempt) * 1000;
        console.log(`Rate limited. Retrying in ${delay}ms...`);
        await new Promise(resolve => setTimeout(resolve, delay));
      } else if (attempt >= maxRetries) {
        console.error(`Failed after ${maxRetries} attempts:`, error);
        throw error;
      } else {
        console.log(`Attempt ${attempt} failed, retrying...`);
        await new Promise(resolve => setTimeout(resolve, 1000));
      }
    }
  }
}

Next Steps