Jobs
Advanced job management and querying
Jobs represent document processing tasks in the DocExtract API. This guide covers advanced job management, querying, filtering, and monitoring capabilities beyond the basic document submission workflow.
Job Lifecycle
stateDiagram-v2
[*] --> queued: Document submitted
queued --> processing: Worker picks up job
processing --> completed: Extraction successful
processing --> failed: Extraction failed
completed --> [*]
failed --> [*]
Status Transitions
| From | To | Trigger |
|---|---|---|
| queued | processing | Worker starts processing |
| processing | completed | Extraction successful |
| processing | failed | Error occurred |
Jobs cannot be cancelled once submitted. They will either complete or fail.
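Because completed and failed are the only terminal states, a status check only needs to distinguish the four values above. A minimal sketch (the getJobStatus name is illustrative; the endpoint and status field match the job object used elsewhere in this guide):
type JobStatus = 'queued' | 'processing' | 'completed' | 'failed';

async function getJobStatus(jobId: string): Promise<JobStatus> {
  const response = await fetch(
    `https://api.adteco.com/v1/documents/${jobId}`,
    { headers: { 'Authorization': 'Bearer sk_live_your_api_key' } }
  );
  const job = await response.json();
  // queued and processing are transient; completed and failed are terminal
  return job.status;
}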
Advanced Querying
Filter by Multiple Criteria
interface JobQuery {
extractorId?: string;
status?: 'queued' | 'processing' | 'completed' | 'failed';
createdAfter?: string;
createdBefore?: string;
minConfidence?: number;
limit?: number;
offset?: number;
}
async function queryJobs(query: JobQuery) {
const params = new URLSearchParams();
if (query.extractorId) params.append('extractor_id', query.extractorId);
if (query.status) params.append('status', query.status);
if (query.createdAfter) params.append('created_after', query.createdAfter);
if (query.createdBefore) params.append('created_before', query.createdBefore);
if (query.minConfidence !== undefined) {
params.append('min_confidence', query.minConfidence.toString());
}
if (query.limit) params.append('limit', query.limit.toString());
if (query.offset) params.append('offset', query.offset.toString());
const response = await fetch(
`https://api.adteco.com/v1/documents?${params}`,
{
headers: {
'Authorization': 'Bearer sk_live_your_api_key',
},
}
);
return response.json();
}
// Example: Get completed jobs from last 24 hours with high confidence
const yesterday = new Date(Date.now() - 24 * 60 * 60 * 1000).toISOString();
const results = await queryJobs({
status: 'completed',
createdAfter: yesterday,
minConfidence: 0.90,
limit: 100,
});
import requests
from datetime import datetime, timedelta
from typing import Optional
def query_jobs(
extractor_id: Optional[str] = None,
status: Optional[str] = None,
created_after: Optional[str] = None,
created_before: Optional[str] = None,
min_confidence: Optional[float] = None,
limit: int = 50,
offset: int = 0,
):
params = {
'limit': limit,
'offset': offset,
}
if extractor_id:
params['extractor_id'] = extractor_id
if status:
params['status'] = status
if created_after:
params['created_after'] = created_after
if created_before:
params['created_before'] = created_before
if min_confidence is not None:
params['min_confidence'] = min_confidence
response = requests.get(
'https://api.adteco.com/v1/documents',
headers={'Authorization': 'Bearer sk_live_your_api_key'},
params=params,
)
return response.json()
# Example: Get completed jobs from last 24 hours with high confidence
yesterday = (datetime.now() - timedelta(days=1)).isoformat()
results = query_jobs(
status='completed',
created_after=yesterday,
min_confidence=0.90,
limit=100,
)
Pagination
Handle large result sets with pagination:
async function getAllJobs(extractorId: string) {
const allJobs = [];
let offset = 0;
const limit = 100;
while (true) {
const response = await queryJobs({
extractorId,
limit,
offset,
});
allJobs.push(...response.jobs);
// Check if there are more results
if (response.jobs.length < limit || offset + limit >= response.total) {
break;
}
offset += limit;
}
return allJobs;
}
Job Statistics
Get aggregated statistics about your document processing jobs.
curl -X GET "https://api.adteco.com/v1/documents/stats?period=30d" \
-H "Authorization: Bearer sk_live_your_api_key"const response = await fetch(
'https://api.adteco.com/v1/documents/stats?period=30d',
{
headers: {
'Authorization': 'Bearer sk_live_your_api_key',
},
}
);
const stats = await response.json();
console.log('Total jobs:', stats.total_jobs);
console.log('Success rate:', stats.success_rate);
console.log('Avg processing time:', stats.avg_processing_time_ms);
response = requests.get(
'https://api.adteco.com/v1/documents/stats',
headers={'Authorization': 'Bearer sk_live_your_api_key'},
params={'period': '30d'},
)
stats = response.json()
print(f"Total jobs: {stats['total_jobs']}")
print(f"Success rate: {stats['success_rate']}")
print(f"Avg processing time: {stats['avg_processing_time_ms']}ms")Response
{
"period": "30d",
"total_jobs": 1543,
"completed_jobs": 1489,
"failed_jobs": 54,
"success_rate": 0.965,
"avg_processing_time_ms": 4231,
"avg_confidence": 0.91,
"total_credits_used": 3086,
"by_extractor": [
{
"extractor_id": "ext_abc123...",
"extractor_name": "Invoice Extractor",
"job_count": 892,
"success_rate": 0.98
}
],
"by_status": {
"completed": 1489,
"failed": 54,
"queued": 0,
"processing": 0
}
}
Monitoring and Alerts
Track Processing Time
Monitor processing times to detect performance issues:
async function monitorProcessingTimes() {
const recentJobs = await queryJobs({
status: 'completed',
createdAfter: new Date(Date.now() - 60 * 60 * 1000).toISOString(), // Last hour
limit: 100,
});
  const processingTimes = recentJobs.jobs.map(j => j.processing_time_ms);
  // Avoid NaN / -Infinity when no jobs completed in the window
  if (processingTimes.length === 0) return { avgTime: 0, maxTime: 0 };
const avgTime = processingTimes.reduce((a, b) => a + b, 0) / processingTimes.length;
const maxTime = Math.max(...processingTimes);
// Alert if processing is slow
if (avgTime > 10000 || maxTime > 30000) {
console.warn('Processing times are higher than normal');
// Send alert to monitoring service
}
return { avgTime, maxTime };
}
Track Failure Rate
Monitor job failures to detect quality issues.
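The monitor below calls getJobStats, which is not defined in this guide. A minimal sketch, assuming it simply wraps the /v1/documents/stats endpoint from the Job Statistics section:
async function getJobStats(period: string) {
  const response = await fetch(
    `https://api.adteco.com/v1/documents/stats?period=${period}`,
    { headers: { 'Authorization': 'Bearer sk_live_your_api_key' } }
  );
  return response.json();
}
With that helper in place, the failure-rate monitor looks like this: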
async function monitorFailureRate() {
const stats = await getJobStats('24h');
const failureRate = stats.failed_jobs / stats.total_jobs;
// Alert if failure rate exceeds threshold
if (failureRate > 0.05) { // 5% threshold
console.error(`High failure rate detected: ${(failureRate * 100).toFixed(1)}%`);
// Get recent failures to analyze
const failures = await queryJobs({
status: 'failed',
createdAfter: new Date(Date.now() - 24 * 60 * 60 * 1000).toISOString(),
limit: 50,
});
// Group by error code
const errorCounts = failures.jobs.reduce((acc, job) => {
const code = job.error_details?.code || 'unknown';
acc[code] = (acc[code] || 0) + 1;
return acc;
}, {});
console.log('Error breakdown:', errorCounts);
// Send alert with error analysis
}
return { failureRate, totalJobs: stats.total_jobs };
}
Track Confidence Scores
Monitor extraction confidence to ensure quality:
async function monitorConfidence() {
const recentJobs = await queryJobs({
status: 'completed',
createdAfter: new Date(Date.now() - 24 * 60 * 60 * 1000).toISOString(),
limit: 200,
});
// Calculate average confidence per field
const fieldConfidences: Record<string, number[]> = {};
recentJobs.jobs.forEach(job => {
Object.entries(job.confidence).forEach(([field, score]) => {
if (!fieldConfidences[field]) {
fieldConfidences[field] = [];
}
fieldConfidences[field].push(score as number);
});
});
// Alert on low confidence fields
Object.entries(fieldConfidences).forEach(([field, scores]) => {
const avgConfidence = scores.reduce((a, b) => a + b, 0) / scores.length;
if (avgConfidence < 0.80) {
console.warn(
`Low confidence for field "${field}": ${(avgConfidence * 100).toFixed(1)}%`
);
// Consider updating field description in extractor
}
});
return fieldConfidences;
}
Bulk Operations
Reprocess Failed Jobs
Retry failed jobs with an updated extractor:
async function reprocessFailedJobs(extractorId: string) {
// Get all failed jobs for this extractor
const failedJobs = await queryJobs({
extractorId,
status: 'failed',
limit: 100,
});
console.log(`Found ${failedJobs.jobs.length} failed jobs to reprocess`);
// Reprocess each job
const reprocessResults = [];
for (const job of failedJobs.jobs) {
try {
// Download original document
const docResponse = await fetch(job.document_url);
const docBlob = await docResponse.blob();
const docBuffer = Buffer.from(await docBlob.arrayBuffer());
const base64Doc = docBuffer.toString('base64');
// Submit for reprocessing
const newJob = await fetch('https://api.adteco.com/v1/documents', {
method: 'POST',
headers: {
'Authorization': 'Bearer sk_live_your_api_key',
'Content-Type': 'application/json',
},
body: JSON.stringify({
extractor_id: extractorId,
document: base64Doc,
mime_type: job.mime_type,
metadata: {
...job.metadata,
reprocessed_from: job.id,
},
}),
});
reprocessResults.push({
originalJobId: job.id,
newJobId: (await newJob.json()).id,
});
} catch (error) {
console.error(`Failed to reprocess job ${job.id}:`, error);
}
}
return reprocessResults;
}
Export Job Results
Export extracted data to CSV or JSON:
import fs from 'fs';
import { createObjectCsvWriter } from 'csv-writer';
async function exportJobResults(extractorId: string, format: 'csv' | 'json') {
const jobs = await getAllJobs(extractorId);
if (format === 'json') {
// Export as JSON
const exportData = jobs.map(job => ({
job_id: job.id,
status: job.status,
created_at: job.created_at,
completed_at: job.completed_at,
processing_time_ms: job.processing_time_ms,
cost_credits: job.cost_credits,
...job.extracted_data,
}));
fs.writeFileSync(
'export.json',
JSON.stringify(exportData, null, 2)
);
} else {
// Export as CSV
const completedJobs = jobs.filter(j => j.status === 'completed');
if (completedJobs.length === 0) {
console.log('No completed jobs to export');
return;
}
// Get all unique field names
const fieldNames = new Set<string>();
completedJobs.forEach(job => {
Object.keys(job.extracted_data).forEach(field => fieldNames.add(field));
});
const csvWriter = createObjectCsvWriter({
path: 'export.csv',
header: [
{ id: 'job_id', title: 'Job ID' },
{ id: 'created_at', title: 'Created At' },
{ id: 'processing_time_ms', title: 'Processing Time (ms)' },
...Array.from(fieldNames).map(field => ({
id: field,
title: field,
})),
],
});
const records = completedJobs.map(job => ({
job_id: job.id,
created_at: job.created_at,
processing_time_ms: job.processing_time_ms,
...job.extracted_data,
}));
await csvWriter.writeRecords(records);
console.log(`Exported ${records.length} jobs to export.csv`);
}
}
Job Metadata
Leverage metadata for tracking and organization:
// Submit with tracking metadata
const job = await fetch('https://api.adteco.com/v1/documents', {
method: 'POST',
headers: {
'Authorization': 'Bearer sk_live_your_api_key',
'Content-Type': 'application/json',
},
body: JSON.stringify({
extractor_id: 'ext_abc123...',
document: base64Document,
mime_type: 'application/pdf',
metadata: {
customer_id: 'cust_123',
customer_name: 'Acme Corp',
invoice_type: 'recurring',
source: 'email',
source_email: 'invoices@acme.com',
department: 'accounting',
priority: 'high',
processed_by: 'user_456',
batch_id: 'batch_202411_001',
},
}),
});
// Query by metadata (if your org has metadata search enabled; the JobQuery interface above would need a metadata field to support this)
const results = await queryJobs({
metadata: {
customer_id: 'cust_123',
department: 'accounting',
},
limit: 100,
});
Performance Optimization
Parallel Processing
Process multiple documents concurrently:
async function processBatch(
extractorId: string,
documents: Array<{ path: string; metadata?: any }>
) {
// Submit all documents in parallel
const submissions = documents.map(async ({ path, metadata }) => {
const buffer = fs.readFileSync(path);
const base64Doc = buffer.toString('base64');
return fetch('https://api.adteco.com/v1/documents', {
method: 'POST',
headers: {
'Authorization': 'Bearer sk_live_your_api_key',
'Content-Type': 'application/json',
},
body: JSON.stringify({
extractor_id: extractorId,
document: base64Doc,
mime_type: 'application/pdf',
metadata,
}),
}).then(r => r.json());
});
const jobs = await Promise.all(submissions);
console.log(`Submitted ${jobs.length} jobs`);
return jobs;
}
// Process with rate limiting
async function processBatchWithRateLimit(
extractorId: string,
documents: string[],
maxConcurrent: number = 10
) {
const results = [];
for (let i = 0; i < documents.length; i += maxConcurrent) {
const batch = documents.slice(i, i + maxConcurrent);
const batchResults = await processBatch(
extractorId,
batch.map(path => ({ path }))
);
results.push(...batchResults);
// Small delay between batches
if (i + maxConcurrent < documents.length) {
await new Promise(resolve => setTimeout(resolve, 1000));
}
}
return results;
}
Caching Results
Cache job results to avoid reprocessing:
import Redis from 'ioredis';
const redis = new Redis();
async function getJobWithCache(jobId: string) {
// Check cache first
const cached = await redis.get(`job:${jobId}`);
if (cached) {
return JSON.parse(cached);
}
// Fetch from API
const response = await fetch(
`https://api.adteco.com/v1/documents/${jobId}`,
{
headers: {
'Authorization': 'Bearer sk_live_your_api_key',
},
}
);
const job = await response.json();
// Cache completed jobs (expire after 7 days)
if (job.status === 'completed') {
await redis.setex(
`job:${jobId}`,
7 * 24 * 60 * 60,
JSON.stringify(job)
);
}
return job;
}
Best Practices
Job Retention
Jobs are retained for 90 days by default. Archive important results:
async function archiveCompletedJobs() {
const thirtyDaysAgo = new Date(Date.now() - 30 * 24 * 60 * 60 * 1000).toISOString();
const oldJobs = await queryJobs({
status: 'completed',
createdBefore: thirtyDaysAgo,
limit: 100,
});
// Store in your database
for (const job of oldJobs.jobs) {
await db.jobs.create({
docextract_job_id: job.id,
extractor_id: job.extractor_id,
extracted_data: job.extracted_data,
confidence: job.confidence,
processing_time_ms: job.processing_time_ms,
cost_credits: job.cost_credits,
created_at: job.created_at,
completed_at: job.completed_at,
});
}
console.log(`Archived ${oldJobs.jobs.length} jobs`);
}
Error Recovery
Implement robust error recovery:
async function robustProcessDocument(extractorId: string, documentPath: string) {
const maxRetries = 3;
let attempt = 0;
while (attempt < maxRetries) {
try {
const buffer = fs.readFileSync(documentPath);
const base64Doc = buffer.toString('base64');
const response = await fetch('https://api.adteco.com/v1/documents', {
method: 'POST',
headers: {
'Authorization': 'Bearer sk_live_your_api_key',
'Content-Type': 'application/json',
},
body: JSON.stringify({
extractor_id: extractorId,
document: base64Doc,
mime_type: 'application/pdf',
}),
});
      if (!response.ok) {
        const err: any = new Error(`HTTP ${response.status}: ${await response.text()}`);
        // Tag rate-limit responses (HTTP 429) so the retry logic below can back off
        err.code = response.status === 429 ? 'rate_limit_exceeded' : 'http_error';
        throw err;
      }
const job = await response.json();
const results = await waitForResults(job.id);
return results;
} catch (error) {
attempt++;
if (error.code === 'rate_limit_exceeded') {
// Exponential backoff for rate limits
const delay = Math.pow(2, attempt) * 1000;
console.log(`Rate limited. Retrying in ${delay}ms...`);
await new Promise(resolve => setTimeout(resolve, delay));
} else if (attempt >= maxRetries) {
console.error(`Failed after ${maxRetries} attempts:`, error);
throw error;
} else {
console.log(`Attempt ${attempt} failed, retrying...`);
await new Promise(resolve => setTimeout(resolve, 1000));
}
}
}
  throw new Error(`Failed to process ${documentPath} after ${maxRetries} attempts`);
}
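The error-recovery example calls waitForResults, which is not defined earlier in this guide. A minimal polling sketch, assuming GET /v1/documents/{id} returns the job with the status and error_details fields used elsewhere in this guide:
async function waitForResults(jobId: string, pollIntervalMs = 2000, timeoutMs = 120000) {
  const deadline = Date.now() + timeoutMs;
  while (Date.now() < deadline) {
    const response = await fetch(
      `https://api.adteco.com/v1/documents/${jobId}`,
      { headers: { 'Authorization': 'Bearer sk_live_your_api_key' } }
    );
    const job = await response.json();
    if (job.status === 'completed') return job;
    if (job.status === 'failed') {
      throw new Error(`Job ${jobId} failed: ${job.error_details?.code || 'unknown'}`);
    }
    // Still queued or processing; wait before polling again
    await new Promise(resolve => setTimeout(resolve, pollIntervalMs));
  }
  throw new Error(`Timed out waiting for job ${jobId}`);
}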