My App
DocExtract API

Webhooks

Receive real-time notifications for document processing events

Webhooks

Webhooks provide real-time notifications when document processing jobs complete or fail, and when your credit balance runs low or is depleted. Instead of polling the API repeatedly, configure a webhook endpoint to receive these event notifications automatically.

Why Use Webhooks?

Benefits over polling:

  • Real-time: Receive notifications instantly when jobs complete
  • Efficient: No need to poll the API repeatedly
  • Scalable: Handle high-volume processing without rate limits
  • Reliable: Automatic retry mechanism for failed deliveries

Webhook Events

DocExtract sends webhook notifications for these events:

| Event | Description | When Triggered |
| --- | --- | --- |
| job.completed | Job completed successfully | Extraction finished |
| job.failed | Job failed | Extraction failed |
| credits.low_balance | Credit balance is low | Balance falls below threshold |
| credits.depleted | Credits fully depleted | Balance reaches zero |

Setting Up Webhooks

1. Create an Endpoint

Create an HTTPS endpoint that accepts POST requests:

import express from 'express';
import crypto from 'crypto';

const app = express();
app.use(express.raw({ type: 'application/json' }));

app.post('/webhooks/docextract', (req, res) => {
  // Verify webhook signature
  const signature = req.headers['x-docextract-signature'] as string;
  const isValid = verifyWebhookSignature(
    req.body,
    signature,
    process.env.WEBHOOK_SECRET!
  );

  if (!isValid) {
    return res.status(401).send('Invalid signature');
  }

  // Parse event
  const event = JSON.parse(req.body.toString());

  // Handle event
  handleWebhookEvent(event);

  // Respond quickly (within 5 seconds)
  res.status(200).send('OK');
});

function verifyWebhookSignature(
  payload: Buffer,
  signature: string,
  secret: string
): boolean {
  const expectedSignature = crypto
    .createHmac('sha256', secret)
    .update(payload)
    .digest('hex');

  return crypto.timingSafeEqual(
    Buffer.from(signature),
    Buffer.from(expectedSignature)
  );
}

async function handleWebhookEvent(event: any) {
  switch (event.event) {
    case 'job.completed':
      await handleJobCompleted(event.data);
      break;

    case 'job.failed':
      await handleJobFailed(event.data);
      break;

    case 'credits.low_balance':
      await handleLowBalance(event.data);
      break;

    default:
      console.log('Unknown event type:', event.event);
  }
}

app.listen(3000, () => {
  console.log('Webhook endpoint listening on port 3000');
});
import hashlib
import hmac
import os

from flask import Flask, request

app = Flask(__name__)

@app.route('/webhooks/docextract', methods=['POST'])
def webhook_handler():
    # Verify webhook signature
    signature = request.headers.get('X-DocExtract-Signature')
    payload = request.get_data()

    if not verify_webhook_signature(payload, signature):
        return 'Invalid signature', 401

    # Parse event
    event = request.get_json()

    # Handle event
    handle_webhook_event(event)

    # Respond quickly
    return 'OK', 200

def verify_webhook_signature(payload: bytes, signature: str) -> bool:
    secret = os.environ['WEBHOOK_SECRET'].encode()
    expected_signature = hmac.new(
        secret,
        payload,
        hashlib.sha256
    ).hexdigest()

    return hmac.compare_digest(signature, expected_signature)

def handle_webhook_event(event):
    event_type = event['event']

    if event_type == 'job.completed':
        handle_job_completed(event['data'])
    elif event_type == 'job.failed':
        handle_job_failed(event['data'])
    elif event_type == 'credits.low_balance':
        handle_low_balance(event['data'])
    else:
        print(f'Unknown event type: {event_type}')

if __name__ == '__main__':
    app.run(port=3000)
package main

import (
    "crypto/hmac"
    "crypto/sha256"
    "encoding/hex"
    "encoding/json"
    "io"
    "log"
    "net/http"
    "os"
)

type WebhookEvent struct {
    Event string                 `json:"event"`
    Data  map[string]interface{} `json:"data"`
}

func webhookHandler(w http.ResponseWriter, r *http.Request) {
    // Read payload
    payload, err := io.ReadAll(r.Body)
    if err != nil {
        http.Error(w, "Failed to read body", http.StatusBadRequest)
        return
    }

    // Verify signature
    signature := r.Header.Get("X-DocExtract-Signature")
    if !verifyWebhookSignature(payload, signature) {
        http.Error(w, "Invalid signature", http.StatusUnauthorized)
        return
    }

    // Parse event
    var event WebhookEvent
    if err := json.Unmarshal(payload, &event); err != nil {
        http.Error(w, "Invalid JSON", http.StatusBadRequest)
        return
    }

    // Handle event
    handleWebhookEvent(event)

    // Respond quickly
    w.WriteHeader(http.StatusOK)
    w.Write([]byte("OK"))
}

func verifyWebhookSignature(payload []byte, signature string) bool {
    secret := []byte(os.Getenv("WEBHOOK_SECRET"))
    mac := hmac.New(sha256.New, secret)
    mac.Write(payload)
    expectedSignature := hex.EncodeToString(mac.Sum(nil))

    return hmac.Equal([]byte(signature), []byte(expectedSignature))
}

func handleWebhookEvent(event WebhookEvent) {
    switch event.Event {
    case "job.completed":
        handleJobCompleted(event.Data)
    case "job.failed":
        handleJobFailed(event.Data)
    case "credits.low_balance":
        handleLowBalance(event.Data)
    default:
        log.Printf("Unknown event type: %s", event.Event)
    }
}

func main() {
    http.HandleFunc("/webhooks/docextract", webhookHandler)
    log.Fatal(http.ListenAndServe(":3000", nil))
}

2. Register Your Webhook

Configure your webhook URL in the Adteco dashboard:

  1. Go to DocExtract Settings
  2. Click Webhooks tab
  3. Enter your webhook URL (must be HTTPS)
  4. Copy the Webhook Secret (used for signature verification)
  5. Select events to receive
  6. Click Save

3. Test Your Webhook

Send a test event to verify your endpoint:

curl -X POST https://api.adteco.com/v1/webhooks/test \
  -H "Authorization: Bearer sk_live_your_api_key" \
  -H "Content-Type: application/json" \
  -d '{
    "webhook_url": "https://your-domain.com/webhooks/docextract"
  }'

You should receive a test event at your endpoint:

{
  "event": "webhook.test",
  "data": {
    "message": "This is a test webhook event",
    "timestamp": "2024-11-23T10:00:00Z"
  }
}

Webhook Payloads

job.completed

Sent when a document processing job completes successfully.

{
  "event": "job.completed",
  "data": {
    "id": "job_abc123...",
    "org_id": "org_xyz789...",
    "extractor_id": "ext_def456...",
    "status": "completed",
    "extracted_data": {
      "invoice_number": "INV-2024-001",
      "total_amount": 1250.50,
      "invoice_date": "2024-11-15"
    },
    "confidence": {
      "invoice_number": 0.98,
      "total_amount": 0.95,
      "invoice_date": 0.99
    },
    "processing_time_ms": 4532,
    "cost_credits": 2,
    "metadata": {
      "customer_id": "cust_123",
      "source": "email"
    },
    "created_at": "2024-11-23T10:00:00Z",
    "completed_at": "2024-11-23T10:00:05Z"
  }
}

Example Handler:

async function handleJobCompleted(data: any) {
  console.log(`Job ${data.id} completed successfully`);

  // Save to database
  await db.invoices.create({
    docextract_job_id: data.id,
    invoice_number: data.extracted_data.invoice_number,
    total_amount: data.extracted_data.total_amount,
    invoice_date: data.extracted_data.invoice_date,
    confidence_scores: data.confidence,
  });

  // Send notification
  if (data.metadata?.customer_id) {
    await sendEmail({
      to: data.metadata.email,
      subject: 'Invoice Processed',
      body: `Your invoice ${data.extracted_data.invoice_number} has been processed.`,
    });
  }
}

job.failed

Sent when a document processing job fails.

{
  "event": "job.failed",
  "data": {
    "id": "job_abc123...",
    "org_id": "org_xyz789...",
    "extractor_id": "ext_def456...",
    "status": "failed",
    "error_details": {
      "code": "document_unreadable",
      "message": "Document quality is too low to extract text",
      "suggestion": "Please provide a higher quality scan or image"
    },
    "processing_time_ms": 1200,
    "cost_credits": 0,
    "metadata": {
      "customer_id": "cust_123"
    },
    "created_at": "2024-11-23T10:00:00Z",
    "failed_at": "2024-11-23T10:00:01Z"
  }
}

Example Handler:

async function handleJobFailed(data: any) {
  console.error(`Job ${data.id} failed:`, data.error_details.message);

  // Log error
  await errorLogger.log({
    type: 'docextract_job_failed',
    job_id: data.id,
    error_code: data.error_details.code,
    error_message: data.error_details.message,
  });

  // Notify user
  if (data.metadata?.customer_id) {
    await sendEmail({
      to: data.metadata.email,
      subject: 'Document Processing Failed',
      body: `
        We couldn't process your document.
        Reason: ${data.error_details.message}

        ${data.error_details.suggestion}
      `,
    });
  }

  // Retry if error is temporary
  const retryableErrors = ['temporary_error', 'service_unavailable'];
  if (retryableErrors.includes(data.error_details.code)) {
    await retryJob(data.id);
  }
}

credits.low_balance

Sent when credit balance falls below configured threshold.

{
  "event": "credits.low_balance",
  "data": {
    "org_id": "org_xyz789...",
    "current_balance": 50,
    "threshold": 100,
    "percentage_remaining": 2.5,
    "estimated_jobs_remaining": 25,
    "timestamp": "2024-11-23T10:00:00Z"
  }
}

Example Handler:

async function handleLowBalance(data: any) {
  console.warn(`⚠️ Low credit balance: ${data.current_balance} credits`);

  // Send alert to admin
  await sendEmail({
    to: 'admin@company.com',
    subject: '⚠️ DocExtract Credits Running Low',
    body: `
      Your DocExtract credit balance is low.

      Current Balance: ${data.current_balance} credits
      Estimated Jobs Remaining: ${data.estimated_jobs_remaining}

      Purchase more credits: https://app.adteco.com/docextract/billing
    `,
  });

  // Create notification in dashboard
  await db.notifications.create({
    type: 'low_credits',
    message: `Credit balance is low: ${data.current_balance} credits remaining`,
    action_url: 'https://app.adteco.com/docextract/billing',
  });
}

credits.depleted

Sent when credit balance reaches zero.

{
  "event": "credits.depleted",
  "data": {
    "org_id": "org_xyz789...",
    "depleted_at": "2024-11-23T10:00:00Z",
    "jobs_queued": 5
  }
}

Security

Verify Webhook Signatures

Always verify webhook signatures to ensure requests are from DocExtract.

Each webhook request includes an X-DocExtract-Signature header containing the hex-encoded HMAC-SHA256 signature of the raw request body, computed with your Webhook Secret as the key.

import crypto from 'crypto';

function verifyWebhookSignature(
  payload: string | Buffer,
  signature: string,
  secret: string
): boolean {
  const expectedSignature = crypto
    .createHmac('sha256', secret)
    .update(payload)
    .digest('hex');

  // Use constant-time comparison to prevent timing attacks
  return crypto.timingSafeEqual(
    Buffer.from(signature),
    Buffer.from(expectedSignature)
  );
}

// Usage in Express
app.post('/webhooks/docextract', express.raw({ type: 'application/json' }), (req, res) => {
  const signature = req.headers['x-docextract-signature'] as string;
  const secret = process.env.WEBHOOK_SECRET!;

  if (!verifyWebhookSignature(req.body, signature, secret)) {
    return res.status(401).send('Invalid signature');
  }

  // Process webhook...
  const event = JSON.parse(req.body.toString());
  handleWebhookEvent(event);

  res.status(200).send('OK');
});

Best Practices

  1. Use HTTPS: Webhook URLs must use HTTPS
  2. Verify signatures: Always verify the signature before processing
  3. Respond quickly: Return 200 within 5 seconds
  4. Process async: Handle events in background jobs
  5. Idempotency: Events may be delivered more than once
  6. Log events: Keep records for debugging and auditing

Retry Mechanism

If your endpoint fails to respond or returns an error, DocExtract will retry the webhook:

| Attempt | Delay | Total Elapsed |
| --- | --- | --- |
| 1 | Immediate | 0s |
| 2 | 5 seconds | 5s |
| 3 | 30 seconds | 35s |
| 4 | 2 minutes | 2m 35s |
| 5 | 10 minutes | 12m 35s |
| 6 | 1 hour | 1h 12m 35s |

After 6 failed attempts, the webhook is marked as failed and won't be retried automatically.

Handling Retries

Make your webhook handler idempotent to safely handle duplicate deliveries:

async function handleWebhookEvent(event: any) {
  const eventId = event.data.id;

  // Check if we've already processed this event
  const existing = await db.processedEvents.findOne({ event_id: eventId });

  if (existing) {
    console.log(`Event ${eventId} already processed. Skipping.`);
    return;
  }

  // Process event
  await processEvent(event);

  // Mark as processed
  await db.processedEvents.create({
    event_id: eventId,
    event_type: event.event,
    processed_at: new Date(),
  });
}

Debugging Webhooks

View Webhook Logs

Check webhook delivery status in the dashboard:

  1. Go to DocExtract Settings
  2. Click Webhooks tab
  3. Click View Logs

The logs show:

  • Timestamp
  • Event type
  • HTTP status code
  • Response time
  • Retry attempts
  • Error messages

Local Testing

Use tools like ngrok to test webhooks locally:

# Install ngrok
npm install -g ngrok

# Start your local server
node server.js

# Create tunnel to localhost
ngrok http 3000

# Use the ngrok URL as your webhook endpoint
# Example: https://abc123.ngrok.io/webhooks/docextract

Manual Replay

Replay a webhook event from the dashboard:

  1. Go to webhook logs
  2. Find the event you want to replay
  3. Click Replay
  4. Event will be sent again to your endpoint

Advanced Patterns

Queue-Based Processing

Process webhooks asynchronously using a message queue:

import { Queue } from 'bullmq';

const jobQueue = new Queue('docextract-jobs');

app.post('/webhooks/docextract', async (req, res) => {
  // Verify signature
  const signature = req.headers['x-docextract-signature'] as string;
  if (!verifyWebhookSignature(req.body, signature, process.env.WEBHOOK_SECRET!)) {
    return res.status(401).send('Invalid signature');
  }

  const event = JSON.parse(req.body.toString());

  // Add to queue for async processing
  await jobQueue.add('process-webhook', event);

  // Respond immediately
  res.status(200).send('OK');
});

// Worker processes queue
const worker = new Worker('docextract-jobs', async job => {
  const event = job.data;
  await handleWebhookEvent(event);
});

Fan-Out Pattern

Distribute webhook events to multiple handlers:

const handlers = [
  handleDatabase,
  handleNotifications,
  handleAnalytics,
  handleIntegrations,
];

async function handleWebhookEvent(event: any) {
  // Execute all handlers in parallel
  await Promise.allSettled(
    handlers.map(handler => handler(event))
  );
}

async function handleDatabase(event: any) {
  if (event.event === 'job.completed') {
    await db.jobs.create(event.data);
  }
}

async function handleNotifications(event: any) {
  if (event.event === 'job.completed') {
    await sendNotification(event.data);
  }
}

async function handleAnalytics(event: any) {
  await analytics.track({
    event: event.event,
    properties: event.data,
  });
}

Webhook vs Polling

| Aspect | Webhooks | Polling |
| --- | --- | --- |
| Real-time | ✅ Instant | ❌ Delayed |
| Efficiency | ✅ No wasted requests | ❌ Many empty responses |
| Complexity | ⚠️ Requires endpoint setup | ✅ Simple to implement |
| Reliability | ⚠️ Needs retry logic | ✅ Client controls retries |
| Rate Limits | ✅ No limits | ❌ Subject to rate limits |

Recommendation: Use webhooks for production systems. Use polling only for development/testing.

Next Steps