Use without Airtable in Lambda, Airflow, Glue, GCP, and more
Choose how to use the enricher:
| Mode | Use Case | Needs Airtable? |
|---|---|---|
| `batch` | Read from Airtable, enrich, write back | ✅ Yes - requires `apiKey`, `baseId`, `tableId` |
| `single` | Enrich specific Airtable records | ✅ Yes - same as `batch`, plus `recordIds` |
| `api` | Standalone enrichment (no Airtable) | ❌ No - just provide a `companies` array |
Use without Airtable - perfect for integrations:

```http
POST https://api.apify.com/v2/acts/datahq~airtable-lead-enricher/run-sync-get-dataset-items?token=YOUR_TOKEN

{
  "mode": "api",
  "companies": [
    {"companyName": "Acme Corp", "website": "https://acme.example"}
  ]
}
```
- API mode: max 1,000 companies per run (for larger lists, see the chunking sketch below)
- Airtable modes (`batch`, `single`): max 100 records per run
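If you have more than 1,000 companies, split the list client-side. A minimal sketch in Python (the helper name `enrich_in_chunks` is illustrative, not part of the actor):

```python
import os

import requests

def enrich_in_chunks(companies, chunk_size=1000):
    """Call the actor once per chunk of up to 1,000 companies (the API-mode limit)."""
    enriched = []
    for i in range(0, len(companies), chunk_size):
        response = requests.post(
            "https://api.apify.com/v2/acts/datahq~airtable-lead-enricher/run-sync-get-dataset-items",
            params={"token": os.environ["APIFY_TOKEN"]},
            json={"mode": "api", "companies": companies[i:i + chunk_size]},
            timeout=300,
        )
        # Keep only enriched-company items; the dataset also contains RUN_STATS
        enriched += [r for r in response.json() if r.get("type") == "ENRICHED_COMPANY"]
    return enriched
```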
Ready-to-use code for common platforms lives in the `examples/` directory. In every case the LLM key is passed in the run input as `"llm": {"apiKey": "sk-..."}`; the examples read it from infrastructure environment variables, but that is optional and only a convention for your own code.
File: examples/lambda_python.py
```python
import json
import os

import requests

def lambda_handler(event, context):
    config = {
        "mode": "api",
        "companies": [
            {"companyName": "Acme Corp", "website": "https://acme.example"}
        ],
        "llm": {
            "enabled": True,
            "provider": "openai",
            "apiKey": os.environ['OPENAI_API_KEY']
        }
    }
    response = requests.post(
        "https://api.apify.com/v2/acts/datahq~airtable-lead-enricher/run-sync-get-dataset-items",
        params={"token": os.environ['APIFY_TOKEN']},
        json=config,
        timeout=300
    )
    results = response.json()
    # Keep only enriched-company items; the dataset also contains a RUN_STATS item
    enriched = [r for r in results if r.get('type') == 'ENRICHED_COMPANY']
    return {'statusCode': 200, 'body': json.dumps({'enriched': enriched})}
```
Environment Variables: APIFY_TOKEN, OPENAI_API_KEY
File: examples/lambda_node.js
```javascript
exports.handler = async (event) => {
  const config = {
    mode: "api",
    companies: [
      { companyName: "Acme Corp", website: "https://acme.example" }
    ],
    llm: {
      enabled: true,
      provider: "openai",
      apiKey: process.env.OPENAI_API_KEY
    }
  };
  const response = await fetch(
    `https://api.apify.com/v2/acts/datahq~airtable-lead-enricher/run-sync-get-dataset-items?token=${process.env.APIFY_TOKEN}`,
    {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(config)
    }
  );
  const results = await response.json();
  const enriched = results.filter(r => r.type === 'ENRICHED_COMPANY');
  return { statusCode: 200, body: JSON.stringify({ enriched }) };
};
```
File: examples/gcp_function_python.py
```python
import json
import os

import functions_framework
import requests

@functions_framework.http
def enrich_leads(request):
    request_json = request.get_json(silent=True)
    # get_json(silent=True) returns None on missing/invalid JSON, so guard it
    companies = (request_json or {}).get('companies', [])
    config = {
        "mode": "api",
        "companies": companies,
        "llm": {
            "enabled": True,
            "provider": "openai",
            "apiKey": os.environ['OPENAI_API_KEY']
        }
    }
    response = requests.post(
        "https://api.apify.com/v2/acts/datahq~airtable-lead-enricher/run-sync-get-dataset-items",
        params={"token": os.environ['APIFY_TOKEN']},
        json=config,
        timeout=300
    )
    results = response.json()
    enriched = [r for r in results if r.get('type') == 'ENRICHED_COMPANY']
    return json.dumps({'enriched': enriched})
```
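Once deployed, callers only need to send the companies list. A sketch of a client call (the function URL is a placeholder for your deployment):

```python
import requests

# Hypothetical deployed-function URL - substitute your own
resp = requests.post(
    "https://REGION-PROJECT.cloudfunctions.net/enrich_leads",
    json={"companies": [{"companyName": "Acme Corp", "website": "https://acme.example"}]},
    timeout=300,
)
print(resp.json())
```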
File: examples/glue_job.py
```python
import sys

import requests
from awsglue.context import GlueContext
from awsglue.utils import getResolvedOptions
from pyspark.context import SparkContext

args = getResolvedOptions(sys.argv, ['JOB_NAME', 'APIFY_TOKEN', 'OPENAI_API_KEY'])
sc = SparkContext()
glueContext = GlueContext(sc)

# Read input data
input_df = glueContext.create_dynamic_frame.from_catalog(
    database="your_database",
    table_name="companies"
).toDF()

# Enrich each company
for row in input_df.collect():
    config = {
        "mode": "api",
        "companies": [{"companyName": row.company_name, "website": row.website}],
        "llm": {"enabled": True, "provider": "openai", "apiKey": args['OPENAI_API_KEY']}
    }
    response = requests.post(
        "https://api.apify.com/v2/acts/datahq~airtable-lead-enricher/run-sync-get-dataset-items",
        params={"token": args['APIFY_TOKEN']},
        json=config
    )
    # Process results...
```
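One actor run per row gets slow for large tables. Since API mode accepts up to 1,000 companies per run, a batched variant can cut the number of runs. A sketch, assuming the same `company_name`/`website` columns and `args` as above:

```python
# Sketch: batch rows into one actor run per 1,000 companies (the API-mode limit)
rows = input_df.collect()
for i in range(0, len(rows), 1000):
    batch = rows[i:i + 1000]
    config = {
        "mode": "api",
        "companies": [
            {"companyName": r.company_name, "website": r.website} for r in batch
        ],
        "llm": {"enabled": True, "provider": "openai", "apiKey": args['OPENAI_API_KEY']}
    }
    response = requests.post(
        "https://api.apify.com/v2/acts/datahq~airtable-lead-enricher/run-sync-get-dataset-items",
        params={"token": args['APIFY_TOKEN']},
        json=config,
        timeout=300,
    )
    enriched = [r for r in response.json() if r.get('type') == 'ENRICHED_COMPANY']
    # Write enriched rows back to your target table here
```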
File: examples/ecs_task_definition.json
```json
{
  "family": "lead-enricher",
  "networkMode": "awsvpc",
  "requiresCompatibilities": ["FARGATE"],
  "cpu": "256",
  "memory": "512",
  "containerDefinitions": [{
    "name": "lead-enricher",
    "image": "python:3.11-slim",
    "command": ["python", "-c", "import requests; ..."],
    "secrets": [
      {"name": "APIFY_TOKEN", "valueFrom": "arn:aws:secretsmanager:..."},
      {"name": "OPENAI_API_KEY", "valueFrom": "arn:aws:secretsmanager:..."}
    ]
  }]
}
```
File: examples/docker_run.sh
```bash
docker run --rm \
  -e APIFY_TOKEN="${APIFY_TOKEN}" \
  -e OPENAI_API_KEY="${OPENAI_API_KEY}" \
  python:3.11-slim \
  bash -c "pip install requests && python -c 'import requests; ...'"
```
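The elided `python -c 'import requests; ...'` one-liners in both container examples stand in for any small client script. A minimal standalone version could look like this (a sketch; the `enrich.py` file name is illustrative, the env-var names match the examples above):

```python
# enrich.py - hypothetical standalone client for the Docker/ECS examples above
import json
import os

import requests

config = {
    "mode": "api",
    "companies": [{"companyName": "Acme Corp", "website": "https://acme.example"}],
    "llm": {
        "enabled": True,
        "provider": "openai",
        "apiKey": os.environ["OPENAI_API_KEY"],
    },
}
response = requests.post(
    "https://api.apify.com/v2/acts/datahq~airtable-lead-enricher/run-sync-get-dataset-items",
    params={"token": os.environ["APIFY_TOKEN"]},
    json=config,
    timeout=300,
)
enriched = [r for r in response.json() if r.get("type") == "ENRICHED_COMPANY"]
print(json.dumps(enriched, indent=2))
```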
File: examples/github_actions.yml
```yaml
name: Enrich Leads
on:
  schedule:
    - cron: '0 2 * * *'  # Daily at 2 AM
jobs:
  enrich:
    runs-on: ubuntu-latest
    steps:
      - name: Enrich Leads
        env:
          APIFY_TOKEN: ${{ secrets.APIFY_TOKEN }}
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
        run: |
          curl -X POST \
            "https://api.apify.com/v2/acts/datahq~airtable-lead-enricher/run-sync-get-dataset-items?token=${APIFY_TOKEN}" \
            -H "Content-Type: application/json" \
            -d '{"mode":"api","companies":[...]}'
```
File: examples/airflow_dag.py
```python
import os
from datetime import datetime

import requests
from airflow import DAG
from airflow.operators.python import PythonOperator

def enrich_leads(**context):
    config = {
        "mode": "api",
        "companies": [{"companyName": "Acme", "website": "https://acme.example"}],
        "llm": {"enabled": True, "provider": "openai", "apiKey": os.environ['OPENAI_API_KEY']}
    }
    response = requests.post(
        "https://api.apify.com/v2/acts/datahq~airtable-lead-enricher/run-sync-get-dataset-items",
        params={"token": os.environ['APIFY_TOKEN']},
        json=config,
        timeout=300
    )
    return response.json()

with DAG('enrich_leads', schedule_interval='0 2 * * *',
         start_date=datetime(2025, 1, 1), catchup=False) as dag:
    PythonOperator(task_id='enrich', python_callable=enrich_leads)
```
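The operator's return value lands in XCom, so a downstream task can pick the results up without re-running the actor. A variant sketch (the `store` task and its handler are illustrative):

```python
# Sketch: consume the enrichment results from XCom in a follow-up task
def store_results(**context):
    results = context['ti'].xcom_pull(task_ids='enrich')
    enriched = [r for r in results if r.get('type') == 'ENRICHED_COMPANY']
    # Persist enriched records to your warehouse or CRM here
    print(f"Enriched {len(enriched)} companies")

with DAG('enrich_leads', schedule_interval='0 2 * * *',
         start_date=datetime(2025, 1, 1), catchup=False) as dag:
    enrich = PythonOperator(task_id='enrich', python_callable=enrich_leads)
    store = PythonOperator(task_id='store', python_callable=store_results)
    enrich >> store
```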
Batch mode input (reads from Airtable and writes back):

```jsonc
{
  "mode": "batch",
  "airtable": {
    "apiKey": "patXXXXXXXXXXXXXX",
    "baseId": "appXXXXXXXXXXXXXX",
    "tableId": "tblXXXXXXXXXXXXXX",
    "inputFields": {
      "companyName": "Company Name",  // Map to your column
      "website": "Website"
    },
    "outputFields": {
      "email": "Contact Email",  // Map to your column
      "phone": "Phone Number",
      "leadScore": "Lead Score"
    }
  }
}
```
API mode input (standalone, no Airtable):

```jsonc
{
  "mode": "api",
  "companies": [
    {"companyName": "Acme", "website": "https://acme.example"}
  ],
  "llm": {
    "enabled": true,
    "provider": "openai",  // or "anthropic", "bedrock"
    "apiKey": "sk-...",
    "model": "gpt-4o"  // optional
  },
  "enrichment": {
    "sources": ["google_maps", "website", "hunter"],
    "hunter": {"enabled": true, "apiKey": "..."}
  },
  "scoring": {
    "enabled": true,
    "icpCriteria": "B2B SaaS, 50-500 employees, US-based"
  }
}
```
| Provider | Model | API Key |
|---|---|---|
| OpenAI | gpt-4o | Get key |
| Anthropic | claude-haiku-4-5 | Get key |
| Bedrock | claude-haiku-4-5 | Setup |
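To switch providers, change only the `llm` block. For example, with Anthropic (a sketch following the schema above; the key placeholder is illustrative):

```jsonc
{
  "llm": {
    "enabled": true,
    "provider": "anthropic",
    "apiKey": "sk-ant-...",
    "model": "claude-haiku-4-5"  // optional, as with OpenAI
  }
}
```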
Integrate with Salesforce, HubSpot, Pipedrive, Zoho, and other CRMs.
Receive enriched data via POST webhook:
```json
{
  "webhookUrl": "https://your-crm.com/webhook/enriched-leads"
}
```
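On the receiving end, any HTTPS endpoint that accepts JSON POSTs will do. A minimal sketch using Flask; the payload handling is an assumption, so inspect a real delivery to confirm the exact structure:

```python
# Minimal webhook receiver sketch (Flask). The payload shape is assumed
# to match the dataset items shown under Output below - verify against
# a real delivery before relying on it.
from flask import Flask, request

app = Flask(__name__)

@app.route("/webhook/enriched-leads", methods=["POST"])
def enriched_leads():
    payload = request.get_json(force=True)
    items = payload if isinstance(payload, list) else [payload]
    for item in items:
        # Hypothetical handling: forward each enriched company to your CRM
        print(item.get("output", item))
    return "", 204

if __name__ == "__main__":
    app.run(port=8000)
```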
| Platform | Use Case |
|---|---|
| Zapier | New CRM lead → Enrich → Update CRM |
| Make (Integromat) | Schedule enrichment, sync to CRM |
| n8n | Self-hosted workflow automation |
Call from CRM automation (Salesforce Apex, HubSpot workflows, etc.):
```javascript
// HubSpot Workflow Custom Code
const response = await fetch(
  'https://api.apify.com/v2/acts/datahq~airtable-lead-enricher/run-sync-get-dataset-items?token=YOUR_TOKEN',
  {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      mode: 'api',
      companies: [{ companyName: company.name, website: company.website }]
    })
  }
);

// Update HubSpot contact with enriched data
const enriched = await response.json();
// ... update logic ...
```
Using Make (Integromat): add an HTTP module that POSTs the same `mode: "api"` request, then map the returned fields to your CRM. Using Zapier: do the same with a Webhooks by Zapier action.
Example output (dataset items):

```json
[
  {
    "type": "RUN_STATS",
    "stats": {"companiesProcessed": 1, "enrichmentSuccessful": 1, "llmEnabled": true}
  },
  {
    "type": "ENRICHED_COMPANY",
    "input": {"companyName": "Acme Corp", "website": "https://acme.example"},
    "output": {
      "email": "contact@acme.example",
      "phone": "+1 555 0100",
      "leadScore": "Good",
      "icpScore": "Fair",
      "summary": "...",
      "techStack": ["React", "Node.js"],
      "enrichedAt": "2025-12-21T10:00:00.000Z"
    },
    "success": true
  }
]
```
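Each run's dataset mixes one `RUN_STATS` item with the enriched records, so consumers typically split on `type`. A small sketch:

```python
# Sketch: split a run's dataset items into stats and enriched records
items = response.json()  # from any of the requests above
stats = next((i["stats"] for i in items if i.get("type") == "RUN_STATS"), None)
enriched = [
    i["output"]
    for i in items
    if i.get("type") == "ENRICHED_COMPANY" and i.get("success")
]
```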
| Field | Source |
|---|---|
| email | Website / Hunter |
| phone | Maps / Website |
| leadScore | AI (Excellent/Good/Fair/Poor/Bad) |
| icpScore | AI (Excellent/Good/Fair/Poor/Bad) |
| linkedinUrl | Hunter |
| techStack | Website |
| summary | AI |