Debugging & Troubleshooting
Learn how to debug Chronos issues and troubleshoot common problems in production.
Debug Logging
Enable detailed logging to understand what Chronos is doing internally.
Environment Variables
Set the `DEBUG` environment variable to enable logging:
# Enable all Chronos debug logs
DEBUG="chronos:*" node your-app.js
# Enable specific debug categories
DEBUG="chronos:job,chronos:database" node your-app.js
# On Windows CMD
set DEBUG=chronos:* && node your-app.js
# On Windows PowerShell
$env:DEBUG = "chronos:*"; node your-app.js
Debug Categories
Chronos uses different debug categories:
- `chronos:job` - Job execution and lifecycle
- `chronos:database` - Database operations
- `chronos:lock` - Job locking mechanism
- `chronos:schedule` - Job scheduling operations
- `chronos:*` - All debug information
Custom Logging
Implement custom logging for better observability:
// Namespaced loggers built on the `debug` package.
const debug = require('debug');
const jobDebug = debug('app:jobs');
const errorDebug = debug('app:errors');

// Log each job as it begins executing.
scheduler.on('start', (job) => {
  jobDebug(`Starting job: ${job.attrs.name} (${job.attrs._id})`);
});

// Log how long the run took once the job completes.
scheduler.on('complete', (job) => {
  // NOTE(review): the rest of this guide reads job timestamps from job.attrs;
  // fall back to attrs.lastRunAt so the duration is not NaN when job.startTime
  // is not populated — confirm which field the scheduler actually sets.
  const startedAt = job.startTime || job.attrs.lastRunAt;
  jobDebug(`Completed job: ${job.attrs.name} in ${Date.now() - startedAt}ms`);
});

// Log failures, with the error object, under a separate namespace.
scheduler.on('fail', (error, job) => {
  errorDebug(`Job failed: ${job.attrs.name}`, error);
});
Common Issues & Solutions
1. Jobs Not Running
Problem: Jobs are scheduled but never execute.
Debugging Steps:
// Check if scheduler is started.
// `!= null` treats both null and undefined as "not running", so a scheduler
// whose interval handle was never assigned is not reported as started.
console.log('Scheduler running:', scheduler._processInterval != null);

// Check for pending jobs: due now, not disabled, and not locked.
const pendingJobs = await scheduler.jobs({
  nextRunAt: { $lte: new Date() },
  disabled: { $ne: true },
  lockedAt: null
});
console.log(`Pending jobs: ${pendingJobs.length}`);

// Check if job definitions exist (lists the keys of the definitions registry).
console.log('Defined jobs:', Object.keys(scheduler._definitions));
Common Causes:
- Scheduler not started with `await scheduler.start()`
- Job definition missing or incorrect name
- Jobs are disabled
- Database connection issues
2. Jobs Stuck in "Locked" State
Problem: Jobs are locked but never complete or fail.
Debugging:
// Find stuck jobs: never run, and locked for more than 10 minutes.
const stuckJobs = await scheduler.jobs({
  lastRunAt: { $exists: false },
  // Both lock conditions live in ONE object: repeating the `lockedAt` key in
  // the literal would silently discard the first condition.
  lockedAt: {
    $exists: true,
    $lt: new Date(Date.now() - 10 * 60 * 1000)
  }
});
console.log(`Found ${stuckJobs.length} stuck jobs`);

// Unlock stuck jobs by clearing the lock timestamp and saving each job.
for (const job of stuckJobs) {
  job.attrs.lockedAt = null;
  await job.save();
}
Solutions:
- Increase `lockLifetime` for long-running jobs
- Use `job.touch()` in long-running processes
- Implement proper error handling
- Monitor and clean up dead locks
3. High Memory Usage
Problem: Memory usage keeps growing over time.
Debugging:
// Monitor memory usage: every 30 seconds, log process memory (in MB) together
// with the scheduler's locked/running job counts.
setInterval(() => {
  const toMb = (bytes) => Math.round(bytes / 1024 / 1024);
  const { rss, heapUsed, external } = process.memoryUsage();
  const snapshot = {
    rss: toMb(rss),
    heapUsed: toMb(heapUsed),
    external: toMb(external),
    lockedJobs: scheduler._lockedJobs.length,
    runningJobs: scheduler._runningJobs.length
  };
  console.log(snapshot);
}, 30000);
Solutions:
- Clean up completed jobs regularly
- Reduce job data size
- Lower `lockLimit` setting
- Implement job result cleanup
4. Database Connection Issues
Problem: MongoDB connection errors or timeouts.
Monitoring:
// Log every scheduler-level error and call out lost MongoDB connections.
scheduler.on('error', (error) => {
  console.error('Scheduler error:', error);

  // Only network errors get the extra reconnect notice.
  const isConnectionError = error.name === 'MongoNetworkError';
  if (!isConnectionError) {
    return;
  }
  console.log('MongoDB connection lost, attempting reconnect...');
});
// Monitor database health.
// Pings the database through the scheduler's connection and logs the outcome;
// failures are logged rather than thrown, so the timer keeps running.
async function checkDbHealth() {
  try {
    const adminDb = scheduler._db.admin();
    await adminDb.ping();
    console.log('Database connection: OK');
  } catch (pingError) {
    console.error('Database connection: FAILED', pingError.message);
  }
}

setInterval(checkDbHealth, 60000); // Check every minute
Production Monitoring
Job Processing Dashboard
Create a real-time monitoring dashboard:
const express = require('express');
const app = express();

// Real-time job stats.
// Runs four job queries in parallel and responds with the size of each result
// plus the scheduler's in-memory locked/running counts. Any failure is
// reported as a 500 with the error message.
app.get('/api/scheduler/stats', async (req, res) => {
  // Start of the trailing 24-hour window used by the failed/completed queries.
  const dayAgo = () => new Date(Date.now() - 24 * 60 * 60 * 1000);

  try {
    const [pending, running, failed, completed] = await Promise.all([
      // Pending jobs
      scheduler.jobs({
        nextRunAt: { $lte: new Date() },
        disabled: { $ne: true },
        lockedAt: null
      }),
      // Running jobs
      scheduler.jobs({
        lockedAt: { $exists: true },
        lastRunAt: { $exists: true },
        lastFinishedAt: { $exists: false }
      }),
      // Failed jobs (last 24h)
      scheduler.jobs({
        failedAt: { $gte: dayAgo() }
      }),
      // Completed jobs (last 24h)
      scheduler.jobs({
        lastFinishedAt: { $gte: dayAgo() },
        failedAt: { $exists: false }
      })
    ]);

    res.json({
      pending: pending.length,
      running: running.length,
      failed24h: failed.length,
      completed24h: completed.length,
      lockedJobs: scheduler._lockedJobs.length,
      runningJobs: scheduler._runningJobs.length
    });
  } catch (error) {
    res.status(500).json({ error: error.message });
  }
});
// Job details by type.
// Responds with a trimmed-down view of up to 50 jobs whose name matches the
// :type route parameter; any failure is reported as a 500.
app.get('/api/scheduler/jobs/:type', async (req, res) => {
  // Picks the fields exposed by the API from a job's attrs.
  const summarize = ({ attrs }) => ({
    id: attrs._id,
    name: attrs.name,
    nextRunAt: attrs.nextRunAt,
    lastRunAt: attrs.lastRunAt,
    lastFinishedAt: attrs.lastFinishedAt,
    failedAt: attrs.failedAt,
    failReason: attrs.failReason,
    data: attrs.data
  });

  try {
    const query = { name: req.params.type };
    // Presumably a sort specification (descending nextRunAt) — confirm against the scheduler API.
    const order = { nextRunAt: -1 };
    const limit = 50; // Limit to 50 jobs
    const jobs = await scheduler.jobs(query, order, limit);
    res.json(jobs.map((job) => summarize(job)));
  } catch (error) {
    res.status(500).json({ error: error.message });
  }
});
Error Alerting
Set up alerts for critical issues:
/**
 * Tracks per-job failure counts and start delays, and raises an alert when a
 * configured threshold is crossed.
 */
const alerting = {
  failureThreshold: 5, // Alert once a job has 5 recorded consecutive failures
  delayThreshold: 300000, // Alert if job delayed by more than 5 minutes (ms)
  jobFailures: new Map(), // job name -> current failure count

  /**
   * Records one failure for the job and sends a CRITICAL alert once the count
   * reaches `failureThreshold`.
   * @param {object} job - Job whose `attrs.name` identifies the counter.
   * @param {Error} error - The failure; its message is attached to the alert.
   */
  checkJobFailure(job, error) {
    const jobName = job.attrs.name;
    // Include the failure being recorded now, so both the threshold test and
    // the reported number match the real count (previously the pre-increment
    // value was used, firing one failure late with an understated count).
    const failures = (this.jobFailures.get(jobName) || 0) + 1;
    this.jobFailures.set(jobName, failures);
    if (failures >= this.failureThreshold) {
      this.sendAlert('CRITICAL', `Job ${jobName} has failed ${failures} times consecutively`, {
        error: error.message,
        job: job.attrs
      });
    }
  },

  /**
   * Sends a WARNING alert when the job's `nextRunAt` is more than
   * `delayThreshold` milliseconds in the past.
   * @param {object} job - Job whose `attrs.nextRunAt` is compared with now.
   */
  checkJobDelay(job) {
    const nextRunAt = job.attrs.nextRunAt;
    // A job without a scheduled time has no delay to measure; skip it rather
    // than throwing on `.getTime()`.
    if (nextRunAt == null) {
      return;
    }
    const delay = Date.now() - nextRunAt.getTime();
    if (delay > this.delayThreshold) {
      this.sendAlert('WARNING', `Job ${job.attrs.name} is delayed by ${Math.round(delay/1000)} seconds`, {
        job: job.attrs
      });
    }
  },

  /**
   * Emits an alert. Logs to the console; hook real alerting channels in here.
   * @param {string} level - Severity label, e.g. 'CRITICAL' or 'WARNING'.
   * @param {string} message - Human-readable summary.
   * @param {object} data - Extra context logged with the message.
   */
  sendAlert(level, message, data) {
    console.log(`[${level}] ${message}`, data);
    // Send to your alerting system (Slack, PagerDuty, etc.)
    // sendToSlack(message, data);
    // sendToPagerDuty(level, message, data);
  }
};
// Count every failure toward the job's consecutive-failure total.
scheduler.on('fail', (error, job) => {
  alerting.checkJobFailure(job, error);
});

// Check how late the job is when it starts.
scheduler.on('start', (job) => {
  alerting.checkJobDelay(job);
});

// Reset the failure count only after a run succeeds. Resetting on 'start'
// would zero the counter before every run, so it could never climb past 1 and
// the failure threshold would never be reached.
// NOTE(review): assumes the scheduler emits an Agenda-style 'success' event — confirm.
scheduler.on('success', (job) => {
  alerting.jobFailures.set(job.attrs.name, 0);
});
Health Checks
Implement comprehensive health checks:
/**
 * Aggregates scheduler, database, and process checks into one health report.
 */
const healthCheck = {
  /**
   * Runs all checks and scores the result.
   * @returns {Promise<{healthy: boolean, score: number, checks: object}>}
   *   `score` is the fraction of the three scored checks that passed
   *   (scheduler running, database reachable, heap under 1GB); `healthy` is
   *   true when score >= 0.66. `checks.jobsProcessing` is informational and
   *   not part of the score.
   */
  async checkSchedulerHealth() {
    const checks = {
      // `!= null` also treats an undefined interval handle as "not running".
      schedulerRunning: scheduler._processInterval != null,
      databaseConnected: false,
      jobsProcessing: false,
      memoryUsage: process.memoryUsage(),
      uptime: process.uptime()
    };

    // Check database connection
    try {
      await scheduler._db.admin().ping();
      checks.databaseConnected = true;
    } catch (error) {
      checks.databaseError = error.message;
    }

    // Check if jobs are being processed (any job run in the last 5 minutes).
    // Guarded so a failing query is recorded in the report instead of making
    // the whole health check throw.
    try {
      const recentJobs = await scheduler.jobs({
        lastRunAt: {
          $gte: new Date(Date.now() - 5 * 60 * 1000)
        }
      });
      checks.jobsProcessing = recentJobs.length > 0;
    } catch (error) {
      checks.jobsError = error.message;
    }

    // Calculate health score
    const healthScore = [
      checks.schedulerRunning,
      checks.databaseConnected,
      checks.memoryUsage.heapUsed < 1024 * 1024 * 1024 // Less than 1GB
    ].filter(Boolean).length / 3;

    return {
      healthy: healthScore >= 0.66,
      score: healthScore,
      checks
    };
  }
};
// Health check endpoint: 200 when healthy, 503 when unhealthy or when the
// check itself fails.
app.get('/health', async (req, res) => {
  try {
    const health = await healthCheck.checkSchedulerHealth();
    res.status(health.healthy ? 200 : 503).json(health);
  } catch (error) {
    // Mirror the other routes' error handling: answer explicitly so a rejected
    // check cannot leave the request without a response.
    res.status(503).json({ healthy: false, error: error.message });
  }
});
Debugging Tools
Job Inspector
Create a tool to inspect job state:
/**
 * Builds a diagnostic snapshot of a single job's stored state.
 */
class JobInspector {
  /**
   * @param {object} scheduler - Scheduler exposing an async `jobs(query)` method.
   */
  constructor(scheduler) {
    this.scheduler = scheduler;
  }

  /**
   * Looks up one job by id and summarizes its timing, lock, and failure state.
   * @param {*} jobId - Value matched against the job's `_id`.
   * @returns {Promise<object>} The inspection report, or
   *   `{ error: 'Job not found' }` when no job matches. Durations
   *   (`overdueBy`, `lockAge`, `timeSinceLastRun`) are in milliseconds.
   */
  async inspectJob(jobId) {
    // NOTE(review): jobId is passed to the query as-is; if `_id` is stored as
    // a database id object, callers may need to convert a string first — confirm.
    const job = await this.scheduler.jobs({ _id: jobId });
    if (!job.length) {
      return { error: 'Job not found' };
    }

    const jobData = job[0].attrs;
    const now = new Date();
    // A missing nextRunAt must be checked explicitly: `null < now` coerces
    // null to 0 and would report an unscheduled job as overdue.
    const isOverdue = jobData.nextRunAt != null && jobData.nextRunAt < now;

    return {
      id: jobData._id,
      name: jobData.name,
      status: this.getJobStatus(jobData),
      nextRun: jobData.nextRunAt,
      isOverdue,
      overdueBy: isOverdue ? now - jobData.nextRunAt : 0,
      isLocked: !!jobData.lockedAt,
      lockAge: jobData.lockedAt ? now - jobData.lockedAt : 0,
      lastRun: jobData.lastRunAt,
      timeSinceLastRun: jobData.lastRunAt ? now - jobData.lastRunAt : null,
      failureInfo: {
        failedAt: jobData.failedAt,
        failReason: jobData.failReason,
        failCount: jobData.failCount || 0
      },
      data: jobData.data
    };
  }

  /**
   * Derives a status label from job attributes; the first matching rule wins.
   * @param {object} jobData - The job's attrs.
   * @returns {string} 'disabled', 'running', 'failed', 'completed', 'pending',
   *   or 'scheduled'.
   */
  getJobStatus(jobData) {
    if (jobData.disabled) return 'disabled';
    if (jobData.lockedAt && !jobData.lastFinishedAt) return 'running';
    if (jobData.failedAt) return 'failed';
    if (jobData.lastFinishedAt) return 'completed';
    if (jobData.nextRunAt <= new Date()) return 'pending';
    return 'scheduled';
  }
}
const inspector = new JobInspector(scheduler);

// Usage: GET /api/jobs/:id/inspect responds with the inspection report for
// the job identified by the :id route parameter.
app.get('/api/jobs/:id/inspect', async (req, res) => {
  try {
    const result = await inspector.inspectJob(req.params.id);
    res.json(result);
  } catch (error) {
    // Same error handling as the other routes: report lookup failures as a
    // 500 instead of leaving the rejection unhandled.
    res.status(500).json({ error: error.message });
  }
});
This comprehensive debugging and troubleshooting guide will help you identify and resolve issues quickly in production environments.