Connection conn = getNonManagedTXConnection(); try { // Other than the first time, always checkin first to make sure there is // work to be done before we acquire the lock (since that is expensive, // and is almost never necessary). This must be done in a separate // transaction to prevent a deadlock under recovery conditions. List<SchedulerStateRecord> failedRecords = null; if (!firstCheckIn) { failedRecords = clusterCheckIn(conn); commitConnection(conn); } if (firstCheckIn || (failedRecords.size() > 0)) { getLockHandler().obtainLock(conn, LOCK_STATE_ACCESS); transStateOwner = true; // Now that we own the lock, make sure we still have work to do. // The first time through, we also need to make sure we update/create our state record failedRecords = (firstCheckIn) ? clusterCheckIn(conn) : findFailedInstances(conn); if (failedRecords.size() > 0) { getLockHandler().obtainLock(conn, LOCK_TRIGGER_ACCESS); //getLockHandler().obtainLock(conn, LOCK_JOB_ACCESS); transOwner = true; clusterRecover(conn, failedRecords); recovered = true; } } commitConnection(conn); } catch (JobPersistenceException e) { rollbackConnection(conn); throw e; } finally { try { releaseLock(LOCK_TRIGGER_ACCESS, transOwner); } finally { try { releaseLock(LOCK_STATE_ACCESS, transStateOwner); } finally { cleanupConnection(conn); } } }
protected List<SchedulerStateRecord> findFailedInstances(Connection conn) throws JobPersistenceException { try { List<SchedulerStateRecord> failedInstances = new LinkedList<SchedulerStateRecord>(); boolean foundThisScheduler = false; long timeNow = System.currentTimeMillis(); // 获取 qrzt_scheduler_state 表中,记录。对应sql是:SELECT * FROM QRTZ_SCHEDULER_STATE WHERE SCHED_NAME = 'zl',其中SCHED_NAME是配置文件中的org.quartz.scheduler.instanceName值 List<SchedulerStateRecord> states = getDelegate().selectSchedulerStateRecords(conn, null);
for(SchedulerStateRecord rec: states) { // find own record... if (rec.getSchedulerInstanceId().equals(getInstanceId())) { foundThisScheduler = true; if (firstCheckIn) { failedInstances.add(rec); } } else { // find failed instances... if (calcFailedIfAfter(rec) < timeNow) { failedInstances.add(rec); } } } // The first time through, also check for orphaned fired triggers. if (firstCheckIn) { failedInstances.addAll(findOrphanedFailedInstances(conn, states)); } // If not the first time but we didn't find our own instance, then // Someone must have done recovery for us. // !foundThisScheduler 表示 应用程序没有找到 自己的 instance // !firstCheckIn 表示 应该表示 应用程序是否为第一次checkIn if ((!foundThisScheduler) && (!firstCheckIn)) { // FUTURE_TODO: revisit when handle self-failed-out impl'ed (see FUTURE_TODO in clusterCheckIn() below) getLog().warn( "This scheduler instance (" + getInstanceId() + ") is still " + "active but was recovered by another instance in the cluster. " + "This may cause inconsistent behavior."); } return failedInstances; } catch (Exception e) { lastCheckin = System.currentTimeMillis(); thrownew JobPersistenceException("Failure identifying failed instances when checking-in: " + e.getMessage(), e); } }