Merge branch 'master' of github.com-dotta:paperclipai/paperclip
* 'master' of github.com-dotta:paperclipai/paperclip:
  fix: resolve type errors in process-lost-reaper PR
  fix(heartbeat): prevent false process_lost failures on queued and non-child-process runs
  Revert "Merge pull request #707 from paperclipai/nm/premerge-lockfile-refresh"
  fix: ensure embedded PostgreSQL databases use UTF-8 encoding
This commit is contained in:
39
.github/workflows/pr-policy.yml
vendored
39
.github/workflows/pr-policy.yml
vendored
@@ -13,8 +13,6 @@ jobs:
|
|||||||
policy:
|
policy:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
timeout-minutes: 10
|
timeout-minutes: 10
|
||||||
permissions:
|
|
||||||
pull-requests: read
|
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout repository
|
- name: Checkout repository
|
||||||
@@ -33,38 +31,19 @@ jobs:
|
|||||||
with:
|
with:
|
||||||
node-version: 20
|
node-version: 20
|
||||||
|
|
||||||
- name: Enforce lockfile policy when manifests change
|
- name: Block manual lockfile edits
|
||||||
env:
|
if: github.head_ref != 'chore/refresh-lockfile'
|
||||||
GH_TOKEN: ${{ github.token }}
|
|
||||||
run: |
|
run: |
|
||||||
changed="$(gh api "repos/${{ github.repository }}/pulls/${{ github.event.pull_request.number }}/files" --paginate --jq '.[].filename')"
|
changed="$(git diff --name-only "${{ github.event.pull_request.base.sha }}" "${{ github.event.pull_request.head.sha }}")"
|
||||||
manifest_pattern='(^|/)package\.json$|^pnpm-workspace\.yaml$|^\.npmrc$|^pnpmfile\.(cjs|js|mjs)$'
|
|
||||||
|
|
||||||
manifest_changed=false
|
|
||||||
lockfile_changed=false
|
|
||||||
|
|
||||||
if printf '%s\n' "$changed" | grep -Eq "$manifest_pattern"; then
|
|
||||||
manifest_changed=true
|
|
||||||
fi
|
|
||||||
|
|
||||||
if printf '%s\n' "$changed" | grep -qx 'pnpm-lock.yaml'; then
|
if printf '%s\n' "$changed" | grep -qx 'pnpm-lock.yaml'; then
|
||||||
lockfile_changed=true
|
echo "Do not commit pnpm-lock.yaml in pull requests. CI owns lockfile updates."
|
||||||
fi
|
|
||||||
|
|
||||||
if [ "$lockfile_changed" = true ] && [ "$manifest_changed" != true ]; then
|
|
||||||
echo "pnpm-lock.yaml changed without a dependency manifest change." >&2
|
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ "$manifest_changed" = true ]; then
|
- name: Validate dependency resolution when manifests change
|
||||||
|
run: |
|
||||||
|
changed="$(git diff --name-only "${{ github.event.pull_request.base.sha }}" "${{ github.event.pull_request.head.sha }}")"
|
||||||
|
manifest_pattern='(^|/)package\.json$|^pnpm-workspace\.yaml$|^\.npmrc$|^pnpmfile\.(cjs|js|mjs)$'
|
||||||
|
if printf '%s\n' "$changed" | grep -Eq "$manifest_pattern"; then
|
||||||
pnpm install --lockfile-only --ignore-scripts --no-frozen-lockfile
|
pnpm install --lockfile-only --ignore-scripts --no-frozen-lockfile
|
||||||
|
|
||||||
if ! git diff --quiet -- pnpm-lock.yaml; then
|
|
||||||
if [ "${{ github.event.pull_request.head.repo.full_name }}" = "${{ github.repository }}" ]; then
|
|
||||||
echo "pnpm-lock.yaml is stale for this PR. Wait for the Refresh Lockfile workflow to push the bot commit, then rerun checks." >&2
|
|
||||||
else
|
|
||||||
echo "pnpm-lock.yaml is stale for this fork PR. Run pnpm install --lockfile-only --ignore-scripts --no-frozen-lockfile and commit pnpm-lock.yaml." >&2
|
|
||||||
fi
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
fi
|
fi
|
||||||
|
|||||||
2
.github/workflows/pr-verify.yml
vendored
2
.github/workflows/pr-verify.yml
vendored
@@ -30,7 +30,7 @@ jobs:
|
|||||||
cache: pnpm
|
cache: pnpm
|
||||||
|
|
||||||
- name: Install dependencies
|
- name: Install dependencies
|
||||||
run: pnpm install --frozen-lockfile
|
run: pnpm install --no-frozen-lockfile
|
||||||
|
|
||||||
- name: Typecheck
|
- name: Typecheck
|
||||||
run: pnpm -r typecheck
|
run: pnpm -r typecheck
|
||||||
|
|||||||
111
.github/workflows/refresh-lockfile-pr.yml
vendored
111
.github/workflows/refresh-lockfile-pr.yml
vendored
@@ -1,111 +0,0 @@
|
|||||||
name: Refresh Lockfile
|
|
||||||
|
|
||||||
on:
|
|
||||||
pull_request:
|
|
||||||
branches:
|
|
||||||
- master
|
|
||||||
types:
|
|
||||||
- opened
|
|
||||||
- synchronize
|
|
||||||
- reopened
|
|
||||||
- ready_for_review
|
|
||||||
|
|
||||||
concurrency:
|
|
||||||
group: refresh-lockfile-pr-${{ github.event.pull_request.number }}
|
|
||||||
cancel-in-progress: true
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
refresh:
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
timeout-minutes: 10
|
|
||||||
permissions:
|
|
||||||
contents: write
|
|
||||||
pull-requests: read
|
|
||||||
|
|
||||||
steps:
|
|
||||||
- name: Detect dependency manifest changes
|
|
||||||
id: changes
|
|
||||||
env:
|
|
||||||
GH_TOKEN: ${{ github.token }}
|
|
||||||
run: |
|
|
||||||
changed="$(gh api "repos/${{ github.repository }}/pulls/${{ github.event.pull_request.number }}/files" --paginate --jq '.[].filename')"
|
|
||||||
manifest_pattern='(^|/)package\.json$|^pnpm-workspace\.yaml$|^\.npmrc$|^pnpmfile\.(cjs|js|mjs)$'
|
|
||||||
|
|
||||||
if printf '%s\n' "$changed" | grep -Eq "$manifest_pattern"; then
|
|
||||||
echo "manifest_changed=true" >> "$GITHUB_OUTPUT"
|
|
||||||
else
|
|
||||||
echo "manifest_changed=false" >> "$GITHUB_OUTPUT"
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [ "${{ github.event.pull_request.head.repo.full_name }}" = "${{ github.repository }}" ]; then
|
|
||||||
echo "same_repo=true" >> "$GITHUB_OUTPUT"
|
|
||||||
else
|
|
||||||
echo "same_repo=false" >> "$GITHUB_OUTPUT"
|
|
||||||
fi
|
|
||||||
|
|
||||||
- name: Checkout pull request head
|
|
||||||
if: steps.changes.outputs.manifest_changed == 'true'
|
|
||||||
uses: actions/checkout@v4
|
|
||||||
with:
|
|
||||||
repository: ${{ github.event.pull_request.head.repo.full_name }}
|
|
||||||
ref: ${{ github.event.pull_request.head.ref }}
|
|
||||||
fetch-depth: 0
|
|
||||||
|
|
||||||
- name: Setup pnpm
|
|
||||||
if: steps.changes.outputs.manifest_changed == 'true'
|
|
||||||
uses: pnpm/action-setup@v4
|
|
||||||
with:
|
|
||||||
version: 9.15.4
|
|
||||||
run_install: false
|
|
||||||
|
|
||||||
- name: Setup Node.js
|
|
||||||
if: steps.changes.outputs.manifest_changed == 'true'
|
|
||||||
uses: actions/setup-node@v4
|
|
||||||
with:
|
|
||||||
node-version: 20
|
|
||||||
cache: pnpm
|
|
||||||
|
|
||||||
- name: Refresh pnpm lockfile
|
|
||||||
if: steps.changes.outputs.manifest_changed == 'true'
|
|
||||||
run: pnpm install --lockfile-only --ignore-scripts --no-frozen-lockfile
|
|
||||||
|
|
||||||
- name: Fail on unexpected file changes
|
|
||||||
if: steps.changes.outputs.manifest_changed == 'true'
|
|
||||||
run: |
|
|
||||||
changed="$(git status --porcelain)"
|
|
||||||
if [ -z "$changed" ]; then
|
|
||||||
echo "Lockfile is already up to date."
|
|
||||||
exit 0
|
|
||||||
fi
|
|
||||||
if printf '%s\n' "$changed" | grep -Fvq ' pnpm-lock.yaml'; then
|
|
||||||
echo "Unexpected files changed during lockfile refresh:"
|
|
||||||
echo "$changed"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
- name: Commit refreshed lockfile to same-repo PR branch
|
|
||||||
if: steps.changes.outputs.manifest_changed == 'true' && steps.changes.outputs.same_repo == 'true'
|
|
||||||
run: |
|
|
||||||
if git diff --quiet -- pnpm-lock.yaml; then
|
|
||||||
echo "Lockfile unchanged, nothing to do."
|
|
||||||
exit 0
|
|
||||||
fi
|
|
||||||
|
|
||||||
git config user.name "lockfile-bot"
|
|
||||||
git config user.email "lockfile-bot@users.noreply.github.com"
|
|
||||||
git add pnpm-lock.yaml
|
|
||||||
git commit -m "chore(lockfile): refresh pnpm-lock.yaml"
|
|
||||||
git push origin "HEAD:${{ github.event.pull_request.head.ref }}"
|
|
||||||
|
|
||||||
- name: Fail fork PRs that need a lockfile refresh
|
|
||||||
if: steps.changes.outputs.manifest_changed == 'true' && steps.changes.outputs.same_repo != 'true'
|
|
||||||
run: |
|
|
||||||
if git diff --quiet -- pnpm-lock.yaml; then
|
|
||||||
echo "Lockfile unchanged, nothing to do."
|
|
||||||
exit 0
|
|
||||||
fi
|
|
||||||
|
|
||||||
echo "This fork PR changes dependency manifests and requires a refreshed pnpm-lock.yaml." >&2
|
|
||||||
echo "Run: pnpm install --lockfile-only --ignore-scripts --no-frozen-lockfile" >&2
|
|
||||||
echo "Then commit pnpm-lock.yaml to the PR branch." >&2
|
|
||||||
exit 1
|
|
||||||
81
.github/workflows/refresh-lockfile.yml
vendored
Normal file
81
.github/workflows/refresh-lockfile.yml
vendored
Normal file
@@ -0,0 +1,81 @@
|
|||||||
|
name: Refresh Lockfile
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
branches:
|
||||||
|
- master
|
||||||
|
workflow_dispatch:
|
||||||
|
|
||||||
|
concurrency:
|
||||||
|
group: refresh-lockfile-master
|
||||||
|
cancel-in-progress: false
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
refresh:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
timeout-minutes: 10
|
||||||
|
permissions:
|
||||||
|
contents: write
|
||||||
|
pull-requests: write
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: Checkout repository
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- name: Setup pnpm
|
||||||
|
uses: pnpm/action-setup@v4
|
||||||
|
with:
|
||||||
|
version: 9.15.4
|
||||||
|
run_install: false
|
||||||
|
|
||||||
|
- name: Setup Node.js
|
||||||
|
uses: actions/setup-node@v4
|
||||||
|
with:
|
||||||
|
node-version: 20
|
||||||
|
cache: pnpm
|
||||||
|
|
||||||
|
- name: Refresh pnpm lockfile
|
||||||
|
run: pnpm install --lockfile-only --ignore-scripts --no-frozen-lockfile
|
||||||
|
|
||||||
|
- name: Fail on unexpected file changes
|
||||||
|
run: |
|
||||||
|
changed="$(git status --porcelain)"
|
||||||
|
if [ -z "$changed" ]; then
|
||||||
|
echo "Lockfile is already up to date."
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
if printf '%s\n' "$changed" | grep -Fvq ' pnpm-lock.yaml'; then
|
||||||
|
echo "Unexpected files changed during lockfile refresh:"
|
||||||
|
echo "$changed"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
- name: Create or update pull request
|
||||||
|
env:
|
||||||
|
GH_TOKEN: ${{ github.token }}
|
||||||
|
run: |
|
||||||
|
if git diff --quiet -- pnpm-lock.yaml; then
|
||||||
|
echo "Lockfile unchanged, nothing to do."
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
|
||||||
|
BRANCH="chore/refresh-lockfile"
|
||||||
|
git config user.name "lockfile-bot"
|
||||||
|
git config user.email "lockfile-bot@users.noreply.github.com"
|
||||||
|
|
||||||
|
git checkout -B "$BRANCH"
|
||||||
|
git add pnpm-lock.yaml
|
||||||
|
git commit -m "chore(lockfile): refresh pnpm-lock.yaml"
|
||||||
|
git push --force origin "$BRANCH"
|
||||||
|
|
||||||
|
# Create PR if one doesn't already exist
|
||||||
|
existing=$(gh pr list --head "$BRANCH" --json number --jq '.[0].number')
|
||||||
|
if [ -z "$existing" ]; then
|
||||||
|
gh pr create \
|
||||||
|
--head "$BRANCH" \
|
||||||
|
--title "chore(lockfile): refresh pnpm-lock.yaml" \
|
||||||
|
--body "Auto-generated lockfile refresh after dependencies changed on master. This PR only updates pnpm-lock.yaml."
|
||||||
|
echo "Created new PR."
|
||||||
|
else
|
||||||
|
echo "PR #$existing already exists, branch updated via force push."
|
||||||
|
fi
|
||||||
@@ -514,6 +514,7 @@ async function ensureEmbeddedPostgres(dataDir: string, preferredPort: number): P
|
|||||||
password: "paperclip",
|
password: "paperclip",
|
||||||
port,
|
port,
|
||||||
persistent: true,
|
persistent: true,
|
||||||
|
initdbFlags: ["--encoding=UTF8", "--locale=C"],
|
||||||
onLog: () => {},
|
onLog: () => {},
|
||||||
onError: () => {},
|
onError: () => {},
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -19,9 +19,9 @@ Current implementation status:
|
|||||||
|
|
||||||
GitHub Actions owns `pnpm-lock.yaml`.
|
GitHub Actions owns `pnpm-lock.yaml`.
|
||||||
|
|
||||||
- Same-repo pull requests that change dependency manifests are auto-refreshed by GitHub Actions before merge.
|
- Do not commit `pnpm-lock.yaml` in pull requests.
|
||||||
- Fork pull requests that change dependency manifests must include the refreshed `pnpm-lock.yaml`.
|
- Pull request CI validates dependency resolution when manifests change.
|
||||||
- Pull request CI validates lockfile freshness when manifests change and verifies with `--frozen-lockfile`.
|
- Pushes to `master` regenerate `pnpm-lock.yaml` with `pnpm install --lockfile-only --no-frozen-lockfile`, commit it back if needed, and then run verification with `--frozen-lockfile`.
|
||||||
|
|
||||||
## Start Dev
|
## Start Dev
|
||||||
|
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ import {
|
|||||||
} from "@paperclipai/adapter-utils/server-utils";
|
} from "@paperclipai/adapter-utils/server-utils";
|
||||||
|
|
||||||
const MODELS_CACHE_TTL_MS = 60_000;
|
const MODELS_CACHE_TTL_MS = 60_000;
|
||||||
|
const MODELS_DISCOVERY_TIMEOUT_MS = 20_000;
|
||||||
|
|
||||||
function resolveOpenCodeCommand(input: unknown): string {
|
function resolveOpenCodeCommand(input: unknown): string {
|
||||||
const envOverride =
|
const envOverride =
|
||||||
@@ -115,14 +116,14 @@ export async function discoverOpenCodeModels(input: {
|
|||||||
{
|
{
|
||||||
cwd,
|
cwd,
|
||||||
env: runtimeEnv,
|
env: runtimeEnv,
|
||||||
timeoutSec: 20,
|
timeoutSec: MODELS_DISCOVERY_TIMEOUT_MS / 1000,
|
||||||
graceSec: 3,
|
graceSec: 3,
|
||||||
onLog: async () => {},
|
onLog: async () => {},
|
||||||
},
|
},
|
||||||
);
|
);
|
||||||
|
|
||||||
if (result.timedOut) {
|
if (result.timedOut) {
|
||||||
throw new Error("`opencode models` timed out.");
|
throw new Error(`\`opencode models\` timed out after ${MODELS_DISCOVERY_TIMEOUT_MS / 1000}s.`);
|
||||||
}
|
}
|
||||||
if ((result.exitCode ?? 1) !== 0) {
|
if ((result.exitCode ?? 1) !== 0) {
|
||||||
const detail = firstNonEmptyLine(result.stderr) || firstNonEmptyLine(result.stdout);
|
const detail = firstNonEmptyLine(result.stderr) || firstNonEmptyLine(result.stdout);
|
||||||
|
|||||||
@@ -730,7 +730,7 @@ export async function ensurePostgresDatabase(
|
|||||||
`;
|
`;
|
||||||
if (existing.length > 0) return "exists";
|
if (existing.length > 0) return "exists";
|
||||||
|
|
||||||
await sql.unsafe(`create database "${databaseName}"`);
|
await sql.unsafe(`create database "${databaseName}" encoding 'UTF8' lc_collate 'C' lc_ctype 'C' template template0`);
|
||||||
return "created";
|
return "created";
|
||||||
} finally {
|
} finally {
|
||||||
await sql.end();
|
await sql.end();
|
||||||
|
|||||||
@@ -96,6 +96,7 @@ async function ensureEmbeddedPostgresConnection(
|
|||||||
password: "paperclip",
|
password: "paperclip",
|
||||||
port: preferredPort,
|
port: preferredPort,
|
||||||
persistent: true,
|
persistent: true,
|
||||||
|
initdbFlags: ["--encoding=UTF8", "--locale=C"],
|
||||||
onLog: () => {},
|
onLog: () => {},
|
||||||
onError: () => {},
|
onError: () => {},
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -38,9 +38,9 @@
|
|||||||
"@paperclipai/adapter-codex-local": "workspace:*",
|
"@paperclipai/adapter-codex-local": "workspace:*",
|
||||||
"@paperclipai/adapter-cursor-local": "workspace:*",
|
"@paperclipai/adapter-cursor-local": "workspace:*",
|
||||||
"@paperclipai/adapter-gemini-local": "workspace:*",
|
"@paperclipai/adapter-gemini-local": "workspace:*",
|
||||||
|
"@paperclipai/adapter-openclaw-gateway": "workspace:*",
|
||||||
"@paperclipai/adapter-opencode-local": "workspace:*",
|
"@paperclipai/adapter-opencode-local": "workspace:*",
|
||||||
"@paperclipai/adapter-pi-local": "workspace:*",
|
"@paperclipai/adapter-pi-local": "workspace:*",
|
||||||
"@paperclipai/adapter-openclaw-gateway": "workspace:*",
|
|
||||||
"@paperclipai/adapter-utils": "workspace:*",
|
"@paperclipai/adapter-utils": "workspace:*",
|
||||||
"@paperclipai/db": "workspace:*",
|
"@paperclipai/db": "workspace:*",
|
||||||
"@paperclipai/shared": "workspace:*",
|
"@paperclipai/shared": "workspace:*",
|
||||||
|
|||||||
@@ -53,6 +53,7 @@ type EmbeddedPostgresCtor = new (opts: {
|
|||||||
password: string;
|
password: string;
|
||||||
port: number;
|
port: number;
|
||||||
persistent: boolean;
|
persistent: boolean;
|
||||||
|
initdbFlags?: string[];
|
||||||
onLog?: (message: unknown) => void;
|
onLog?: (message: unknown) => void;
|
||||||
onError?: (message: unknown) => void;
|
onError?: (message: unknown) => void;
|
||||||
}) => EmbeddedPostgresInstance;
|
}) => EmbeddedPostgresInstance;
|
||||||
@@ -334,6 +335,7 @@ export async function startServer(): Promise<StartedServer> {
|
|||||||
password: "paperclip",
|
password: "paperclip",
|
||||||
port,
|
port,
|
||||||
persistent: true,
|
persistent: true,
|
||||||
|
initdbFlags: ["--encoding=UTF8", "--locale=C"],
|
||||||
onLog: appendEmbeddedPostgresLog,
|
onLog: appendEmbeddedPostgresLog,
|
||||||
onError: appendEmbeddedPostgresLog,
|
onError: appendEmbeddedPostgresLog,
|
||||||
});
|
});
|
||||||
@@ -512,11 +514,14 @@ export async function startServer(): Promise<StartedServer> {
|
|||||||
if (config.heartbeatSchedulerEnabled) {
|
if (config.heartbeatSchedulerEnabled) {
|
||||||
const heartbeat = heartbeatService(db as any);
|
const heartbeat = heartbeatService(db as any);
|
||||||
|
|
||||||
// Reap orphaned runs at startup (no threshold -- runningProcesses is empty)
|
// Reap orphaned running runs at startup while in-memory execution state is empty,
|
||||||
void heartbeat.reapOrphanedRuns().catch((err) => {
|
// then resume any persisted queued runs that were waiting on the previous process.
|
||||||
logger.error({ err }, "startup reap of orphaned heartbeat runs failed");
|
void heartbeat
|
||||||
});
|
.reapOrphanedRuns()
|
||||||
|
.then(() => heartbeat.resumeQueuedRuns())
|
||||||
|
.catch((err) => {
|
||||||
|
logger.error({ err }, "startup heartbeat recovery failed");
|
||||||
|
});
|
||||||
setInterval(() => {
|
setInterval(() => {
|
||||||
void heartbeat
|
void heartbeat
|
||||||
.tickTimers(new Date())
|
.tickTimers(new Date())
|
||||||
@@ -529,11 +534,13 @@ export async function startServer(): Promise<StartedServer> {
|
|||||||
logger.error({ err }, "heartbeat timer tick failed");
|
logger.error({ err }, "heartbeat timer tick failed");
|
||||||
});
|
});
|
||||||
|
|
||||||
// Periodically reap orphaned runs (5-min staleness threshold)
|
// Periodically reap orphaned runs (5-min staleness threshold) and make sure
|
||||||
|
// persisted queued work is still being driven forward.
|
||||||
void heartbeat
|
void heartbeat
|
||||||
.reapOrphanedRuns({ staleThresholdMs: 5 * 60 * 1000 })
|
.reapOrphanedRuns({ staleThresholdMs: 5 * 60 * 1000 })
|
||||||
|
.then(() => heartbeat.resumeQueuedRuns())
|
||||||
.catch((err) => {
|
.catch((err) => {
|
||||||
logger.error({ err }, "periodic reap of orphaned heartbeat runs failed");
|
logger.error({ err }, "periodic heartbeat recovery failed");
|
||||||
});
|
});
|
||||||
}, config.heartbeatSchedulerIntervalMs);
|
}, config.heartbeatSchedulerIntervalMs);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -455,6 +455,7 @@ export function heartbeatService(db: Db) {
|
|||||||
const runLogStore = getRunLogStore();
|
const runLogStore = getRunLogStore();
|
||||||
const secretsSvc = secretService(db);
|
const secretsSvc = secretService(db);
|
||||||
const issuesSvc = issueService(db);
|
const issuesSvc = issueService(db);
|
||||||
|
const activeRunExecutions = new Set<string>();
|
||||||
|
|
||||||
async function getAgent(agentId: string) {
|
async function getAgent(agentId: string) {
|
||||||
return db
|
return db
|
||||||
@@ -959,7 +960,7 @@ export function heartbeatService(db: Db) {
|
|||||||
const reaped: string[] = [];
|
const reaped: string[] = [];
|
||||||
|
|
||||||
for (const run of activeRuns) {
|
for (const run of activeRuns) {
|
||||||
if (runningProcesses.has(run.id)) continue;
|
if (runningProcesses.has(run.id) || activeRunExecutions.has(run.id)) continue;
|
||||||
|
|
||||||
// Apply staleness threshold to avoid false positives
|
// Apply staleness threshold to avoid false positives
|
||||||
if (staleThresholdMs > 0) {
|
if (staleThresholdMs > 0) {
|
||||||
@@ -998,6 +999,18 @@ export function heartbeatService(db: Db) {
|
|||||||
return { reaped: reaped.length, runIds: reaped };
|
return { reaped: reaped.length, runIds: reaped };
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async function resumeQueuedRuns() {
|
||||||
|
const queuedRuns = await db
|
||||||
|
.select({ agentId: heartbeatRuns.agentId })
|
||||||
|
.from(heartbeatRuns)
|
||||||
|
.where(eq(heartbeatRuns.status, "queued"));
|
||||||
|
|
||||||
|
const agentIds = [...new Set(queuedRuns.map((r) => r.agentId))];
|
||||||
|
for (const agentId of agentIds) {
|
||||||
|
await startNextQueuedRunForAgent(agentId);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
async function updateRuntimeState(
|
async function updateRuntimeState(
|
||||||
agent: typeof agents.$inferSelect,
|
agent: typeof agents.$inferSelect,
|
||||||
run: typeof heartbeatRuns.$inferSelect,
|
run: typeof heartbeatRuns.$inferSelect,
|
||||||
@@ -1089,6 +1102,9 @@ export function heartbeatService(db: Db) {
|
|||||||
run = claimed;
|
run = claimed;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
activeRunExecutions.add(run.id);
|
||||||
|
|
||||||
|
try {
|
||||||
const agent = await getAgent(run.agentId);
|
const agent = await getAgent(run.agentId);
|
||||||
if (!agent) {
|
if (!agent) {
|
||||||
await setRunStatus(runId, "failed", {
|
await setRunStatus(runId, "failed", {
|
||||||
@@ -1676,10 +1692,41 @@ export function heartbeatService(db: Db) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
await finalizeAgentStatus(agent.id, "failed");
|
await finalizeAgentStatus(agent.id, "failed");
|
||||||
} finally {
|
|
||||||
await releaseRuntimeServicesForRun(run.id);
|
|
||||||
await startNextQueuedRunForAgent(agent.id);
|
|
||||||
}
|
}
|
||||||
|
} catch (outerErr) {
|
||||||
|
// Setup code before adapter.execute threw (e.g. ensureRuntimeState, resolveWorkspaceForRun).
|
||||||
|
// The inner catch did not fire, so we must record the failure here.
|
||||||
|
const message = outerErr instanceof Error ? outerErr.message : "Unknown setup failure";
|
||||||
|
logger.error({ err: outerErr, runId }, "heartbeat execution setup failed");
|
||||||
|
await setRunStatus(runId, "failed", {
|
||||||
|
error: message,
|
||||||
|
errorCode: "adapter_failed",
|
||||||
|
finishedAt: new Date(),
|
||||||
|
}).catch(() => undefined);
|
||||||
|
await setWakeupStatus(run.wakeupRequestId, "failed", {
|
||||||
|
finishedAt: new Date(),
|
||||||
|
error: message,
|
||||||
|
}).catch(() => undefined);
|
||||||
|
const failedRun = await getRun(runId).catch(() => null);
|
||||||
|
if (failedRun) {
|
||||||
|
// Emit a run-log event so the failure is visible in the run timeline,
|
||||||
|
// consistent with what the inner catch block does for adapter failures.
|
||||||
|
await appendRunEvent(failedRun, 1, {
|
||||||
|
eventType: "error",
|
||||||
|
stream: "system",
|
||||||
|
level: "error",
|
||||||
|
message,
|
||||||
|
}).catch(() => undefined);
|
||||||
|
await releaseIssueExecutionAndPromote(failedRun).catch(() => undefined);
|
||||||
|
}
|
||||||
|
// Ensure the agent is not left stuck in "running" if the inner catch handler's
|
||||||
|
// DB calls threw (e.g. a transient DB error in finalizeAgentStatus).
|
||||||
|
await finalizeAgentStatus(run.agentId, "failed").catch(() => undefined);
|
||||||
|
} finally {
|
||||||
|
await releaseRuntimeServicesForRun(run.id).catch(() => undefined);
|
||||||
|
activeRunExecutions.delete(run.id);
|
||||||
|
await startNextQueuedRunForAgent(run.agentId);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async function releaseIssueExecutionAndPromote(run: typeof heartbeatRuns.$inferSelect) {
|
async function releaseIssueExecutionAndPromote(run: typeof heartbeatRuns.$inferSelect) {
|
||||||
@@ -2425,6 +2472,8 @@ export function heartbeatService(db: Db) {
|
|||||||
|
|
||||||
reapOrphanedRuns,
|
reapOrphanedRuns,
|
||||||
|
|
||||||
|
resumeQueuedRuns,
|
||||||
|
|
||||||
tickTimers: async (now = new Date()) => {
|
tickTimers: async (now = new Date()) => {
|
||||||
const allAgents = await db.select().from(agents);
|
const allAgents = await db.select().from(agents);
|
||||||
let checked = 0;
|
let checked = 0;
|
||||||
|
|||||||
Reference in New Issue
Block a user