Skip to content

Self-Healing Workflow #10133

Self-Healing Workflow

Self-Healing Workflow #10133

Workflow file for this run

# Reacts to the completion of other workflows in this repository. Failed runs
# are either re-run (transient errors) or reported through a GitHub issue.
name: Self-Healing Workflow
on:
  # PRIMARY: React to workflow failures (event-based - FREE)
  workflow_run:
    # NOTE(review): the "*" wildcard is not described in the workflow_run
    # documentation - confirm it matches every workflow as intended.
    workflows: ["*"]
    types:
      - completed
# Declaring any permission sets every undeclared scope to "none", so each
# scope the job relies on has to be listed explicitly.
permissions:
  # Needed by `gh run view --log` (read) and `gh run rerun` (write); without
  # it the log download and the automatic retry are rejected by the API.
  actions: write
  contents: write
  pull-requests: write
  issues: write
jobs:
  # Inspects a failed workflow run, re-runs it when the failure looks
  # transient, and otherwise files (or updates) a tracking issue.
  self-heal:
    runs-on: ubuntu-latest
    timeout-minutes: 10
    # Only react to failures, and never to this workflow's own runs: with a
    # catch-all trigger a failing self-heal run would otherwise try to heal
    # itself.
    if: >-
      github.event.workflow_run.conclusion == 'failure' &&
      github.event.workflow_run.name != github.workflow
    env:
      # Shared by every `gh` invocation below.
      # NOTE: `gh run rerun` requires the token to carry `actions: write`.
      GH_TOKEN: ${{ github.token }}
      GH_REPO: ${{ github.repository }}
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Get Workflow Info
        id: info
        env:
          # Event data is handed over through the environment instead of
          # being expanded into the script text: a workflow name containing
          # quotes or `$(...)` would otherwise be executed by the shell.
          RUN_ID: ${{ github.event.workflow_run.id }}
          WORKFLOW_NAME: ${{ github.event.workflow_run.name }}
        run: |
          {
            echo "workflow_name=$WORKFLOW_NAME"
            echo "run_id=$RUN_ID"
            echo "run_url=https://github.com/$GITHUB_REPOSITORY/actions/runs/$RUN_ID"
          } >> "$GITHUB_OUTPUT"
      - name: Classify Error
        id: classify
        env:
          RUN_ID: ${{ steps.info.outputs.run_id }}
        run: |
          # Look only at the failed steps' logs. gh's own error output is
          # discarded so it cannot be mistaken for the workflow's failure.
          LOGS=$(gh run view "$RUN_ID" --log-failed 2>/dev/null || true)

          # True when the failed-step logs match the given extended regex
          # (case-insensitive).
          log_matches() {
            grep -qiE "$1" <<< "$LOGS"
          }

          # "429" must stand alone: log timestamps and numeric ids routinely
          # contain the digits 4-2-9 and would otherwise mark nearly every
          # failure as transient.
          if log_matches "ETIMEDOUT|ECONNRESET|(^|[^0-9])429([^0-9]|$)|rate limit"; then
            ERROR_TYPE=transient
          elif log_matches "npm ERR|yarn error|pip.*failed"; then
            ERROR_TYPE=dependency
          elif log_matches "lint|prettier|eslint"; then
            ERROR_TYPE=lint
          elif log_matches "test.*failed|FAIL|AssertionError"; then
            ERROR_TYPE=test
          else
            # Also reached when the logs could not be downloaded.
            ERROR_TYPE=unknown
          fi
          echo "error_type=$ERROR_TYPE" >> "$GITHUB_OUTPUT"
      - name: Auto-Retry Transient Errors
        # Retry at most twice (attempts 1 and 2). Without the cap a failure
        # that keeps looking transient would be re-run forever, because each
        # re-run completes and triggers this workflow again.
        if: >-
          steps.classify.outputs.error_type == 'transient' &&
          github.event.workflow_run.run_attempt < 3
        env:
          RUN_ID: ${{ steps.info.outputs.run_id }}
        run: |
          echo "Retrying transient error..."
          # Best effort: a failed rerun is surfaced as a warning annotation
          # but does not fail this job.
          gh run rerun "$RUN_ID" --failed || echo "::warning::Could not rerun"
      - name: Create Issue for Non-Transient Errors
        # Also covers transient failures whose retries are exhausted, so they
        # are reported instead of being dropped silently.
        if: >-
          steps.classify.outputs.error_type != 'transient' ||
          github.event.workflow_run.run_attempt >= 3
        env:
          WORKFLOW_NAME: ${{ steps.info.outputs.workflow_name }}
          ERROR_TYPE: ${{ steps.classify.outputs.error_type }}
          RUN_URL: ${{ steps.info.outputs.run_url }}
        run: |
          ISSUE_TITLE="CI Failure: $WORKFLOW_NAME"
          # Search for the exact title phrase so unrelated bug issues that
          # merely share a word with the workflow name are not picked up.
          EXISTING=$(gh issue list \
            --label "bug" \
            --search "in:title \"$ISSUE_TITLE\"" \
            --state open \
            --json number \
            --jq '.[0].number // empty' 2>/dev/null || true)
          if [ -n "$EXISTING" ] && [ "$EXISTING" != "null" ]; then
            echo "Updating issue #$EXISTING"
            gh issue comment "$EXISTING" \
              --body "New failure: $RUN_URL (Type: $ERROR_TYPE)"
          else
            echo "Creating new issue"
            # Best effort: creation fails, for example, when one of the
            # labels does not exist in the repository.
            gh issue create \
              --title "$ISSUE_TITLE" \
              --body "Workflow failed. Run: $RUN_URL. Error type: $ERROR_TYPE" \
              --label "bug,ai-plan" || echo "::warning::Could not create issue"
          fi