# Self-Healing Workflow #10133
# This file contains hidden or bidirectional Unicode text that may be
# interpreted or compiled differently than what appears below. To review, open
# the file in an editor that reveals hidden Unicode characters.
# (Learn more about bidirectional Unicode characters.)
# Reacts to completed workflow runs that concluded with `failure`:
#   1. records the failed run's id, name and URL,
#   2. classifies the failure by grepping the failed steps' logs,
#   3. re-runs the failed jobs when the failure looks transient (bounded by
#      MAX_RUN_ATTEMPTS), otherwise
#   4. comments on an existing open "CI Failure: <workflow>" issue or creates one.
name: Self-Healing Workflow

on:
  # PRIMARY: React to workflow failures (event-based - FREE)
  workflow_run:
    # NOTE(review): confirm that the "*" wildcard is honored for
    # `workflow_run.workflows` in your environment.
    workflows: ["*"]
    types:
      - completed

permissions:
  # `gh run view --log-failed` needs actions: read and `gh run rerun` needs
  # actions: write. Scopes not listed in a `permissions` block are set to
  # `none`, so this entry must be explicit.
  actions: write
  # NOTE(review): no visible step pushes commits or edits pull requests;
  # consider lowering the two scopes below if nothing else relies on them.
  contents: write
  pull-requests: write
  issues: write

jobs:
  self-heal:
    runs-on: ubuntu-latest
    timeout-minutes: 10
    # Handle failed runs only, and never this workflow's own failures, so a
    # broken self-heal run cannot keep re-triggering itself.
    if: >-
      github.event.workflow_run.conclusion == 'failure' &&
      github.event.workflow_run.name != github.workflow
    env:
      # Highest run attempt that may still be auto-retried. With "3", a run is
      # re-run after attempts 1 and 2; a failure on attempt 3 is reported.
      MAX_RUN_ATTEMPTS: "3"
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Get Workflow Info
        id: info
        env:
          # Event data is passed through the environment instead of being
          # interpolated into the script text, so a workflow name containing
          # quotes or `$(...)` cannot alter the shell commands.
          RUN_ID: ${{ github.event.workflow_run.id }}
          WORKFLOW_NAME: ${{ github.event.workflow_run.name }}
        run: |
          # Collapse newlines so the name cannot inject extra output entries.
          WORKFLOW_NAME="${WORKFLOW_NAME//$'\n'/ }"
          {
            echo "workflow_name=$WORKFLOW_NAME"
            echo "run_id=$RUN_ID"
            echo "run_url=https://github.com/$GITHUB_REPOSITORY/actions/runs/$RUN_ID"
          } >> "$GITHUB_OUTPUT"

      - name: Classify Error
        id: classify
        env:
          GH_TOKEN: ${{ github.token }}
          RUN_ID: ${{ steps.info.outputs.run_id }}
          RUN_ATTEMPT: ${{ github.event.workflow_run.run_attempt }}
        run: |
          # Inspect only the logs of failed steps; output from passing steps
          # (e.g. a successful lint job) would otherwise skew the result.
          # stderr is discarded so gh's own error text is never classified.
          LOGS=$(gh run view "$RUN_ID" --log-failed 2>/dev/null) || LOGS=""
          if [ -z "$LOGS" ]; then
            echo "::warning::No failed-step logs available for run $RUN_ID"
          fi

          # First match wins. "429" must not be embedded in a longer number
          # (timestamps, sizes, ids).
          if grep -qiE 'ETIMEDOUT|ECONNRESET|(^|[^0-9])429([^0-9]|$)|rate limit' <<<"$LOGS"; then
            ERROR_TYPE=transient
          elif grep -qiE 'npm ERR|yarn error|pip.*failed' <<<"$LOGS"; then
            ERROR_TYPE=dependency
          elif grep -qiE 'lint|prettier|eslint' <<<"$LOGS"; then
            ERROR_TYPE=lint
          elif grep -qiE 'test.*failed|FAIL|AssertionError' <<<"$LOGS"; then
            ERROR_TYPE=test
          else
            ERROR_TYPE=unknown
          fi

          # Retry transient failures only while under the attempt cap; a run
          # that keeps failing is reported instead of being re-run forever.
          ACTION=report
          if [ "$ERROR_TYPE" = "transient" ] &&
             [ "${RUN_ATTEMPT:-1}" -lt "$MAX_RUN_ATTEMPTS" ]; then
            ACTION=retry
          fi

          {
            echo "error_type=$ERROR_TYPE"
            echo "action=$ACTION"
          } >> "$GITHUB_OUTPUT"

      - name: Auto-Retry Transient Errors
        id: retry
        if: steps.classify.outputs.action == 'retry'
        env:
          GH_TOKEN: ${{ github.token }}
          RUN_ID: ${{ steps.info.outputs.run_id }}
        run: |
          echo "Retrying transient error..."
          if ! gh run rerun "$RUN_ID" --failed; then
            # Best effort: do not fail the job, but surface the problem and
            # let the issue step below report the failure instead.
            echo "::warning::Could not rerun"
            echo "rerun_failed=true" >> "$GITHUB_OUTPUT"
          fi

      # Runs for non-transient failures, for transient failures that reached
      # the attempt cap, and when the rerun request itself failed.
      - name: Create Issue for Non-Transient Errors
        if: >-
          steps.classify.outputs.action != 'retry' ||
          steps.retry.outputs.rerun_failed == 'true'
        env:
          GH_TOKEN: ${{ github.token }}
          WORKFLOW_NAME: ${{ steps.info.outputs.workflow_name }}
          ERROR_TYPE: ${{ steps.classify.outputs.error_type }}
          RUN_URL: ${{ steps.info.outputs.run_url }}
        run: |
          ISSUE_TITLE="CI Failure: $WORKFLOW_NAME"

          # Search for the exact title phrase so short workflow names such as
          # "CI" do not match unrelated open bug issues.
          EXISTING=$(gh issue list \
            --label "bug" \
            --search "in:title \"$ISSUE_TITLE\"" \
            --state open \
            --json number \
            --jq '.[0].number // empty' 2>/dev/null || echo "")

          if [ -n "$EXISTING" ] && [ "$EXISTING" != "null" ]; then
            echo "Updating issue #$EXISTING"
            gh issue comment "$EXISTING" --body "New failure: $RUN_URL (Type: $ERROR_TYPE)"
          else
            echo "Creating new issue"
            # Best effort: creation fails if a label is missing.
            gh issue create \
              --title "$ISSUE_TITLE" \
              --body "Workflow failed. Run: $RUN_URL. Error type: $ERROR_TYPE" \
              --label "bug,ai-plan" ||
              echo "::warning::Could not create issue (do the 'bug' and 'ai-plan' labels exist?)"
          fi