temporalio
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
Lines changed: 8 additions & 0 deletions
diff --git a/.github/workflows/nightly-throughput-stress.yml b/.github/workflows/nightly-throughput-stress.yml
Lines changed: 19 additions & 20 deletions
diff --git a/.github/workflows/omes.yml b/.github/workflows/omes.yml
Lines changed: 7 additions & 1 deletion
diff --git a/contrib/opentelemetry/tracing_interceptor.go b/contrib/opentelemetry/tracing_interceptor.go
Lines changed: 19 additions & 2 deletions
diff --git a/contrib/opentelemetry/tracing_interceptor_test.go b/contrib/opentelemetry/tracing_interceptor_test.go
Lines changed: 110 additions & 0 deletions
diff --git a/contrib/tools/workflowcheck/README.md b/contrib/tools/workflowcheck/README.md
Lines changed: 1 addition & 1 deletion
diff --git a/internal/internal_worker.go b/internal/internal_worker.go
Lines changed: 12 additions & 2 deletions
diff --git a/internal/internal_worker_base.go b/internal/internal_worker_base.go
Lines changed: 1 addition & 1 deletion
@@ -5,6 +5,10 @@ on:
     branches:
       - master
 
+permissions:
+  contents: read
+  actions: read
+
 jobs:
   build-and-test:
     strategy:
@@ -23,6 +27,10 @@ jobs:
             runsOn: macos-15-intel
           - os: macos-arm
             runsOn: macos-14
+    permissions:
+      contents: read
+      checks: write
+      statuses: write
     runs-on: ${{ matrix.runsOn || matrix.os }}
     steps:
       - name: Checkout repository
 
@@ -7,28 +7,32 @@ on:
   push:
     branches:
       - add-nightly-throughput-stress-workflow
+      - cleanup-nightly-tps-workflow
   workflow_dispatch:
     inputs:
       duration:
         description: 'Test duration (e.g., 6h, 1h)'
         required: false
-        default: '6h'
+        default: '5h'
         type: string
       timeout:
         description: 'Scenario timeout (should be greater than duration)'
         required: false
-        default: '6h30m'
+        default: '5h30m'
         type: string
       job_timeout_minutes:
         description: 'GitHub Actions job timeout in minutes'
         required: false
-        default: 420
+        default: 360
         type: number
 
+permissions:
+  contents: read
+
 env:
   # Workflow configuration
-  TEST_DURATION: ${{ inputs.duration || vars.NIGHTLY_TEST_DURATION || '6h' }}
-  TEST_TIMEOUT: ${{ inputs.timeout || vars.NIGHTLY_TEST_TIMEOUT || '6h30m' }}
+  TEST_DURATION: ${{ inputs.duration || vars.NIGHTLY_TEST_DURATION || '5h' }}
+  TEST_TIMEOUT: ${{ inputs.timeout || vars.NIGHTLY_TEST_TIMEOUT || '5h30m' }}
 
   # Logging and artifacts
   WORKER_LOG_DIR: /tmp/throughput-stress-logs
@@ -41,7 +45,10 @@ env:
 jobs:
   throughput-stress:
     runs-on: ubuntu-latest-4-cores
-    timeout-minutes: ${{ fromJSON(inputs.job_timeout_minutes || (vars.NIGHTLY_JOB_TIMEOUT_MINUTES || '420')) }}
+    timeout-minutes: ${{ fromJSON(inputs.job_timeout_minutes || (vars.NIGHTLY_JOB_TIMEOUT_MINUTES || '360')) }}
+    permissions:
+      contents: read
+      actions: write
 
     steps:
       - name: Print test configuration
@@ -93,6 +100,11 @@ jobs:
         working-directory: omes
         continue-on-error: true
         run: |
+          # This makes the pipeline return the exit code of the first failing command
+          # Otherwise the output of the `tee` command will be used
+          # (which is troublesome when the scenario fails but the `tee` command succeeds)
+          set -o pipefail
+
           # Use run-scenario-with-worker to build and run in one step
           # Pass the SDK directory as --version for local testing
           # Note: The hardcoded values below match OMES defaults, except:
@@ -113,8 +125,6 @@ jobs:
             --option min-throughput-per-hour=1000 \
             2>&1 | tee $WORKER_LOG_DIR/scenario.log
 
-          echo "SCENARIO_EXIT_CODE=${PIPESTATUS[0]}" >> $GITHUB_ENV
-
       - name: Upload logs on failure/cancellation
         if: failure() || cancelled()
         uses: actions/upload-artifact@v4
@@ -136,21 +146,10 @@ jobs:
                   "type": "section",
                   "text": {
                     "type": "mrkdwn",
-                    "text": "*Nightly Throughput Stress Failed* :x:\n\n*Duration:* ${{ env.TEST_DURATION }}\n*Run:* <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|View Logs>\n*Triggered by:* ${{ github.event_name == 'schedule' && 'Scheduled' || github.actor }}"
+                    "text": "*Nightly Throughput Stress Failed* :x:\n\n*Repository:* ${{ github.repository }}\n*Duration:* ${{ env.TEST_DURATION }}\n*Run:* <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|View Logs>\n*Triggered by:* ${{ github.event_name == 'schedule' && 'Scheduled' || github.actor }}"
                   }
                 }
               ]
             }
         env:
           SLACK_WEBHOOK_URL: ${{ secrets.SLACK_SDK_ALERTS_WEBHOOK }}
-
-      - name: Fail if scenario failed
-        if: always()
-        run: |
-          if [ "${SCENARIO_EXIT_CODE:-1}" != "0" ]; then
-            echo "❌ Throughput stress test failed with exit code ${SCENARIO_EXIT_CODE}"
-            echo "Check the artifacts for detailed logs and state"
-            exit 1
-          else
-            echo "✅ Throughput stress test completed successfully"
-          fi
@@ -4,8 +4,14 @@ on:
     branches:
       - master
 
+permissions:
+  contents: read
+
 jobs:
   omes-image-build:
+    permissions:
+      contents: read
+      packages: write
     uses: temporalio/omes/.github/workflows/docker-images.yml@main
     secrets: inherit
     with:
@@ -14,4 +20,4 @@ jobs:
       sdk-repo-ref: ${{ github.event.pull_request.head.ref || github.ref }}
       # TODO: Remove once we have a good way of cleaning up sha-based pushed images
       docker-tag-ext: ci-latest
-      do-push: true
+      do-push: true
@@ -14,6 +14,7 @@ import (
 
 	"go.temporal.io/sdk/interceptor"
 	"go.temporal.io/sdk/log"
+	"go.temporal.io/sdk/temporal"
 )
 
 // DefaultTextMapPropagator is the default OpenTelemetry TextMapPropagator used
@@ -196,8 +197,17 @@ func (t *tracer) StartSpan(opts *interceptor.TracerStartSpanOptions) (intercepto
 		}
 	}
 
+	if opts.ToHeader && opts.FromHeader {
+		return nil, fmt.Errorf("cannot set both ToHeader and FromHeader for span")
+	}
+
+	spanKind := trace.SpanKindServer
+	if opts.ToHeader {
+		spanKind = trace.SpanKindClient
+	}
+
 	// Create span
-	span := t.options.SpanStarter(ctx, t.options.Tracer, opts.Operation+":"+opts.Name, trace.WithTimestamp(opts.Time))
+	span := t.options.SpanStarter(ctx, t.options.Tracer, opts.Operation+":"+opts.Name, trace.WithTimestamp(opts.Time), trace.WithSpanKind(spanKind))
 
 	// Set tags
 	if len(opts.Tags) > 0 {
@@ -241,12 +251,19 @@ type tracerSpan struct {
 }
 
 func (t *tracerSpan) Finish(opts *interceptor.TracerFinishSpanOptions) {
-	if opts.Error != nil {
+	t.RecordError(opts.Error)
+
+	if opts.Error != nil && !isBenignApplicationError(opts.Error) {
 		t.SetStatus(codes.Error, opts.Error.Error())
 	}
 	t.End()
 }
 
+// isBenignApplicationError reports whether err is a non-nil
+// *temporal.ApplicationError whose category is
+// temporal.ApplicationErrorCategoryBenign.
+//
+// The check is a direct type assertion on err itself; an application error
+// wrapped inside another error is not unwrapped and therefore yields false.
+func isBenignApplicationError(err error) bool {
+	appErr, ok := err.(*temporal.ApplicationError)
+	if !ok || appErr == nil {
+		// Either a different error type (or a nil interface), or a typed
+		// nil pointer: neither can carry a benign category.
+		return false
+	}
+	return appErr.Category() == temporal.ApplicationErrorCategoryBenign
+}
+
 type textMapCarrier map[string]string
 
 func (t textMapCarrier) Get(key string) string        { return t[key] }
 
@@ -2,15 +2,19 @@ package opentelemetry_test
 
 import (
 	"testing"
+	"time"
 
+	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
+	"go.opentelemetry.io/otel/codes"
 	sdktrace "go.opentelemetry.io/otel/sdk/trace"
 	"go.opentelemetry.io/otel/sdk/trace/tracetest"
 	"go.opentelemetry.io/otel/trace"
 
 	"go.temporal.io/sdk/contrib/opentelemetry"
 	"go.temporal.io/sdk/interceptor"
 	"go.temporal.io/sdk/internal/interceptortest"
+	"go.temporal.io/sdk/temporal"
 )
 
 func TestSpanPropagation(t *testing.T) {
@@ -42,3 +46,109 @@ func spanChildren(spans []sdktrace.ReadOnlySpan, parentID trace.SpanID) (ret []*
 	}
 	return
 }
+
+// TestSpanKind verifies that the tracer derives the OpenTelemetry span kind
+// from the header direction of the start options: a span started with
+// ToHeader is recorded as a client span, and a span started with FromHeader
+// is recorded as a server span.
+func TestSpanKind(t *testing.T) {
+	tests := []struct {
+		operation    string
+		toHeader     bool
+		fromHeader   bool
+		expectedKind trace.SpanKind
+	}{
+		{
+			operation:    "StartWorkflow",
+			toHeader:     true,
+			fromHeader:   false,
+			expectedKind: trace.SpanKindClient,
+		},
+		{
+			operation:    "RunWorkflow",
+			toHeader:     false,
+			fromHeader:   true,
+			expectedKind: trace.SpanKindServer,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.operation, func(t *testing.T) {
+			// Each subtest gets its own recorder so only its span is observed.
+			rec := tracetest.NewSpanRecorder()
+			tracer, err := opentelemetry.NewTracer(opentelemetry.TracerOptions{
+				Tracer: sdktrace.NewTracerProvider(sdktrace.WithSpanProcessor(rec)).Tracer(""),
+			})
+			require.NoError(t, err)
+
+			span, err := tracer.StartSpan(&interceptor.TracerStartSpanOptions{
+				Operation:  tt.operation,
+				Name:       "test-span",
+				ToHeader:   tt.toHeader,
+				FromHeader: tt.fromHeader,
+			})
+			require.NoError(t, err)
+
+			// Finishing the span is what makes it show up in rec.Ended().
+			span.Finish(&interceptor.TracerFinishSpanOptions{})
+
+			spans := rec.Ended()
+			// require.Len reports the slice contents on failure and avoids the
+			// expected/actual argument-order pitfall of require.Equal.
+			require.Len(t, spans, 1)
+
+			foundSpan := spans[0]
+			assert.Equal(t, tt.expectedKind, foundSpan.SpanKind(),
+				"Expected span kind %v but got %v for operation %s",
+				tt.expectedKind, foundSpan.SpanKind(), tt.operation)
+		})
+	}
+}
+
+// TestBenignErrorSpanStatus checks how finishing a span with an application
+// error affects the recorded span status: a benign-category application error
+// must leave the status unset with no description, while a regular
+// application error must produce an error status with a description.
+func TestBenignErrorSpanStatus(t *testing.T) {
+	benignErr := temporal.NewApplicationErrorWithOptions(
+		"benign error",
+		"TestType",
+		temporal.ApplicationErrorOptions{Category: temporal.ApplicationErrorCategoryBenign},
+	)
+	regularErr := temporal.NewApplicationError("regular error", "TestType")
+
+	cases := []struct {
+		name       string
+		err        error
+		wantStatus codes.Code
+		wantDesc   bool
+	}{
+		{name: "benign application error should not set error status", err: benignErr, wantStatus: codes.Unset, wantDesc: false},
+		{name: "regular application error should set error status", err: regularErr, wantStatus: codes.Error, wantDesc: true},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			// A fresh recorder per subtest keeps the ended-span list isolated.
+			recorder := tracetest.NewSpanRecorder()
+			provider := sdktrace.NewTracerProvider(sdktrace.WithSpanProcessor(recorder))
+			tracer, err := opentelemetry.NewTracer(opentelemetry.TracerOptions{Tracer: provider.Tracer("")})
+			require.NoError(t, err)
+
+			span, err := tracer.StartSpan(&interceptor.TracerStartSpanOptions{
+				Operation: "TestOperation",
+				Name:      "TestSpan",
+				Time:      time.Now(),
+			})
+			require.NoError(t, err)
+
+			span.Finish(&interceptor.TracerFinishSpanOptions{Error: tc.err})
+
+			// Exactly one span should have ended: the one finished above.
+			ended := recorder.Ended()
+			require.Len(t, ended, 1)
+
+			status := ended[0].Status()
+			assert.Equal(t, tc.wantStatus, status.Code)
+
+			if !tc.wantDesc {
+				assert.Empty(t, status.Description)
+				return
+			}
+			assert.NotEmpty(t, status.Description)
+		})
+	}
+}
@@ -145,7 +145,7 @@ In addition to those functions/vars, the following Go source constructs are cons
 * Iterating over a channel via `range`
 * Iterating over a map via `range`
 
-Many constructs that are known to be non-deterministic, such as mutating a global variable, are not able to be reliably
+Many constructs that are known to be non-deterministic, such as mutating a global variable, cannot be reliably
 distinguished from deterministic use in common cases. This tool intentionally does not flag them.
 
 In some cases, functions that are considered non-deterministic are commonly used in ways that only follow a
 
@@ -448,15 +448,26 @@ func newSessionWorker(client *WorkflowClient, params workerExecutionParameters,
 	creationTaskqueue := getCreationTaskqueue(params.TaskQueue)
 	params.BackgroundContext = context.WithValue(params.BackgroundContext, sessionEnvironmentContextKey, sessionEnvironment)
 	params.TaskQueue = sessionEnvironment.GetResourceSpecificTaskqueue()
+	// For the resource specific task queue, we don't need to include deployment options
+	// Save them to restore later
+	deployments := params.DeploymentOptions
+	useBuildIDForVersioning := params.UseBuildIDForVersioning
+	// Disable versioning for activity worker within session, but still send deployment name for debug purpose
+	params.DeploymentOptions.UseVersioning = false
+	params.UseBuildIDForVersioning = false
 	activityWorker := newActivityWorker(client, params,
-		&workerOverrides{slotSupplier: params.Tuner.GetSessionActivitySlotSupplier()}, env, nil)
+		&workerOverrides{
+			slotSupplier: params.Tuner.GetSessionActivitySlotSupplier(),
+		}, env, nil)
 
 	params.ActivityTaskPollerBehavior = NewPollerBehaviorSimpleMaximum(
 		PollerBehaviorSimpleMaximumOptions{
 			MaximumNumberOfPollers: 1,
 		},
 	)
 	params.TaskQueue = creationTaskqueue
+	params.DeploymentOptions = deployments
+	params.UseBuildIDForVersioning = useBuildIDForVersioning
 	// Although we have session token bucket to limit session size across creation
 	// and recreation, we also limit it here for creation only
 	overrides := &workerOverrides{}
@@ -518,7 +529,6 @@ func newActivityWorker(
 	} else {
 		slotSupplier = params.Tuner.GetActivityTaskSlotSupplier()
 	}
-
 	bwo := baseWorkerOptions{
 		pollerRate:       defaultPollerRate,
 		slotSupplier:     slotSupplier,
 
@@ -885,7 +885,7 @@ func newScalableTaskPoller(
 	}
 	switch p := pollerBehavior.(type) {
 	case *pollerBehaviorAutoscaling:
-		tw.pollerCount = p.initialNumberOfPollers
+		tw.pollerCount = p.maximumNumberOfPollers
 		tw.pollerSemaphore = newPollerSemaphore(p.initialNumberOfPollers)
 		tw.pollerAutoscalerReportHandle = newPollScalerReportHandle(pollScalerReportHandleOptions{
 			initialPollerCount: p.initialNumberOfPollers,
Original file line number	Diff line number	Diff line change
`@@ -885,7 +885,7 @@ func newScalableTaskPoller(`
`885`	`885`	`}`
`886`	`886`	`switch p := pollerBehavior.(type) {`
`887`	`887`	`case *pollerBehaviorAutoscaling:`
`888`		`- tw.pollerCount = p.initialNumberOfPollers`
	`888`	`+ tw.pollerCount = p.maximumNumberOfPollers`
`889`	`889`	`tw.pollerSemaphore = newPollerSemaphore(p.initialNumberOfPollers)`
`890`	`890`	`tw.pollerAutoscalerReportHandle = newPollScalerReportHandle(pollScalerReportHandleOptions{`
`891`	`891`	`initialPollerCount: p.initialNumberOfPollers,`