|
| 1 | +import asyncio |
| 2 | +from typing import Any, Dict, Optional |
| 3 | + |
| 4 | +import pytest |
| 5 | + |
| 6 | +import ray |
| 7 | +from ray import serve |
| 8 | +from ray._private.test_utils import wait_for_condition |
| 9 | +from ray.serve._private.common import ReplicaID |
| 10 | +from ray.serve.context import _get_internal_replica_context |
| 11 | +from ray.serve.handle import DeploymentHandle |
| 12 | + |
| 13 | + |
@serve.deployment(
    request_routing_stats_period_s=0.1, request_routing_stats_timeout_s=0.1
)
class Patient:
    """Test deployment whose routing-stats hook can be told to fail or hang.

    ``record_routing_stats`` returns whatever was last stored through
    ``set_routing_stats``. ``set_should_fail`` and ``set_should_hang`` switch
    the hook into a raising or a never-returning mode respectively.
    """

    def __init__(self):
        self.routing_stats: Dict[str, Any] = {}
        self.should_hang = False
        self.should_fail = False
        # Remember which replica this instance is so callers can look it up.
        context = _get_internal_replica_context()
        self.replica_id: ReplicaID = context.replica_id

    async def record_routing_stats(self) -> Dict[str, Any]:
        """Return the stored routing stats, or hang/raise when configured to.

        Raises:
            Exception: when ``set_should_fail`` has been called.
        """
        if self.should_hang:
            # Suspend cooperatively. A blocking ``time.sleep`` inside a
            # coroutine freezes the event loop it runs on, so nothing else
            # scheduled on that loop can make progress while it "hangs".
            # NOTE(review): presumably that includes the stats timeout and
            # regular request handling -- confirm against the replica code.
            await asyncio.sleep(10000)

        if self.should_fail:
            raise Exception("intended to fail")

        return self.routing_stats

    def __call__(self, *args) -> ReplicaID:
        """Return the ID of the replica that served the request."""
        return self.replica_id

    def set_routing_stats(self, routing_stats: Dict[str, Any]) -> None:
        """Replace the stats that ``record_routing_stats`` will return."""
        print(f"set_routing_stats {routing_stats=}")
        self.routing_stats = routing_stats

    def set_should_fail(self) -> None:
        """Make subsequent ``record_routing_stats`` calls raise."""
        self.should_fail = True

    def set_should_hang(self) -> None:
        """Make subsequent ``record_routing_stats`` calls sleep for a long time."""
        self.should_hang = True
| 48 | + |
| 49 | + |
def check_routing_stats_recorded(
    handle: DeploymentHandle,
    expected_stats: Dict[str, Any],
    replica_id: Optional[ReplicaID] = None,
) -> bool:
    """Assert that a replica known to the handle's router has the expected stats.

    Args:
        handle: Deployment handle whose request router's replica collection
            is inspected.
        expected_stats: Stats the selected replica's ``routing_stats`` must
            equal.
        replica_id: Key of the replica to check. When ``None``, the first
            replica yielded by the router's replica collection is used.

    Returns:
        ``True`` when the stats match, which makes the function usable as a
        polling predicate.

    Raises:
        AssertionError: If the recorded stats differ from ``expected_stats``.
    """
    running_replicas = handle._router._asyncio_router.request_router._replicas
    # Compare against None explicitly: the choice of replica must depend on
    # whether an ID was supplied, not on the truthiness of the ID object.
    if replica_id is not None:
        target_running_replica = running_replicas[replica_id]
    else:
        target_running_replica = next(iter(running_replicas.values()))
    assert (
        target_running_replica.routing_stats == expected_stats
    ), f"{target_running_replica.routing_stats=} != {expected_stats=}"
    return True
| 64 | + |
| 65 | + |
@pytest.mark.parametrize("use_class", [True, False])
def test_no_user_defined_method(serve_instance, use_class):
    """A deployment without a routing-stats hook reports empty routing stats.

    Runs once with a class-based deployment and once with a function-based
    one.
    """
    if not use_class:

        @serve.deployment
        def A(*args):
            return ray.get_runtime_context().current_actor

    else:

        @serve.deployment
        class A:
            def __call__(self, *args):
                return ray.get_runtime_context().current_actor

    handle = serve.run(A.bind())
    # Send one request and wait for it to finish before inspecting the router.
    handle.remote().result()

    request_router = handle._router._asyncio_router.request_router
    known_replicas = list(request_router._replicas.values())
    assert len(known_replicas) == 1
    assert known_replicas[0].routing_stats == {}
| 87 | + |
| 88 | + |
@pytest.mark.asyncio
async def test_user_defined_method_fails(serve_instance):
    """Stats recorded before the user-defined hook starts raising are kept."""
    stats = {"foo": "bar"}
    handle = serve.run(Patient.bind())
    await handle.set_routing_stats.remote(stats)
    target_replica = await handle.remote()

    # Both checks below look at the same replica and expect the same stats.
    check_kwargs = {
        "handle": handle,
        "expected_stats": stats,
        "replica_id": target_replica,
    }

    # Baseline: the stats must be recorded while the hook still works.
    wait_for_condition(check_routing_stats_recorded, **check_kwargs)

    # Switch the hook into failing mode, then send a burst of requests.
    await handle.set_should_fail.remote()
    pending = [handle.remote() for _ in range(100)]
    await asyncio.gather(*pending)

    # After the failure, the previous routing stats should still be accessible.
    wait_for_condition(check_routing_stats_recorded, **check_kwargs)
| 115 | + |
| 116 | + |
| 117 | +# @pytest.mark.asyncio |
| 118 | +# async def test_user_defined_method_hangs(serve_instance): |
| 119 | +# """Check the behavior when a user-defined method hangs.""" |
| 120 | +# expected_stats = {"foo": "bar"} |
| 121 | +# h = serve.run(Patient.bind()) |
| 122 | +# await h.set_routing_stats.remote(expected_stats) |
| 123 | +# replica_id = await h.remote() |
| 124 | +# |
| 125 | +# # Ensure the routing stats are recorded correctly before the failure |
| 126 | +# wait_for_condition(check_routing_stats_recorded, handle=h, expected_stats=expected_stats, replica_id=replica_id) |
| 127 | +# |
| 128 | +# print("A") |
| 129 | +# await h.set_should_hang.remote() |
| 130 | +# print("B") |
| 131 | +# await asyncio.gather(*[h.remote() for _ in range(100)]) |
| 132 | +# print("C") |
| 133 | +# # After the failure the previous routing stats should still accessible |
| 134 | +# wait_for_condition(check_routing_stats_recorded, handle=h, expected_stats=expected_stats, replica_id=replica_id) |
| 135 | +# |
| 136 | +# |
| 137 | +# @pytest.mark.asyncio |
| 138 | +# async def test_multiple_replicas(serve_instance): |
| 139 | +# h = serve.run(Patient.options(num_replicas=2).bind()) |
| 140 | +# actors = { |
| 141 | +# a._actor_id for a in await asyncio.gather(*[h.remote() for _ in range(100)]) |
| 142 | +# } |
| 143 | +# assert len(actors) == 2 |
| 144 | +# |
| 145 | +# await h.set_should_fail.remote() |
| 146 | +# |
| 147 | +# await async_wait_for_condition( |
| 148 | +# check_new_actor_started, handle=h, original_actors=actors |
| 149 | +# ) |
| 150 | +# |
| 151 | +# new_actors = { |
| 152 | +# a._actor_id for a in await asyncio.gather(*[h.remote() for _ in range(100)]) |
| 153 | +# } |
| 154 | +# assert len(new_actors) == 2 |
| 155 | +# assert len(new_actors.intersection(actors)) == 1 |
| 156 | + |
| 157 | + |
# Allow running this test module directly as a script.
if __name__ == "__main__":
    import sys

    # -v: verbose test names, -s: do not capture stdout.
    pytest_args = ["-v", "-s", __file__]
    exit_code = pytest.main(pytest_args)
    sys.exit(exit_code)
0 commit comments