Skip to content

Crash on cert change #795

@Besroy

Description

@Besroy

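When the cert changes, HS restarts the gRPC server in nuraft_mesg. However, if RPC requests are still active during the restart, a crash may occur: in the stack below, a `raftStep` response is being sent via `ServerAsyncResponseWriter::Finish` when the `grpc_cq_begin_op(cq_, notify_tag)` assertion fails. We need to ensure the server restarts gracefully to prevent this.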

The stack trace is:

(gdb) bt
#0  0x000072f0fdbb6b2c in pthread_kill () from /lib/x86_64-linux-gnu/libc.so.6
#1  0x000072f0fdb5d27e in raise () from /lib/x86_64-linux-gnu/libc.so.6
#2  0x000072f0fdb408ff in abort () from /lib/x86_64-linux-gnu/libc.so.6
#3  0x00006434a43b3db8 in grpc_core::Crash (message="ASSERTION FAILED: grpc_cq_begin_op(cq_, notify_tag)", location=...) at /home/ubuntu/.conan2/p/b/grpc66871916605d2/b/src/src/core/lib/gprpp/crash.cc:30
#4  0x00006434a43b1fca in gpr_assertion_failed (filename=0x6434a446d9d0 "/home/ubuntu/.conan2/p/b/grpc66871916605d2/b/src/src/core/lib/surface/call.cc", line=1855, message=<optimized out>)
    at /usr/include/c++/11/string_view:137
#5  0x00006434a3cbb2f9 in grpc_core::FilterStackCall::StartBatch (this=<optimized out>, ops=<optimized out>, nops=3, notify_tag=<optimized out>, is_notify_tag_closure=<optimized out>)
    at /home/ubuntu/.conan2/p/b/grpc66871916605d2/b/src/src/core/lib/surface/call.cc:1855
#6  0x00006434a3cb1fb1 in grpc_call_start_batch (call=0x72efe801c890, ops=0x72f026feeec0, nops=3, tag=0x72effc437d30, reserved=<optimized out>)
    at /home/ubuntu/.conan2/p/b/grpc66871916605d2/b/src/src/core/lib/surface/call.cc:3539
#7  0x00006434a32ab517 in grpc::internal::CallOpSet<grpc::internal::CallOpSendInitialMetadata, grpc::internal::CallOpSendMessage, grpc::internal::CallOpServerSendStatus, grpc::internal::CallNoOp<4>, grpc::internal::CallNoOp<5>, grpc::internal::CallNoOp<6> >::ContinueFillOpsAfterInterception (this=0x72effc437d30) at /home/ubuntu/.conan2/p/b/grpc66871916605d2/p/include/grpcpp/impl/call.h:70
#8  0x00006434a376cf79 in grpc::internal::call::PerformOps (ops=0x72effc437d30, this=0x72effc437b98) at /home/ubuntu/.conan2/p/b/grpc66871916605d2/p/include/grpcpp/impl/call.h:67
#9  grpc::ServerAsyncResponseWriter<nuraft_mesg::RaftGroupMsg>::Finish (tag=0x72effc438598, status=..., msg=..., this=0x72effc437b90)
    at /home/ubuntu/.conan2/p/b/grpc66871916605d2/p/include/grpcpp/support/async_unary_call.h:356
#10 sisl::RpcData<nuraft_mesg::Messaging, nuraft_mesg::RaftGroupMsg, nuraft_mesg::RaftGroupMsg, false>::do_non_streaming_send (this=0x72effc437710) at /home/ubuntu/sisl/include/sisl/grpc/rpc_call.hpp:345
#11 0x00006434a375f855 in boost::asio::detail::binder0<nuraft_mesg::proto_service::raftStep(sisl::AsyncRpcDataPtr<nuraft_mesg::Messaging, nuraft_mesg::RaftGroupMsg, nuraft_mesg::RaftGroupMsg>&)::<lambda()> >::operator() (this=0x72f026fef3f0) at /home/ubuntu/.conan2/p/b/boost772ff092bb6cd/p/include/boost/asio/detail/bind_handler.hpp:60
#12 boost::asio::asio_handler_invoke<boost::asio::detail::binder0<nuraft_mesg::proto_service::raftStep(sisl::AsyncRpcDataPtr<nuraft_mesg::Messaging, nuraft_mesg::RaftGroupMsg, nuraft_mesg::RaftGroupMsg>&)::<lambda()> > > (function=...) at /home/ubuntu/.conan2/p/b/boost772ff092bb6cd/p/include/boost/asio/handler_invoke_hook.hpp:88
#13 boost_asio_handler_invoke_helpers::invoke<boost::asio::detail::binder0<nuraft_mesg::proto_service::raftStep(sisl::AsyncRpcDataPtr<nuraft_mesg::Messaging, nuraft_mesg::RaftGroupMsg, nuraft_mesg::RaftGroupMsg>&)::<lambda()> >, nuraft_mesg::proto_service::raftStep(sisl::AsyncRpcDataPtr<nuraft_mesg::Messaging, nuraft_mesg::RaftGroupMsg, nuraft_mesg::RaftGroupMsg>&)::<lambda()> > (context=...,
    function=...) at /home/ubuntu/.conan2/p/b/boost772ff092bb6cd/p/include/boost/asio/detail/handler_invoke_helpers.hpp:54
#14 boost::asio::detail::asio_handler_invoke<boost::asio::detail::binder0<nuraft_mesg::proto_service::raftStep(sisl::AsyncRpcDataPtr<nuraft_mesg::Messaging, nuraft_mesg::RaftGroupMsg, nuraft_mesg::RaftGroupMsg>&)::<lambda()> >, nuraft_mesg::proto_service::raftStep(sisl::AsyncRpcDataPtr<nuraft_mesg::Messaging, nuraft_mesg::RaftGroupMsg, nuraft_mesg::RaftGroupMsg>&)::<lambda()> > (
    this_handler=0x72f026fef3f0, function=...) at /home/ubuntu/.conan2/p/b/boost772ff092bb6cd/p/include/boost/asio/detail/bind_handler.hpp:111
#15 boost_asio_handler_invoke_helpers::invoke<boost::asio::detail::binder0<nuraft_mesg::proto_service::raftStep(sisl::AsyncRpcDataPtr<nuraft_mesg::Messaging, nuraft_mesg::RaftGroupMsg, nuraft_mesg::RaftGroupMsg>&)::<lambda()> >, boost::asio::detail::binder0<nuraft_mesg::proto_service::raftStep(sisl::AsyncRpcDataPtr<nuraft_mesg::Messaging, nuraft_mesg::RaftGroupMsg, nuraft_mesg::RaftGroupMsg>&)::<lambda()> > > (context=..., function=...) at /home/ubuntu/.conan2/p/b/boost772ff092bb6cd/p/include/boost/asio/detail/handler_invoke_helpers.hpp:54
#16 boost::asio::detail::executor_op<boost::asio::detail::binder0<nuraft_mesg::proto_service::raftStep(sisl::AsyncRpcDataPtr<nuraft_mesg::Messaging, nuraft_mesg::RaftGroupMsg, nuraft_mesg::RaftGroupMsg>&)::<lambda()> >, std::allocator<void>, boost::asio::detail::scheduler_operation>::do_complete(void *, boost::asio::detail::scheduler_operation *, const boost::system::error_code &, std::size_t) (
    owner=0x6434ee0b16d0, base=<optimized out>) at /home/ubuntu/.conan2/p/b/boost772ff092bb6cd/p/include/boost/asio/detail/executor_op.hpp:71
#17 0x00006434a3763e47 in boost::asio::detail::scheduler_operation::complete (bytes_transferred=0, ec=..., owner=0x6434ee0b16d0, this=0x72efe00c4140)
    at /home/ubuntu/.conan2/p/b/boost772ff092bb6cd/p/include/boost/asio/detail/scheduler_operation.hpp:40
#18 boost::asio::detail::scheduler::do_run_one (this=this@entry=0x6434ee0b16d0, lock=..., this_thread=..., ec=...)
    at /home/ubuntu/.conan2/p/b/boost772ff092bb6cd/p/include/boost/asio/detail/impl/scheduler.ipp:493
#19 0x00006434a375d671 in boost::asio::detail::scheduler::run (this=0x6434ee0b16d0, ec=...) at /home/ubuntu/.conan2/p/b/boost772ff092bb6cd/p/include/boost/asio/detail/impl/scheduler.ipp:210
#20 0x00006434a376456d in boost::asio::thread_pool::thread_function::operator() (this=<optimized out>) at /home/ubuntu/.conan2/p/b/boost772ff092bb6cd/p/include/boost/asio/impl/thread_pool.ipp:39
#21 boost::asio::detail::posix_thread::func<boost::asio::thread_pool::thread_function>::run (this=<optimized out>)
    at /home/ubuntu/.conan2/p/b/boost772ff092bb6cd/p/include/boost/asio/detail/posix_thread.hpp:86
#22 0x00006434a37606f4 in boost::asio::detail::boost_asio_detail_posix_thread_function (arg=0x6434ee0b4d80)
    at /home/ubuntu/.conan2/p/b/boost772ff092bb6cd/p/include/boost/asio/detail/impl/posix_thread.ipp:74
#23 0x000072f0fdbb4aa4 in ?? () from /lib/x86_64-linux-gnu/libc.so.6

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions