Skip to content

implement support for streaming replication WIP (for #116) #652

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,9 @@ services:
postgres:
image: "sfackler/rust-postgres-test:6"
ports:
- 5433:5433
- 5433:5433
environment:
- POSTGRES_PASSWORD=pass
volumes:
- "./docker/sql_setup.sh:/docker-entrypoint-initdb.d/sql_setup.sh"

1 change: 1 addition & 0 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
FROM postgres:12

COPY sql_setup.sh /docker-entrypoint-initdb.d/
# wal2json provides a logical decoding output plugin for the replication tests.
# `-y` keeps the install non-interactive so the image build cannot stop on a confirmation prompt.
RUN apt-get update && apt-get install -y postgresql-12-wal2json
2 changes: 2 additions & 0 deletions docker/sql_setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@ port = 5433
ssl = on
ssl_cert_file = 'server.crt'
ssl_key_file = 'server.key'
wal_level = logical
log_statement = 'all'
EOCONF

cat > "$PGDATA/pg_hba.conf" <<-EOCONF
Expand Down
33 changes: 33 additions & 0 deletions postgres-protocol/src/message/backend.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ pub const PARAMETER_STATUS_TAG: u8 = b'S';
pub const PARAMETER_DESCRIPTION_TAG: u8 = b't';
pub const ROW_DESCRIPTION_TAG: u8 = b'T';
pub const READY_FOR_QUERY_TAG: u8 = b'Z';
pub const COPY_BOTH_RESPONSE_TAG: u8 = b'W';

#[derive(Debug, Copy, Clone)]
pub struct Header {
Expand Down Expand Up @@ -93,6 +94,7 @@ pub enum Message {
CopyDone,
CopyInResponse(CopyInResponseBody),
CopyOutResponse(CopyOutResponseBody),
CopyBothResponse(CopyBothResponseBody),
DataRow(DataRowBody),
EmptyQueryResponse,
ErrorResponse(ErrorResponseBody),
Expand Down Expand Up @@ -190,6 +192,16 @@ impl Message {
storage,
})
}
COPY_BOTH_RESPONSE_TAG => {
let format = buf.read_u8()?;
let len = buf.read_u16::<BigEndian>()?;
let storage = buf.read_all();
Message::CopyBothResponse(CopyBothResponseBody {
format,
len,
storage,
})
}
EMPTY_QUERY_RESPONSE_TAG => Message::EmptyQueryResponse,
BACKEND_KEY_DATA_TAG => {
let process_id = buf.read_i32::<BigEndian>()?;
Expand Down Expand Up @@ -524,6 +536,27 @@ impl CopyOutResponseBody {
}
}

/// Body of a `CopyBothResponse` (`W`) backend message.
pub struct CopyBothResponseBody {
// Bytes remaining in the message after `format` and `len` have been read.
storage: Bytes,
// Big-endian `u16` read right after `format`; used as the column-format count.
len: u16,
// First byte of the message body.
format: u8,
}

impl CopyBothResponseBody {
/// Returns the format byte read from the start of the message body.
#[inline]
pub fn format(&self) -> u8 {
self.format
}

/// Returns a `ColumnFormats` over the stored bytes, with `remaining` initialised to the
/// column count read from the message.
#[inline]
pub fn column_formats(&self) -> ColumnFormats<'_> {
ColumnFormats {
remaining: self.len,
buf: &self.storage,
}
}
}

pub struct DataRowBody {
storage: Bytes,
len: u16,
Expand Down
20 changes: 20 additions & 0 deletions postgres-protocol/src/message/frontend.rs
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,26 @@ pub fn close(variant: u8, name: &str, buf: &mut BytesMut) -> io::Result<()> {
})
}

/// Encodes a standby status update into `buf`.
///
/// The message is written as a `d` frame whose body starts with the `r` marker byte,
/// followed by the three LSNs, the timestamp and a trailing `0` byte.
///
/// Note that each LSN is written as `lsn + 1`, so callers pass the position of the last
/// byte they handled rather than the next expected one.
///
/// NOTE(review): per the PostgreSQL replication protocol the final byte is the
/// "reply requested" flag; `0` means no immediate reply is requested — confirm.
#[inline]
pub fn standby_status_update(
    write_lsn: i64,
    flush_lsn: i64,
    apply_lsn: i64,
    timestamp: i64,
    buf: &mut BytesMut,
) -> io::Result<()> {
    buf.put_u8(b'd');
    write_body(buf, |body| {
        body.put_u8(b'r');
        // Same order as the wire format: written, flushed, applied.
        for lsn in [write_lsn, flush_lsn, apply_lsn].iter() {
            body.put_i64(*lsn + 1);
        }
        body.put_i64(timestamp);
        body.put_u8(0);
        Ok(())
    })
}

pub struct CopyData<T> {
buf: T,
len: i32,
Expand Down
26 changes: 24 additions & 2 deletions tokio-postgres/src/client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ use crate::config::{Host, SslMode};
use crate::connection::{Request, RequestMessages};
use crate::copy_out::CopyOutStream;
use crate::query::RowStream;
use crate::replication::ReplicationStream;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we have separate LogicalReplicationStream and PhysicalReplicationStream?

use crate::simple_query::SimpleQueryStream;
#[cfg(feature = "runtime")]
use crate::tls::MakeTlsConnect;
Expand All @@ -11,8 +12,9 @@ use crate::types::{Oid, ToSql, Type};
#[cfg(feature = "runtime")]
use crate::Socket;
use crate::{
copy_in, copy_out, prepare, query, simple_query, slice_iter, CancelToken, CopyInSink, Error,
Row, SimpleQueryMessage, Statement, ToStatement, Transaction, TransactionBuilder,
copy_in, copy_out, prepare, query, replication, simple_query, slice_iter, CancelToken,
CopyInSink, Error, Row, SimpleQueryMessage, Statement, ToStatement, Transaction,
TransactionBuilder,
};
use bytes::{Buf, BytesMut};
use fallible_iterator::FallibleIterator;
Expand Down Expand Up @@ -433,6 +435,26 @@ impl Client {
copy_out::copy_out(self.inner(), statement).await
}

/// Executes a `START_REPLICATION SLOT ...` command, returning a stream of raw replication events.
///
/// `query` is forwarded unchanged to `replication::start_replication`.
pub async fn start_replication(&self, query: &str) -> Result<ReplicationStream, Error> {
replication::start_replication(self.inner(), query).await
}

/// Stops the current replication by sending a `CopyDone` message.
pub async fn stop_replication(&self) -> Result<(), Error> {
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should be handled on the stream type like it is for copy_in.

replication::stop_replication(self.inner()).await
}

/// Notifies PostgreSQL of the last processed WAL positions.
///
/// `write_lsn`, `flush_lsn` and `apply_lsn` are forwarded unchanged to
/// `replication::standby_status_update`, which builds and sends the status message.
pub async fn standby_status_update(
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is sent as part of the CopyBoth stream, not at the top level: https://www.postgresql.org/docs/13/protocol-replication.html.

&self,
write_lsn: i64,
flush_lsn: i64,
apply_lsn: i64,
) -> Result<(), Error> {
replication::standby_status_update(self.inner(), write_lsn, flush_lsn, apply_lsn).await
}

/// Executes a sequence of SQL statements using the simple query protocol, returning the resulting rows.
///
/// Statements should be separated by semicolons. If an error occurs, execution of the sequence will stop at that
Expand Down
10 changes: 10 additions & 0 deletions tokio-postgres/src/codec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,22 @@ use postgres_protocol::message::backend;
use postgres_protocol::message::frontend::CopyData;
use std::io;
use tokio_util::codec::{Decoder, Encoder};
//use std::fmt;

/// A frontend message handled by this codec.
pub enum FrontendMessage {
// Message bytes that the caller has already encoded.
Raw(Bytes),
// A `CopyData` message wrapping a boxed, `Send` buffer.
CopyData(CopyData<Box<dyn Buf + Send>>),
}

// impl fmt::Debug for FrontendMessage {
// fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
// match self {
// FrontendMessage::Raw(b) => write!(f, "FrontendMessage::Raw({:?})", b),
// FrontendMessage::CopyData(b) => write!(f, "FrontendMessage::CopyData({:?})", "***"),
// }
// }
// }

pub enum BackendMessage {
Normal {
messages: BackendMessages,
Expand Down
17 changes: 17 additions & 0 deletions tokio-postgres/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,7 @@ pub struct Config {
pub(crate) keepalives_idle: Duration,
pub(crate) target_session_attrs: TargetSessionAttrs,
pub(crate) channel_binding: ChannelBinding,
pub(crate) replication: Option<String>,
}

impl Default for Config {
Expand All @@ -184,6 +185,7 @@ impl Config {
keepalives_idle: Duration::from_secs(2 * 60 * 60),
target_session_attrs: TargetSessionAttrs::Any,
channel_binding: ChannelBinding::Prefer,
replication: None,
}
}

Expand Down Expand Up @@ -387,6 +389,17 @@ impl Config {
self.channel_binding
}

/// Sets the value of the `replication` connection parameter.
///
/// When set, the value is sent to the server as the `replication` parameter of the
/// startup message. The `replication` key is also recognised when parsing options.
pub fn replication(&mut self, replication: &str) -> &mut Config {
self.replication = Some(replication.to_string());
self
}

/// Gets the configured `replication` parameter value, or `None` if it has not been set.
pub fn get_replication(&self) -> Option<&str> {
self.replication.as_deref()
}

fn param(&mut self, key: &str, value: &str) -> Result<(), Error> {
match key {
"user" => {
Expand Down Expand Up @@ -476,6 +489,9 @@ impl Config {
};
self.channel_binding(channel_binding);
}
"replication" => {
self.replication(&value);
}
key => {
return Err(Error::config_parse(Box::new(UnknownOption(
key.to_string(),
Expand Down Expand Up @@ -548,6 +564,7 @@ impl fmt::Debug for Config {
.field("keepalives_idle", &self.keepalives_idle)
.field("target_session_attrs", &self.target_session_attrs)
.field("channel_binding", &self.channel_binding)
.field("replication", &self.replication)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instead of making this an ad-hoc parameter, should we make this a different connection type? That would add better compile-time checking that you decide what you are going to use the connection for (normal, physical replication, or logical replication).

.finish()
}
}
Expand Down
3 changes: 3 additions & 0 deletions tokio-postgres/src/connect_raw.rs
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,9 @@ where
if let Some(application_name) = &config.application_name {
params.push(("application_name", &**application_name));
}
if let Some(replication) = &config.replication {
params.push(("replication", &**replication));
}

let mut buf = BytesMut::new();
frontend::startup_message(params, &mut buf).map_err(Error::encode)?;
Expand Down
2 changes: 2 additions & 0 deletions tokio-postgres/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ pub use crate::error::Error;
pub use crate::generic_client::GenericClient;
pub use crate::portal::Portal;
pub use crate::query::RowStream;
pub use crate::replication::ReplicationStream;
pub use crate::row::{Row, SimpleQueryRow};
pub use crate::simple_query::SimpleQueryStream;
#[cfg(feature = "runtime")]
Expand Down Expand Up @@ -163,6 +164,7 @@ mod maybe_tls_stream;
mod portal;
mod prepare;
mod query;
mod replication;
pub mod row;
mod simple_query;
#[cfg(feature = "runtime")]
Expand Down
89 changes: 89 additions & 0 deletions tokio-postgres/src/replication.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
use crate::client::{InnerClient, Responses};
use crate::codec::FrontendMessage;
use crate::connection::RequestMessages;
use crate::{simple_query, Error};
use bytes::{Bytes, BytesMut};
use futures::{ready, Stream};
use log::trace;
use pin_project_lite::pin_project;
use postgres_protocol::message::backend::Message;
use postgres_protocol::message::frontend;
use std::marker::PhantomPinned;
use std::pin::Pin;
use std::task::{Context, Poll};
use std::time::{SystemTime, UNIX_EPOCH};
// Microseconds between the Unix epoch (1970-01-01) and 2000-01-01 00:00:00 UTC
// (946_684_800 seconds). Subtracted from Unix time to build the status-update timestamp.
const J2000_EPOCH_GAP: u128 = 946_684_800_000_000;
/// Sends `query` with the simple query protocol and, once the server has answered with
/// `CopyBothResponse`, returns a stream over the messages that follow.
///
/// Fails if the query cannot be encoded or sent, or if the first response is anything
/// other than `CopyBothResponse`.
pub async fn start_replication(
    client: &InnerClient,
    query: &str,
) -> Result<ReplicationStream, Error> {
    trace!("executing start replication query {}", query);

    let encoded = simple_query::encode(client, query)?;
    start(client, encoded)
        .await
        .map(|responses| ReplicationStream {
            responses,
            _p: PhantomPinned,
        })
}

/// Queues a `CopyDone` message on the connection.
///
/// The response handle returned by `send` is dropped immediately, so this does not wait
/// for the server to acknowledge the end of the copy.
pub async fn stop_replication(client: &InnerClient) -> Result<(), Error> {
    trace!("executing stop replication");
    let mut payload = BytesMut::new();
    frontend::copy_done(&mut payload);
    let request = RequestMessages::Single(FrontendMessage::Raw(payload.freeze()));
    client.send(request).map(drop)
}

pub async fn standby_status_update(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Did you consider also adding support for:

  • identify_system
  • base_backup
  • hot standby feedback
  • creating/dropping slots
  • timeline history
  • show

(I'm not the crate maintainer so don't take these as blockers. It might be fine to do those in later PRs after the basic support is in.)

client: &InnerClient,
write_lsn: i64,
flush_lsn: i64,
apply_lsn: i64,
) -> Result<(), Error> {
trace!("executing standby_status_update");
let now = SystemTime::now()
.duration_since(UNIX_EPOCH)
.unwrap()
.as_micros()
- J2000_EPOCH_GAP;
let mut buf = BytesMut::new();
let _ = frontend::standby_status_update(write_lsn, flush_lsn, apply_lsn, now as i64, &mut buf);
let _ = client.send(RequestMessages::Single(FrontendMessage::Raw(buf.freeze())))?;
Ok(())
}

async fn start(client: &InnerClient, buf: Bytes) -> Result<Responses, Error> {
let mut responses = client.send(RequestMessages::Single(FrontendMessage::Raw(buf)))?;
trace!("start in repication");

match responses.next().await? {
Message::CopyBothResponse(_) => {}
_ => return Err(Error::unexpected_message()),
}

Ok(responses)
}

pin_project! {
/// A stream of `START_REPLICATION` query data.
///
/// Yields the raw bytes of each `CopyData` message and ends when `CopyDone` is received.
pub struct ReplicationStream {
// Backend messages belonging to the replication request.
responses: Responses,
// Marker field that makes the stream `!Unpin`.
#[pin]
_p: PhantomPinned,
}
}

impl Stream for ReplicationStream {
// Each item is the raw payload of one `CopyData` message.
type Item = Result<Bytes, Error>;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd suggest an enum here that describes the messages coming back at least as far as the protocol defines them, e.g. XLogData and PrimaryKeepAlive.

At least for now, we don't need to parse the WAL data itself, but we can represent all of the information that the protocol defines.


// `CopyData` yields its bytes, `CopyDone` ends the stream, and any other message is
// returned as an "unexpected message" error item.
fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
let this = self.project();

// `?` turns a failed poll into an error item; `ready!` returns early while pending.
match ready!(this.responses.poll_next(cx)?) {
Message::CopyData(body) => Poll::Ready(Some(Ok(body.into_bytes()))),
Message::CopyDone => Poll::Ready(None),
_ => Poll::Ready(Some(Err(Error::unexpected_message()))),
}
}
}
2 changes: 1 addition & 1 deletion tokio-postgres/src/simple_query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ pub async fn batch_execute(client: &InnerClient, query: &str) -> Result<(), Erro
}
}

fn encode(client: &InnerClient, query: &str) -> Result<Bytes, Error> {
pub fn encode(client: &InnerClient, query: &str) -> Result<Bytes, Error> {
client.with_buf(|buf| {
frontend::query(query, buf).map_err(Error::encode)?;
Ok(buf.split().freeze())
Expand Down
Loading