Skip to content

Commit 7771ac2

Browse files
committed
refactor: jobs table to contain jsonb data col
1 parent 4c7ed6e commit 7771ac2

File tree

7 files changed

+43
-43
lines changed

7 files changed

+43
-43
lines changed

generate_signature.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44

55
secret = "tmpsecret"
6-
payload_body_string = '{"action":"submitted","review":{"body":"test review","id":1234,"url":"https://github.com/huggingface/lor-e/5#comment-123"},"pull_request":{"title":"my great contribution to the world","body":"superb work, isnt it","id":4321,"number":5,"html_url":"https://github.com/huggingface/lor-e/5", "url":"https://github.com/api/huggingface/lor-e/5"}}'
6+
payload_body_string = '{"action":"created","comment":{"body":"test review","id":1234,"url":"https://github.com/huggingface/lor-e/5#comment-123"}, "issue":{"title":"my great contribution to the world","body":"superb work, isnt it","id":4321,"number":5,"html_url":"https://github.com/huggingface/lor-e/5", "url":"https://github.com/api/huggingface/lor-e/5"}}'
77

88

99
def generate_signature(payload_body, secret_token):

init_db.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ CREATE INDEX IF NOT EXISTS issues_embedding_hnsw_idx ON issues USING hnsw (embed
3636
CREATE TABLE IF NOT EXISTS jobs (
3737
id SERIAL PRIMARY KEY,
3838
repository_id VARCHAR NOT NULL UNIQUE,
39-
page INT NOT NULL,
39+
data JSONB NOT NULL,
4040
created_at timestamp with time zone NOT NULL DEFAULT (current_timestamp AT TIME ZONE 'UTC'),
4141
updated_at timestamp with time zone NOT NULL DEFAULT (current_timestamp AT TIME ZONE 'UTC')
4242
);

issue-bot/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ once_cell = "1.20"
2424
pgvector = { version = "0.4", features = ["sqlx"] }
2525
reqwest = { version = "0.12", features = ["json"] }
2626
serde = { version = "1.0", features = ["derive"] }
27-
serde_json = "1"
27+
serde_json = { version = "1", features = ["raw_value"] }
2828
sha2 = "0.10"
2929
sqlx = { version = "0.8", features = ["chrono", "postgres", "runtime-tokio"] }
3030
thiserror = "2"

issue-bot/src/github.rs

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,9 @@ pub enum GithubApiError {
4242
}
4343

4444
#[derive(Debug, Deserialize)]
45+
#[allow(unused)]
4546
struct PullRequest {
46-
#[allow(unused)]
47+
html_url: String,
4748
url: String,
4849
}
4950

@@ -180,13 +181,13 @@ impl GithubApi {
180181

181182
pub(crate) fn get_issues(
182183
&self,
183-
from_page: Option<i32>,
184+
from_page: i32,
184185
repository: RepositoryData,
185186
) -> impl Stream<Item = Result<(IssueWithComments, Option<i32>), GithubApiError>> + use<'_> {
186187
try_stream! {
187188
let url = format!("https://api.github.com/repos/{}/issues", repository.repo_id);
188189
let client = self.client.clone();
189-
let mut page = from_page.unwrap_or(1);
190+
let mut page = from_page;
190191
loop {
191192
let res = client
192193
.get(&url)
@@ -206,7 +207,6 @@ impl GithubApi {
206207
if get_next_page(link_header)?.is_none() {
207208
break;
208209
}
209-
let issues: Vec<Issue> = issues.into_iter().filter(|i| i.pull_request.is_none()).collect();
210210
let page_issue_count = issues.len();
211211
for (i, issue) in issues.into_iter().enumerate() {
212212
let res = client
@@ -226,14 +226,6 @@ impl GithubApi {
226226
}
227227
}
228228
}
229-
230-
// pub(crate) async fn get_prs(
231-
// &self,
232-
// repository: &RepositoryData,
233-
// ) -> Result<Vec<IssueWithComments>, GithubApiError> {
234-
// let issues = Vec::new();
235-
// Ok(issues)
236-
// }
237229
}
238230

239231
async fn handle_ratelimit(remaining: Option<HeaderValue>, reset: Option<HeaderValue>) -> Result<(), GithubApiError> {

issue-bot/src/main.rs

Lines changed: 28 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ use axum::{
1717
};
1818
use config::{load_config, IssueBotConfig, ServerConfig};
1919
use embeddings::inference_endpoints::EmbeddingApi;
20-
use futures::{pin_mut, StreamExt};
20+
use futures::{pin_mut, TryStreamExt};
2121
use github::GithubApi;
2222
use huggingface::HuggingfaceApi;
2323
use metrics::start_metrics_server;
@@ -30,6 +30,7 @@ use slack::Slack;
3030
use sqlx::{
3131
postgres::{PgConnectOptions, PgPoolOptions},
3232
prelude::FromRow,
33+
types::Json,
3334
Pool, Postgres, QueryBuilder,
3435
};
3536
use tokio::{
@@ -244,11 +245,20 @@ struct ClosestIssue {
244245
title: String,
245246
number: i32,
246247
html_url: String,
247-
body: String,
248248
#[allow(unused)]
249249
cosine_similarity: f64,
250250
}
251251

252+
#[derive(Debug, Deserialize, FromRow)]
253+
struct JobData {
254+
issues_page: i32,
255+
}
256+
257+
#[derive(Debug, FromRow)]
258+
struct Job {
259+
data: Json<JobData>,
260+
}
261+
252262
async fn handle_webhooks_wrapper(
253263
rx: Receiver<EventData>,
254264
embedding_api: EmbeddingApi,
@@ -282,7 +292,7 @@ async fn handle_webhooks(
282292
Vector::from(embedding_api.generate_embedding(issue_text).await?);
283293

284294
let closest_issues: Vec<ClosestIssue> = sqlx::query_as(
285-
"select title, number, html_url, body, 1 - (embedding <=> $1) as cosine_similarity from issues order by embedding <=> $1 LIMIT 3",
295+
"select title, number, html_url, 1 - (embedding <=> $1) as cosine_similarity from issues order by embedding <=> $1 LIMIT 3",
286296
)
287297
.bind(embedding.clone())
288298
.fetch_all(&pool)
@@ -395,17 +405,17 @@ async fn handle_webhooks(
395405
source = repository.source.to_string()
396406
);
397407
info!(parent: &span, "indexing started");
398-
let from_page = sqlx::query!(
399-
"select page from jobs where repository_id = $1",
408+
let job = sqlx::query_as!(
409+
Job,
410+
r#"select data as "data: Json<JobData>" from jobs where repository_id = $1"#,
400411
repository.repo_id
401412
)
402-
.map(|row| row.page + 1)
403413
.fetch_optional(&pool)
404414
.await?;
405-
let issues = github_api.get_issues(from_page, repository.clone());
415+
let from_issues_page = job.as_ref().map(|j| j.data.issues_page + 1).unwrap_or(1);
416+
let issues = github_api.get_issues(from_issues_page, repository.clone());
406417
pin_mut!(issues);
407-
while let Some(issue) = issues.next().await {
408-
let (issue, page) = issue?;
418+
while let Some((issue, page)) = issues.try_next().await? {
409419
let embedding_api = embedding_api.clone();
410420
let pool = pool.clone();
411421
let source = repository.source.to_string();
@@ -461,19 +471,19 @@ async fn handle_webhooks(
461471
qb.build().execute(&pool).await?;
462472
}
463473
if let Some(page) = page {
464-
sqlx::query!(
465-
r#"insert into jobs (repository_id, page)
466-
values ($1, $2)
474+
sqlx::query(
475+
r#"insert into jobs (repository_id, data)
476+
values ($1, jsonb_build_object('issues_page', $2))
467477
on conflict (repository_id)
468478
do update
469479
set
470-
page = excluded.page,
480+
data = jsonb_set(jobs.data, '{issues_page}', to_jsonb($2::int), true),
471481
updated_at = current_timestamp"#,
472-
repository.repo_id,
473-
page,
474-
)
475-
.execute(&pool)
476-
.await?;
482+
)
483+
.bind(&repository.repo_id)
484+
.bind(page)
485+
.execute(&pool)
486+
.await?;
477487
}
478488
}
479489
sqlx::query!(

issue-bot/src/routes.rs

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@ struct IssueData {
104104
html_url: String,
105105
id: i64,
106106
number: i32,
107+
#[serde(default)]
107108
pull_request: Option<PullRequest>,
108109
title: String,
109110
url: String,
@@ -462,8 +463,8 @@ mod tests {
462463
};
463464
let mut app = app(state);
464465

465-
let payload_body = r#"{"action":"opened","pull_request":{"title":"my great contribution to the world","body":"superb work, isnt it","id":4321,"number":5,"html_url":"https://github.com/huggingface/lor-e/5", "url":"https://github.com/api/huggingface/lor-e/5"}}"#;
466-
let sig = "sha256=0f66c678489c6ab39ba9b5a3dfcb957b2bf3b98aebf0872e8cebbc041ff71307";
466+
let payload_body = r#"{"action":"opened","issue":{"title":"my great contribution to the world","body":"superb work, isnt it","id":4321,"number":5,"html_url":"https://github.com/huggingface/lor-e/5", "url":"https://github.com/api/huggingface/lor-e/5"}}"#;
467+
let sig = "sha256=930e7b9a7cca3f85bc49a693f1d9105ca32bb15f13cda8871aaf79bba27c48cc";
467468

468469
let response = app
469470
.borrow_mut()
@@ -480,8 +481,8 @@ mod tests {
480481

481482
assert_eq!(response.status(), StatusCode::OK);
482483

483-
let payload_body = r#"{"action":"submitted","review":{"body":"test review","id":1234,"url":"https://github.com/huggingface/lor-e/5#comment-123"},"pull_request":{"title":"my great contribution to the world","body":"superb work, isnt it","id":4321,"number":5,"html_url":"https://github.com/huggingface/lor-e/5", "url":"https://github.com/api/huggingface/lor-e/5"}}"#;
484-
let sig = "sha256=4a312de764757f5d18610c183602337f0c766791e8a886120302549c00988bba";
484+
let payload_body = r#"{"action":"created","comment":{"body":"test review","id":1234,"url":"https://github.com/huggingface/lor-e/5#comment-123"}, "issue":{"title":"my great contribution to the world","body":"superb work, isnt it","id":4321,"number":5,"html_url":"https://github.com/huggingface/lor-e/5", "url":"https://github.com/api/huggingface/lor-e/5"}}"#;
485+
let sig = "sha256=cb81358e382b98e54789541ec9deeef909d89437311b9c626b13b40662f5ef52";
485486

486487
let response = app
487488
.oneshot(

issue-bot/src/slack.rs

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -59,14 +59,11 @@ impl Slack {
5959
closest_issues: &[ClosestIssue],
6060
) -> Result<(), SlackError> {
6161
let mut msg = vec![format!(
62-
"Closest issues for {} <{}|#{}>:\n```{}```",
63-
issue.title, issue.number, issue.html_url, issue.body
62+
"Closest issues for {} (<{}|#{}>):\n```{}```",
63+
issue.title, issue.html_url, issue.number, issue.body
6464
)];
6565
for ci in closest_issues {
66-
msg.push(format!(
67-
"- {} (<{}|#{}>):\n```{}```",
68-
ci.title, ci.html_url, ci.number, ci.body
69-
));
66+
msg.push(format!("- {} (<{}|#{}>)", ci.title, ci.html_url, ci.number));
7067
}
7168
let body = SlackBody::new(&self.channel, msg.join("\n"));
7269
self.client

0 commit comments

Comments
 (0)