Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions openlibrary/fastapi/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
from __future__ import annotations

from typing import Self

from pydantic import BaseModel, Field, model_validator


class Pagination(BaseModel):
    """Shared pagination query parameters for API endpoints.

    ``offset`` wins over ``page`` when both are supplied; otherwise
    ``page`` defaults to 1.
    """

    limit: int = Field(100, ge=0, description="Maximum number of results to return.")
    offset: int | None = Field(
        None, ge=0, description="Number of results to skip.", exclude=True
    )
    page: int | None = Field(None, ge=1, description="Page number (1-indexed).")

    @model_validator(mode='after')
    def normalize_pagination(self) -> Self:
        # Explicit offset takes precedence: clear page so the two cannot clash.
        if self.offset is None and self.page is None:
            self.page = 1
        elif self.offset is not None:
            self.page = None
        return self


# Convenience subclass with a default page size of 20; promote to a
# parameterized factory if more default limits are needed.
class PaginationLimit20(Pagination):
    limit: int = Field(20, ge=0, description="Maximum number of results to return.")
206 changes: 179 additions & 27 deletions openlibrary/fastapi/search.py
Original file line number Diff line number Diff line change
@@ -1,50 +1,41 @@
from __future__ import annotations

import json
from typing import Annotated, Any, Literal, Self
from typing import Annotated, Any, Literal

from fastapi import APIRouter, Query, Request
import web
from fastapi import APIRouter, Depends, HTTPException, Path, Query, Request
from pydantic import (
BaseModel,
BeforeValidator,
ConfigDict,
Field,
computed_field,
field_validator,
model_validator,
)

from openlibrary.core.fulltext import fulltext_search_async
from openlibrary.plugins.inside.code import RESULTS_PER_PAGE
from openlibrary.fastapi.models import Pagination, PaginationLimit20
from openlibrary.plugins.worksearch.code import (
async_run_solr_query,
default_spellcheck_count,
validate_search_json_query,
work_search_async,
)
from openlibrary.plugins.worksearch.schemes.authors import AuthorSearchScheme
from openlibrary.plugins.worksearch.schemes.lists import ListSearchScheme
from openlibrary.plugins.worksearch.schemes.subjects import SubjectSearchScheme
from openlibrary.plugins.worksearch.schemes.works import WorkSearchScheme
from openlibrary.plugins.worksearch.subjects import (
DEFAULT_RESULTS,
MAX_RESULTS,
date_range_to_publish_year_filter,
get_subject,
)

router = APIRouter()


# Ideally this will go in a models files, we'll move it for the 2nd endpoint
class Pagination(BaseModel):
    """Shared pagination query parameters for API endpoints.

    An explicit ``offset`` overrides ``page``; when neither is given,
    ``page`` is normalized to 1.
    """

    limit: int = Field(100, ge=0, description="Maximum number of results to return.")
    offset: int | None = Field(
        None, ge=0, description="Number of results to skip.", exclude=True
    )
    page: int | None = Field(None, ge=1, description="Page number (1-indexed).")

    @model_validator(mode='after')
    def normalize_pagination(self) -> Self:
        # offset and page are mutually exclusive; offset wins.
        if self.offset is None and self.page is None:
            self.page = 1
        elif self.offset is not None:
            self.page = None
        return self


class PublicQueryOptions(BaseModel):
"""
All parameters (and Pagination) that will be passed to the query.
Expand Down Expand Up @@ -220,10 +211,171 @@ async def search_json(

@router.get("/search/inside.json")
async def search_inside_json(
    pagination: Annotated[PaginationLimit20, Depends()],
    q: str = Query(..., title="Search query"),
):
    """Full-text search inside book contents.

    Pagination comes from the shared ``PaginationLimit20`` dependency
    (default page size 20); results include facets in JS-friendly form.
    """
    # NOTE: the stale pre-migration `page`/`limit` Query parameters left an
    # unterminated `Query(` call in the signature; they are superseded by
    # the `pagination` dependency and have been removed.
    return await fulltext_search_async(
        q, page=pagination.page, limit=pagination.limit, js=True, facets=True
    )


@router.get("/search/subjects.json")
async def search_subjects_json(
    pagination: Annotated[Pagination, Depends()],
    q: str = Query("", description="The search query"),
):
    """Search subjects in Solr and return the raw Solr response body."""
    solr_result = await async_run_solr_query(
        SubjectSearchScheme(),
        {'q': q},
        offset=pagination.offset,
        rows=pagination.limit,
        sort='work_count desc',
        request_label='SUBJECT_SEARCH_API',
    )

    # Backward compatibility: legacy clients expect `type` and `count` keys.
    payload = solr_result.raw_resp['response']
    for record in payload['docs']:
        record['type'] = record.get('subject_type', 'subject')
        record['count'] = record.get('work_count', 0)

    return payload


@router.get("/search/lists.json")
async def search_lists_json(
    pagination: Annotated[PaginationLimit20, Depends()],
    q: str = Query("", description="The search query"),
    fields: str = Query("", description="Fields to return"),
    sort: str = Query("", description="Sort order"),
    api: str = Query(
        "", description="API version: 'next' for new format, empty for old format"
    ),
):
    """Search lists in Solr.

    Returns the search.json-style payload when ``api == 'next'``;
    otherwise the legacy shape built from list previews.
    """
    # A stray leftover line from the removed `search_inside_json` body
    # (`return await fulltext_search_async(...)`) previously sat here,
    # returning immediately and making the rest of this function
    # unreachable; it has been deleted.
    response = await async_run_solr_query(
        ListSearchScheme(),
        {'q': q},
        offset=pagination.offset,
        rows=pagination.limit,
        fields=fields,
        sort=sort,
        request_label='LIST_SEARCH_API',
    )

    if api == 'next':
        # Match search.json
        return {
            'numFound': response.num_found,
            'num_found': response.num_found,
            'start': pagination.offset,
            'q': q,
            'docs': response.docs,
        }
    else:
        # Default to the old API shape for a while, then we'll flip
        lists = web.ctx.site.get_many([doc['key'] for doc in response.docs])
        return {
            'start': pagination.offset,
            'docs': [lst.preview() for lst in lists],
        }


@router.get("/search/authors.json")
async def search_authors_json(
    pagination: Annotated[Pagination, Depends()],
    q: str = Query("", description="The search query"),
    fields: str = Query("*", description="Fields to return"),
    sort: str = Query("", description="Sort order"),
):
    """Search authors in Solr and return the raw Solr response body."""
    solr_result = await async_run_solr_query(
        AuthorSearchScheme(),
        {'q': q},
        offset=pagination.offset,
        rows=pagination.limit,
        fields=fields,
        sort=sort,
        request_label='AUTHOR_SEARCH_API',
    )

    # SIGH the public API exposes the key like this :(
    payload = solr_result.raw_resp['response']
    for record in payload['docs']:
        record['key'] = record['key'].split('/')[-1]

    return payload


class SubjectsRequestParams(BaseModel):
    """Parameters for the subjects endpoint."""

    limit: int = Field(
        DEFAULT_RESULTS,
        ge=1,
        le=MAX_RESULTS,
        description="Maximum number of results to return.",
    )
    offset: int = Field(0, ge=0, description="Number of results to skip.")
    details: bool = Field(False, description="Include detailed facet information.")
    has_fulltext: bool = Field(False, description="Filter for books with fulltext.")
    sort: str = Field("editions", description="Sort order of results.")
    available: bool = Field(
        False, description="Filter for available books (currently unused)."
    )
    published_in: str | None = Field(
        None, description="Date range filter (e.g., '2000-2010' or '2000')."
    )

    @computed_field
    def filters(self) -> dict[str, str]:
        """Derive the subject-engine filter dict from the query parameters."""
        result: dict[str, str] = {}
        if self.has_fulltext:
            result['has_fulltext'] = 'true'

        year_filter = date_range_to_publish_year_filter(self.published_in)
        if year_filter:
            result['publish_year'] = year_filter

        return result


@router.get("/subjects/{subjects}.json")
async def subjects_json(
    subjects: Annotated[
        str, Path(description="Subject key (e.g., 'fiction' or 'person:harry_potter')")
    ],
    params: Annotated[SubjectsRequestParams, Query()],
) -> Any:
    """
    Returns works related to a specific subject.

    Supports various query parameters for filtering and pagination:
    - **limit**: Number of results (default: 12, max: 1000)
    - **offset**: Number of results to skip (default: 0)
    - **details**: Include facet information (default: false)
    - **has_fulltext**: Filter for books with fulltext (default: false)
    - **sort**: Sort order (default: 'editions')
    - **published_in**: Date range filter (e.g., '2000-2010' or '2000')
    """
    # Normalize the key
    subjects = subjects.lower()

    # Get subject data
    try:
        subject_results = get_subject(
            subjects,
            offset=params.offset,
            limit=params.limit,
            sort=params.sort,
            details=params.details,
            request_label='SUBJECT_ENGINE_API',
            **params.filters,
        )
    except NotImplementedError as exc:
        # No SubjectEngine for this key; chain the cause so tracebacks keep
        # the original context (flake8 B904).
        raise HTTPException(
            status_code=404, detail=f"Subject not found: {subjects}"
        ) from exc

    # Adjust ebook_count if has_fulltext filter is applied
    if params.has_fulltext:
        subject_results['ebook_count'] = subject_results['work_count']

    return subject_results
3 changes: 3 additions & 0 deletions openlibrary/plugins/worksearch/code.py
Original file line number Diff line number Diff line change
Expand Up @@ -916,6 +916,7 @@ def get_results(


# inherits from list_search but modifies the GET response to return results in JSON format
@deprecated('migrated to fastapi')
class list_search_json(list_search):
# used subject_search_json as a reference
path = '/search/lists'
Expand Down Expand Up @@ -984,6 +985,7 @@ def get_results(
return response


@deprecated("migrated to fastapi")
class subject_search_json(subject_search):
path = '/search/subjects'
encoding = 'json'
Expand Down Expand Up @@ -1040,6 +1042,7 @@ def get_results(
return resp


@deprecated("migrated to fastapi")
class author_search_json(author_search):
path = '/search/authors'
encoding = 'json'
Expand Down
2 changes: 1 addition & 1 deletion openlibrary/plugins/worksearch/subjects.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ def process_key(self, key):
return key


def date_range_to_publish_year_filter(published_in: str) -> str:
def date_range_to_publish_year_filter(published_in: str | None) -> str:
if published_in:
if '-' in published_in:
begin, end = published_in.split('-', 1)
Expand Down
Loading
Loading