Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
171 changes: 171 additions & 0 deletions gsheets/sheets_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,15 @@

import asyncio
import json
import logging
import re
from typing import List, Optional, Union

from core.utils import UserInputError

logger = logging.getLogger(__name__)

MAX_GRID_METADATA_CELLS = 5000

A1_PART_REGEX = re.compile(r"^([A-Za-z]*)(\d*)$")
SHEET_TITLE_SAFE_RE = re.compile(r"^[A-Za-z0-9_]+$")
Expand Down Expand Up @@ -877,3 +881,170 @@ def _build_gradient_rule(
rule_body["gradientRule"]["midpoint"] = gradient_points[1]
rule_body["gradientRule"]["maxpoint"] = gradient_points[2]
return rule_body


def _extract_cell_notes_from_grid(spreadsheet: dict) -> list[dict[str, str]]:
"""
Extract cell notes from spreadsheet grid data.

Returns a list of dictionaries with:
- "cell": cell A1 reference
- "note": the note text
"""
notes: list[dict[str, str]] = []
for sheet in spreadsheet.get("sheets", []) or []:
sheet_title = sheet.get("properties", {}).get("title") or "Unknown"
for grid in sheet.get("data", []) or []:
start_row = _coerce_int(grid.get("startRow"), default=0)
start_col = _coerce_int(grid.get("startColumn"), default=0)
for row_offset, row_data in enumerate(grid.get("rowData", []) or []):
if not row_data:
continue
for col_offset, cell_data in enumerate(
row_data.get("values", []) or []
):
if not cell_data:
continue
note = cell_data.get("note")
if not note:
continue
notes.append(
{
"cell": _format_a1_cell(
sheet_title,
start_row + row_offset,
start_col + col_offset,
),
"note": note,
}
)
return notes


async def _fetch_sheet_notes(
service, spreadsheet_id: str, a1_range: str
) -> list[dict[str, str]]:
"""Fetch cell notes for the given range via spreadsheets.get with includeGridData."""
response = await asyncio.to_thread(
service.spreadsheets()
.get(
spreadsheetId=spreadsheet_id,
ranges=[a1_range],
includeGridData=True,
fields="sheets(properties(title),data(startRow,startColumn,rowData(values(note))))",
)
.execute
)
return _extract_cell_notes_from_grid(response)


def _format_sheet_notes_section(
*, notes: list[dict[str, str]], range_label: str, max_details: int = 25
) -> str:
"""
Format a list of cell notes into a human-readable section.
"""
if not notes:
return ""

lines = []
for item in notes[:max_details]:
cell = item.get("cell") or "(unknown cell)"
note = item.get("note") or "(empty note)"
lines.append(f"- {cell}: {note}")

suffix = (
f"\n... and {len(notes) - max_details} more notes"
if len(notes) > max_details
else ""
)
return f"\n\nCell notes in range '{range_label}':\n" + "\n".join(lines) + suffix


async def _fetch_grid_metadata(
service,
spreadsheet_id: str,
resolved_range: str,
values: List[List[object]],
include_hyperlinks: bool = False,
include_notes: bool = False,
) -> tuple[str, str]:
"""Fetch hyperlinks and/or notes for a range via a single spreadsheets.get call.

Computes tight range bounds, enforces the cell-count cap, builds a combined
``fields`` selector so only one API round-trip is needed when both flags are
``True``, then parses the response into formatted output sections.

Returns:
(hyperlink_section, notes_section) — each is an empty string when the
corresponding flag is ``False`` or no data was found.
"""
if not include_hyperlinks and not include_notes:
return "", ""

tight_range = _a1_range_for_values(resolved_range, values)
if not tight_range:
logger.info(
"[read_sheet_values] Skipping grid metadata fetch for range '%s': "
"unable to determine tight bounds",
resolved_range,
)
return "", ""

cell_count = _a1_range_cell_count(tight_range) or sum(len(row) for row in values)
if cell_count > MAX_GRID_METADATA_CELLS:
logger.info(
"[read_sheet_values] Skipping grid metadata fetch for large range "
"'%s' (%d cells > %d limit)",
tight_range,
cell_count,
MAX_GRID_METADATA_CELLS,
)
return "", ""

# Build a combined fields selector so we hit the API at most once.
value_fields: list[str] = []
if include_hyperlinks:
value_fields.extend(["hyperlink", "textFormatRuns(format(link(uri)))"])
if include_notes:
value_fields.append("note")

fields = (
"sheets(properties(title),data(startRow,startColumn,"
f"rowData(values({','.join(value_fields)}))))"
)

try:
response = await asyncio.to_thread(
service.spreadsheets()
.get(
spreadsheetId=spreadsheet_id,
ranges=[tight_range],
includeGridData=True,
fields=fields,
)
.execute
)
except Exception as exc:
logger.warning(
"[read_sheet_values] Failed fetching grid metadata for range '%s': %s",
tight_range,
exc,
)
return "", ""

hyperlink_section = ""
if include_hyperlinks:
hyperlinks = _extract_cell_hyperlinks_from_grid(response)
hyperlink_section = _format_sheet_hyperlink_section(
hyperlinks=hyperlinks, range_label=tight_range
)

notes_section = ""
if include_notes:
notes = _extract_cell_notes_from_grid(response)
notes_section = _format_sheet_notes_section(
notes=notes, range_label=tight_range
)

return hyperlink_section, notes_section
53 changes: 13 additions & 40 deletions gsheets/sheets_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,14 @@
from core.utils import handle_http_errors, UserInputError
from core.comments import create_comment_tools
from gsheets.sheets_helpers import (
_a1_range_cell_count,
CONDITION_TYPES,
_a1_range_for_values,
_build_boolean_rule,
_build_gradient_rule,
_fetch_detailed_sheet_errors,
_fetch_sheet_hyperlinks,
_fetch_grid_metadata,
_fetch_sheets_with_rules,
_format_conditional_rules_section,
_format_sheet_hyperlink_section,
_format_sheet_error_section,
_parse_a1_range,
_parse_condition_values,
Expand All @@ -36,7 +34,6 @@

# Configure module logger
logger = logging.getLogger(__name__)
MAX_HYPERLINK_FETCH_CELLS = 5000


@server.tool()
Expand Down Expand Up @@ -179,6 +176,7 @@ async def read_sheet_values(
spreadsheet_id: str,
range_name: str = "A1:Z1000",
include_hyperlinks: bool = False,
include_notes: bool = False,
) -> str:
"""
Reads values from a specific range in a Google Sheet.
Expand All @@ -189,6 +187,8 @@ async def read_sheet_values(
range_name (str): The range to read (e.g., "Sheet1!A1:D10", "A1:D10"). Defaults to "A1:Z1000".
include_hyperlinks (bool): If True, also fetch hyperlink metadata for the range.
Defaults to False to avoid expensive includeGridData requests.
include_notes (bool): If True, also fetch cell notes for the range.
Defaults to False to avoid expensive includeGridData requests.

Returns:
str: The formatted values from the specified range.
Expand All @@ -211,41 +211,14 @@ async def read_sheet_values(
resolved_range = result.get("range", range_name)
detailed_range = _a1_range_for_values(resolved_range, values) or resolved_range

hyperlink_section = ""
if include_hyperlinks:
# Use a tight A1 range for includeGridData fetches to avoid expensive
# open-ended requests (e.g., A:Z).
hyperlink_range = _a1_range_for_values(resolved_range, values)
if not hyperlink_range:
logger.info(
"[read_sheet_values] Skipping hyperlink fetch for range '%s': unable to determine tight bounds",
resolved_range,
)
else:
cell_count = _a1_range_cell_count(hyperlink_range) or sum(
len(row) for row in values
)
if cell_count <= MAX_HYPERLINK_FETCH_CELLS:
try:
hyperlinks = await _fetch_sheet_hyperlinks(
service, spreadsheet_id, hyperlink_range
)
hyperlink_section = _format_sheet_hyperlink_section(
hyperlinks=hyperlinks, range_label=hyperlink_range
)
except Exception as exc:
logger.warning(
"[read_sheet_values] Failed fetching hyperlinks for range '%s': %s",
hyperlink_range,
exc,
)
else:
logger.info(
"[read_sheet_values] Skipping hyperlink fetch for large range '%s' (%d cells > %d limit)",
hyperlink_range,
cell_count,
MAX_HYPERLINK_FETCH_CELLS,
)
hyperlink_section, notes_section = await _fetch_grid_metadata(
service,
spreadsheet_id,
resolved_range,
values,
include_hyperlinks=include_hyperlinks,
include_notes=include_notes,
)

detailed_errors_section = ""
if _values_contain_sheets_errors(values):
Expand Down Expand Up @@ -277,7 +250,7 @@ async def read_sheet_values(
)

logger.info(f"Successfully read {len(values)} rows for {user_google_email}.")
return text_output + hyperlink_section + detailed_errors_section
return text_output + hyperlink_section + notes_section + detailed_errors_section


@server.tool()
Expand Down
Loading