Skip to content

Commit 5c80417

Browse files
author
Sebastian Flügge
committed
feat: add shallow headline parsing for headline search tools
Adds get_headlines_shallow() method to avoid expensive lazy loading when search tools need only title/tags/level data across all agenda files.
1 parent af0713d commit 5c80417

File tree

2 files changed

+234
-0
lines changed

2 files changed

+234
-0
lines changed

lua/orgmode/files/file.lua

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -261,6 +261,112 @@ function OrgFile:find_headlines_by_title(title, exact)
261261
end, self:get_headlines())
262262
end
263263

264+
---Check whether a priority cookie value lies inside the configured priority range.
---Works for numeric ranges (compared as numbers, in either order) and for
---single-character ranges (compared by byte value, like a pattern class `[A-C]`).
---@param value string cookie content, e.g. `A` or `10`
---@param highest string|number
---@param lowest string|number
---@return boolean
local function is_priority_in_range(value, highest, lowest)
  local hi, lo = tonumber(highest), tonumber(lowest)
  if hi and lo then
    -- Numeric priorities may span several digits, so compare values instead of characters
    local num = value:match('^%d+$') and tonumber(value)
    if not num then
      return false
    end
    return num >= math.min(hi, lo) and num <= math.max(hi, lo)
  end

  if #value ~= 1 then
    return false
  end
  local value_byte = value:byte()
  local high_byte, low_byte = tostring(highest):byte(), tostring(lowest):byte()
  return high_byte ~= nil and low_byte ~= nil and value_byte >= high_byte and value_byte <= low_byte
end

---Extract the title from headline item text by stripping a leading TODO keyword
---and a leading priority cookie (`[#A]`, `[#10]`, ...).
---
---A keyword is only stripped when it is followed by whitespace or is the whole
---text, so `TODOS for today` is left untouched while a bare `TODO` yields an
---empty title. A priority cookie is only stripped when its value lies inside
---the range returned by `config:get_priority_range()`.
---@param item_text string
---@return string title
function OrgFile:_parse_headline_title(item_text)
  -- Remove at most one leading TODO keyword
  for _, keyword in ipairs(config:get_todo_keywords():all_values()) do
    if item_text:sub(1, #keyword) == keyword then
      local rest = item_text:sub(#keyword + 1)
      if rest == '' or rest:find('^%s') then
        -- Parentheses drop gsub's second return value (the replacement count)
        item_text = (rest:gsub('^%s+', ''))
        break
      end
    end
  end

  -- Remove the priority cookie. The value is captured and validated separately,
  -- because a character class built from the range cannot match multi-character
  -- priorities. The trailing `()` captures the position right after the cookie.
  local prio_range = config:get_priority_range()
  local cookie, after_cookie = item_text:match('^%[#(%w+)%]%s*()')
  if cookie and is_priority_in_range(cookie, prio_range.highest, prio_range.lowest) then
    item_text = item_text:sub(after_cookie)
  end

  return vim.trim(item_text)
end
285+
286+
---Build the lightweight search record for a single headline node.
---Reads only the node's `stars`, `item` and `tags` fields and its start row;
---a missing node yields a record with empty/zero values.
---@param node TSNode
---@return { title: string, level: number, line_number: number, all_tags: string[], is_archived: boolean }
function OrgFile:_extract_shallow_headline_data(node)
  local data = { title = '', level = 0, line_number = 0, all_tags = {}, is_archived = false }
  if not node then
    return data
  end

  -- Level is taken from the end column of the stars node
  local stars = node:field('stars')[1]
  if stars then
    local _, end_col = stars:end_()
    data.level = end_col or 0
  end

  -- Title is the item text with TODO keyword and priority stripped
  local item = node:field('item')[1]
  if item then
    data.title = self:_parse_headline_title(self:get_node_text(item) or '')
  end

  -- Tags are parsed from the tags node; an ARCHIVE tag (any letter case) marks the headline archived
  local tags = node:field('tags')[1]
  if tags then
    data.all_tags = utils.parse_tags_string(self:get_node_text(tags) or '')
    for _, name in ipairs(data.all_tags) do
      if name:upper() == 'ARCHIVE' then
        data.is_archived = true
        break
      end
    end
  end

  -- node:start() returns a 0-based row first; line numbers are 1-based
  data.line_number = (node:start()) + 1

  return data
end
335+
336+
---Collect shallow headline records (title, level, line number, tags, archived flag)
---for every headline in this file.
---
---Intended for headline search tools (telescope, fzf.lua, snacks.picker, etc.) that
---need only this data for all headlines across all agenda files and should not pay
---for building full OrgHeadline objects.
---
---`opts.archived` only controls whether an archive *file* is read at all: an archive
---file returns an empty list unless it is truthy. Headlines are never dropped based
---on their own `is_archived` flag. `opts.max_depth`, when set, keeps only headlines
---whose level is less than or equal to it.
---
---@param opts? { archived?: boolean, max_depth?: number }
---@return { title: string, level: number, line_number: number, all_tags: string[], is_archived: boolean }[]
function OrgFile:get_headlines_shallow(opts)
  local include_archive_file = opts and opts.archived
  local max_depth = opts and opts.max_depth

  if self:is_archive_file() and not include_archive_file then
    return {}
  end

  self:parse()
  if not self.root then
    return {}
  end

  local results = {}
  for _, node in ipairs(self:get_ts_captures('(section (headline) @headline)')) do
    local entry = self:_extract_shallow_headline_data(node)
    if not max_depth or entry.level <= max_depth then
      results[#results + 1] = entry
    end
  end

  return results
end
369+
264370
---@param title string
265371
---@return OrgHeadline | nil
266372
function OrgFile:find_headline_by_title(title)

tests/plenary/files/file_spec.lua

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -831,6 +831,134 @@ describe('OrgFile', function()
831831
end)
832832
end)
833833

834+
describe('get_headlines_shallow', function()
  it('should get all headlines with correct structure', function()
    local file = load_file_sync({
      '* TODO Headline 1',
      '** [#A] Priority Headline :tag1:tag2:',
      '*** DONE Archived Headline :ARCHIVE:',
      '* Plain Headline',
    })

    local headlines = file:get_headlines_shallow()
    assert.are.same(4, #headlines)

    local first, second, third = headlines[1], headlines[2], headlines[3]

    -- Every field of the shallow record must be present
    assert.is.not_nil(first.title)
    assert.is.not_nil(first.level)
    assert.is.not_nil(first.line_number)
    assert.is.not_nil(first.all_tags)
    assert.is.not_nil(first.is_archived)

    -- TODO keyword is stripped from the title
    assert.are.same('Headline 1', first.title)
    assert.are.same(1, first.level)
    assert.are.same(1, first.line_number)
    assert.are.same({}, first.all_tags)
    assert.is.False(first.is_archived)

    -- Priority cookie is stripped and tags are parsed
    assert.are.same('Priority Headline', second.title)
    assert.are.same(2, second.level)
    assert.are.same({ 'tag1', 'tag2' }, second.all_tags)

    -- ARCHIVE tag sets the archived flag
    assert.are.same('Archived Headline', third.title)
    assert.are.same({ 'ARCHIVE' }, third.all_tags)
    assert.is.True(third.is_archived)
  end)

  it('should return same count as get_headlines for regular files', function()
    local file = load_file_sync({
      '* TODO Headline 1',
      '* TODO Headline 2',
      '** Headline 2.1',
      '*** Headline 2.1.1',
      '* DONE Headline 3',
    })

    local shallow_count = #file:get_headlines_shallow()
    local heavy_count = #file:get_headlines()

    assert.are.same(heavy_count, shallow_count)
  end)

  -- Table-driven archive scenarios; all share the same two-headline content
  local archive_content = { '* Headline', '* Archived :ARCHIVE:' }
  local archive_test_cases = {
    {
      name = 'regular file without archived option',
      filename = nil, -- load_file_sync falls back to a regular .org file
      opts = {},
      content = archive_content,
      expected_count = 2,
    },
    {
      name = 'archive file without archived option',
      filename = 'test.org_archive',
      opts = {},
      content = archive_content,
      expected_count = 0,
    },
    {
      name = 'archive file with archived=true',
      filename = 'test.org_archive',
      opts = { archived = true },
      content = archive_content,
      expected_count = 2,
    },
  }

  for _, scenario in ipairs(archive_test_cases) do
    it('should handle archives: ' .. scenario.name, function()
      local path = nil
      if scenario.filename then
        path = vim.fn.tempname() .. scenario.filename
      end
      local file = load_file_sync(scenario.content, path)
      assert.are.same(scenario.expected_count, #file:get_headlines_shallow(scenario.opts))
    end)
  end

  it('should respect max_depth filtering', function()
    local file = load_file_sync({
      '* Level 1',
      '** Level 2',
      '*** Level 3',
      '**** Level 4',
    })

    local unfiltered = file:get_headlines_shallow()
    local up_to_level_2 = file:get_headlines_shallow({ max_depth = 2 })

    assert.are.same(4, #unfiltered)
    assert.are.same(2, #up_to_level_2)
  end)

  it('should match get_headlines filtering behavior', function()
    local file = load_file_sync({
      '* TODO Headline 1',
      '** Headline 1.1',
      '*** TODO Headline 1.1.1 :ARCHIVE:',
      '**** Headline 1.1.1.1',
      '* DONE Headline 2',
    })

    -- Default options must agree with the full-object API
    local shallow_all = file:get_headlines_shallow()
    local heavy_all = file:get_headlines()
    assert.are.same(#heavy_all, #shallow_all)

    -- So must the archived-inclusive variants
    local shallow_archived = file:get_headlines_shallow({ archived = true })
    local heavy_archived = file:get_headlines_including_archived()
    assert.are.same(#heavy_archived, #shallow_archived)
  end)

  it('should handle empty files gracefully', function()
    local file = load_file_sync({})
    assert.are.same(0, #file:get_headlines_shallow())
  end)
end)
961+
834962
describe('get_todos', function()
835963
local has_correct_type = function(todos)
836964
assert.are.same('TODO', todos.todo_keywords[1].type)

0 commit comments

Comments
 (0)