Add possibility to create table of contents for docx imported articles

This commit is contained in:
Jason Streifling 2025-02-04 17:48:54 +01:00
parent 951949f98d
commit 4a2aed5bcf
2 changed files with 37 additions and 1 deletions

View File

@ -0,0 +1,33 @@
-- Helper function: copy a list of inlines, leaving out Image elements.
-- Only the entries of `inlines` itself are inspected (via their `t` field);
-- children of other elements are not visited. The input list is left
-- unmodified and a fresh table is returned.
local function remove_images(inlines)
  local kept = {}
  for _, inline in ipairs(inlines) do
    local is_image = inline.t == "Image"
    if not is_image then
      kept[#kept + 1] = inline
    end
  end
  return kept
end
-- Build a bullet list representing the table of contents.
-- Walks the top-level blocks of `doc` and creates one list item per Header
-- whose text (with images removed) is non-empty. Every item is a single
-- Plain block, so each bullet item is an explicit list of blocks rather than
-- a bare inline. Headers that carry an identifier are linked to
-- "#<identifier>"; headers without one are listed as plain text, because a
-- link to "#" alone would not point at the heading.
-- Returns a pandoc BulletList (with no items if no header qualified).
local function build_toc(doc)
  local toc_items = {}
  for _, block in ipairs(doc.blocks) do
    if block.t == "Header" then
      local clean_inlines = remove_images(block.content)
      local header_text = pandoc.utils.stringify(clean_inlines)
      if header_text ~= "" then
        local entry
        if block.identifier ~= nil and block.identifier ~= "" then
          entry = { pandoc.Link(clean_inlines, "#" .. block.identifier) }
        else
          entry = clean_inlines
        end
        toc_items[#toc_items + 1] = { pandoc.Plain(entry) }
      end
    end
  end
  return pandoc.BulletList(toc_items)
end
-- Filter entry point: receives the whole document, prepends a table of
-- contents built from its headers, and returns the document.
-- When no header produced a TOC entry the document is returned unchanged,
-- so that no empty bullet list is placed in front of the content.
function Pandoc(doc)
  local toc = build_toc(doc)
  if #toc.content > 0 then
    table.insert(doc.blocks, 1, toc) -- Insert the TOC at the very beginning of the document.
  end
  return doc
end

View File

@ -21,7 +21,7 @@ func ConvertToMarkdown(c *Config, filename string) ([]byte, error) {
defer os.RemoveAll(tmpDir)
articleFileName := filepath.Join(os.TempDir(), fmt.Sprint(uuid.New(), ".md"))
cmd := exec.Command("pandoc", "-s", "-f", "docx", "-t", "commonmark_x", "-o", articleFileName, "--extract-media", tmpDir, filename) // TODO: Is writing to a file necessary?
cmd := exec.Command("pandoc", "-s", "--lua-filter=cmd/backend/create_toc.lua", "-f", "docx", "-t", "commonmark_x", "-o", articleFileName, "--extract-media", tmpDir, filename) // TODO: Is writing to a file necessary?
cmd.Stderr = &stderr
if err = cmd.Run(); err != nil {
return nil, fmt.Errorf("error converting docx to markdown: %v: %v", err, stderr.String())
@ -33,6 +33,9 @@ func ConvertToMarkdown(c *Config, filename string) ([]byte, error) {
return nil, fmt.Errorf("error reading markdown file: %v", err)
}
re := regexp.MustCompile(`\{width=[^}]+height=[^}]+\}`)
articleContent = re.ReplaceAll(articleContent, []byte(""))
imageNames, err := filepath.Glob(filepath.Join(tmpDir, "media", "*"))
if err != nil {
return nil, fmt.Errorf("error getting docx images from temporary directory: %v", err)