Add possibility to create a table of contents for docx-imported articles
This commit is contained in:
parent
951949f98d
commit
4a2aed5bcf
33
cmd/backend/create_toc.lua
Normal file
33
cmd/backend/create_toc.lua
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
--- Return a copy of an inline list with every top-level image dropped.
-- Only the elements of `inlines` itself are inspected: an image nested inside
-- another inline (for example inside a link or emphasis) stays where it is.
-- The input list is never modified.
-- @param inlines sequence of inline elements exposing a `t` tag; nil is
--   treated as an empty list
-- @return a new plain Lua sequence with the non-image elements, in order
local function remove_images(inlines)
  local kept = {}
  -- ipairs(nil) raises, so answer a missing list with an empty result.
  if inlines == nil then
    return kept
  end
  for _, inline in ipairs(inlines) do
    if inline.t ~= "Image" then
      kept[#kept + 1] = inline
    end
  end
  return kept
end
|
||||||
|
|
||||||
|
--- Build a flat bullet list that serves as the table of contents.
-- Walks the top-level blocks of `doc` and emits one list item per Header.
-- Header levels are not reflected (the list is not nested), and headers that
-- are not direct children of `doc.blocks` are not visited.
-- Headers whose text is empty once images are stripped are skipped.
-- @param doc the pandoc document; only `doc.blocks` is read
-- @return a pandoc BulletList; it has no items when no usable header exists
local function build_toc(doc)
  local toc_items = {}
  for _, block in ipairs(doc.blocks) do
    if block.t == "Header" then
      -- Images make no sense inside a TOC entry, so drop them from the label.
      local label = remove_images(block.content)
      if pandoc.utils.stringify(label) ~= "" then
        local entry
        if block.identifier ~= nil and block.identifier ~= "" then
          entry = pandoc.Plain({ pandoc.Link(label, "#" .. block.identifier) })
        else
          -- Without an identifier "#" would be a dead link; keep the entry
          -- as plain text instead.
          entry = pandoc.Plain(label)
        end
        -- Each bullet item is a list of blocks, hence the Plain wrapper.
        toc_items[#toc_items + 1] = { entry }
      end
    end
  end
  return pandoc.BulletList(toc_items)
end
|
||||||
|
|
||||||
|
--- Document-level filter entry point.
-- pandoc calls the global function named `Pandoc` with the whole document,
-- so every header is available when the table of contents is built.
-- The TOC is placed before all other blocks; when the document yields no
-- TOC entries nothing is inserted, so no empty list ends up in the output.
-- @param doc the pandoc document; `doc.blocks` is modified in place
-- @return the same document
function Pandoc(doc)
  local toc = build_toc(doc)
  if #toc.content > 0 then
    -- Position 1 puts the TOC at the very beginning of the document.
    table.insert(doc.blocks, 1, toc)
  end
  return doc
end
|
@ -21,7 +21,7 @@ func ConvertToMarkdown(c *Config, filename string) ([]byte, error) {
|
|||||||
defer os.RemoveAll(tmpDir)
|
defer os.RemoveAll(tmpDir)
|
||||||
|
|
||||||
articleFileName := filepath.Join(os.TempDir(), fmt.Sprint(uuid.New(), ".md"))
|
articleFileName := filepath.Join(os.TempDir(), fmt.Sprint(uuid.New(), ".md"))
|
||||||
cmd := exec.Command("pandoc", "-s", "-f", "docx", "-t", "commonmark_x", "-o", articleFileName, "--extract-media", tmpDir, filename) // TODO: Is writing to a file necessary?
|
cmd := exec.Command("pandoc", "-s", "--lua-filter=cmd/backend/create_toc.lua", "-f", "docx", "-t", "commonmark_x", "-o", articleFileName, "--extract-media", tmpDir, filename) // TODO: Is writing to a file necessary?
|
||||||
cmd.Stderr = &stderr
|
cmd.Stderr = &stderr
|
||||||
if err = cmd.Run(); err != nil {
|
if err = cmd.Run(); err != nil {
|
||||||
return nil, fmt.Errorf("error converting docx to markdown: %v: %v", err, stderr.String())
|
return nil, fmt.Errorf("error converting docx to markdown: %v: %v", err, stderr.String())
|
||||||
@ -33,6 +33,9 @@ func ConvertToMarkdown(c *Config, filename string) ([]byte, error) {
|
|||||||
return nil, fmt.Errorf("error reading markdown file: %v", err)
|
return nil, fmt.Errorf("error reading markdown file: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
re := regexp.MustCompile(`\{width=[^}]+height=[^}]+\}`)
|
||||||
|
articleContent = re.ReplaceAll(articleContent, []byte(""))
|
||||||
|
|
||||||
imageNames, err := filepath.Glob(filepath.Join(tmpDir, "media", "*"))
|
imageNames, err := filepath.Glob(filepath.Join(tmpDir, "media", "*"))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("error getting docx images from temporary directory: %v", err)
|
return nil, fmt.Errorf("error getting docx images from temporary directory: %v", err)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user