feature/table-of-content-docx #2
							
								
								
									
										36
									
								
								cmd/backend/create_toc.lua
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										36
									
								
								cmd/backend/create_toc.lua
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,36 @@
 | 
			
		||||
 | 
			
		||||
--- Return a new list holding every inline of `inlines` except images.
-- Only the top level of the list is inspected: an image nested inside
-- another inline (for example inside emphasis) is left where it is.
-- @param inlines sequence of inline elements, each exposing its tag as `.t`
-- @return a fresh table; the input list itself is not modified
local function remove_images(inlines)
  local kept = {}
  for _, inline in ipairs(inlines) do
    local is_image = inline.t == "Image"
    if not is_image then
      kept[#kept + 1] = inline
    end
  end
  return kept
end
 | 
			
		||||
 | 
			
		||||
--- Build a flat bullet list representing the table of contents.
-- Every top-level Header block of `doc` whose text (with top-level images
-- stripped) is non-empty becomes one list item: a link that targets
-- "#" .. header identifier and shows the header's remaining inlines.
-- Header levels are not reflected in the result; the list is not nested.
-- @param doc the pandoc document; only `doc.blocks` is read
-- @return a pandoc BulletList (with no items when no header qualifies)
local function build_toc(doc)
  local toc_items = {}
  for _, block in ipairs(doc.blocks) do
    if block.t == "Header" then
      local clean_inlines = remove_images(block.content)
      -- Headers that consist solely of images stringify to "" and are skipped.
      local header_text = pandoc.utils.stringify(clean_inlines)
      if header_text ~= "" then
        local link = pandoc.Link(clean_inlines, "#" .. block.identifier)
        -- A bullet-list item is a list of *blocks*, while Link is an inline.
        -- Wrap it in Plain explicitly instead of relying on implicit
        -- inline-to-block coercion when the list is constructed.
        toc_items[#toc_items + 1] = { pandoc.Plain({ link }) }
      end
    end
  end
  return pandoc.BulletList(toc_items)
end
 | 
			
		||||
 | 
			
		||||
--- Document-level filter entry point: prepend a table of contents.
-- Declared as a global named `Pandoc` so it is picked up as the filter
-- function that receives the whole document.
-- @param doc the complete pandoc document
-- @return the same document, with the TOC bullet list inserted as its first
--         block when at least one header produced a TOC entry
function Pandoc(doc)
  local toc = build_toc(doc)
  -- Skip insertion for documents without usable headers so that no empty
  -- bullet list is added to the output.
  if #toc.content > 0 then
    -- Insert the TOC at the very beginning of the document.
    table.insert(doc.blocks, 1, toc)
  end
  return doc
end
 | 
			
		||||
 | 
			
		||||
@@ -20,8 +20,9 @@ func ConvertToMarkdown(c *Config, filename string) ([]byte, error) {
 | 
			
		||||
	}
 | 
			
		||||
	defer os.RemoveAll(tmpDir)
 | 
			
		||||
 | 
			
		||||
	// --toc
 | 
			
		||||
	articleFileName := filepath.Join(os.TempDir(), fmt.Sprint(uuid.New(), ".md"))
 | 
			
		||||
	cmd := exec.Command("pandoc", "-s", "-f", "docx", "-t", "commonmark_x", "-o", articleFileName, "--extract-media", tmpDir, filename) // TODO: Is writing to a file necessary?
 | 
			
		||||
	cmd := exec.Command("pandoc", "-s", "--lua-filter=cmd/backend/create_toc.lua", "-f", "docx", "-t", "commonmark_x", "-o", articleFileName, "--extract-media", tmpDir, filename) // TODO: Is writing to a file necessary?
 | 
			
		||||
	cmd.Stderr = &stderr
 | 
			
		||||
	if err = cmd.Run(); err != nil {
 | 
			
		||||
		return nil, fmt.Errorf("error converting docx to markdown: %v: %v", err, stderr.String())
 | 
			
		||||
@@ -33,6 +34,9 @@ func ConvertToMarkdown(c *Config, filename string) ([]byte, error) {
 | 
			
		||||
		return nil, fmt.Errorf("error reading markdown file: %v", err)
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	re := regexp.MustCompile(`\{width=[^}]+height=[^}]+\}`)
 | 
			
		||||
	articleContent = re.ReplaceAll(articleContent, []byte(""))
 | 
			
		||||
 | 
			
		||||
	imageNames, err := filepath.Glob(filepath.Join(tmpDir, "media", "*"))
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		return nil, fmt.Errorf("error getting docx images from temporary directory: %v", err)
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user