feature/table-of-content-docx #2
							
								
								
									
										36
									
								
								cmd/backend/create_toc.lua
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										36
									
								
								cmd/backend/create_toc.lua
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,36 @@
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
 | 
					--- Return a copy of an inline list with the image elements left out.
					-- Only the elements directly in `inlines` are inspected; the contents of
					-- nested inlines are not searched.
					-- @param inlines sequence of inline elements, each exposing its tag as `.t`
					-- @return a new plain Lua table with the non-image elements, in their
					--         original order (the input list itself is not modified)
					local function remove_images(inlines)
					  local kept = {}
					  for _, inline in ipairs(inlines) do
					    local is_image = inline.t == "Image"
					    if not is_image then
					      kept[#kept + 1] = inline
					    end
					  end
					  return kept
					end
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					--- Build a bullet list representing the table of contents.
					-- Scans only the blocks directly in `doc.blocks`. Every header found there
					-- contributes one entry; header levels are not taken into account, so the
					-- resulting list is flat.
					-- @param doc the document whose `blocks` list is scanned
					-- @return a `pandoc.BulletList` with one linked item per header that still
					--         has text once its images are removed
					local function build_toc(doc)
					  local toc_items = {}
					  for _, block in ipairs(doc.blocks) do
					    if block.t == "Header" then
					      local clean_inlines = remove_images(block.content)
					      -- A header that is empty (or consists only of images) would give a
					      -- blank entry, so it is skipped.
					      if pandoc.utils.stringify(clean_inlines) ~= "" then
					        local link = pandoc.Link(clean_inlines, "#" .. block.identifier)
					        -- A bullet-list item is a list of *blocks*; wrap the inline link
					        -- in a Plain block instead of passing bare inlines.
					        toc_items[#toc_items + 1] = { pandoc.Plain({ link }) }
					      end
					    end
					  end
					  return pandoc.BulletList(toc_items)
					end
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					--- Document-level filter entry point: prepend a table of contents.
					-- Receives the whole document, builds the TOC from its headers and puts it
					-- in front of all other blocks.
					-- @param doc the document to process
					-- @return the same document, with the TOC inserted as its first block when
					--         at least one TOC entry exists
					function Pandoc(doc)
					  local toc = build_toc(doc)
					  -- With no usable headers the list has no items; inserting it would only
					  -- add an empty bullet list to the top of the document.
					  if #toc.content > 0 then
					    -- Insert the TOC at the very beginning of the document.
					    table.insert(doc.blocks, 1, toc)
					  end
					  return doc
					end
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -20,8 +20,9 @@ func ConvertToMarkdown(c *Config, filename string) ([]byte, error) {
 | 
				
			|||||||
	}
 | 
						}
 | 
				
			||||||
	defer os.RemoveAll(tmpDir)
 | 
						defer os.RemoveAll(tmpDir)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						// --toc
 | 
				
			||||||
	articleFileName := filepath.Join(os.TempDir(), fmt.Sprint(uuid.New(), ".md"))
 | 
						articleFileName := filepath.Join(os.TempDir(), fmt.Sprint(uuid.New(), ".md"))
 | 
				
			||||||
	cmd := exec.Command("pandoc", "-s", "-f", "docx", "-t", "commonmark_x", "-o", articleFileName, "--extract-media", tmpDir, filename) // TODO: Is writing to a file necessary?
 | 
						cmd := exec.Command("pandoc", "-s", "--lua-filter=cmd/backend/create_toc.lua", "-f", "docx", "-t", "commonmark_x", "-o", articleFileName, "--extract-media", tmpDir, filename) // TODO: Is writing to a file necessary?
 | 
				
			||||||
	cmd.Stderr = &stderr
 | 
						cmd.Stderr = &stderr
 | 
				
			||||||
	if err = cmd.Run(); err != nil {
 | 
						if err = cmd.Run(); err != nil {
 | 
				
			||||||
		return nil, fmt.Errorf("error converting docx to markdown: %v: %v", err, stderr.String())
 | 
							return nil, fmt.Errorf("error converting docx to markdown: %v: %v", err, stderr.String())
 | 
				
			||||||
@@ -33,6 +34,9 @@ func ConvertToMarkdown(c *Config, filename string) ([]byte, error) {
 | 
				
			|||||||
		return nil, fmt.Errorf("error reading markdown file: %v", err)
 | 
							return nil, fmt.Errorf("error reading markdown file: %v", err)
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						re := regexp.MustCompile(`\{width=[^}]+height=[^}]+\}`)
 | 
				
			||||||
 | 
						articleContent = re.ReplaceAll(articleContent, []byte(""))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	imageNames, err := filepath.Glob(filepath.Join(tmpDir, "media", "*"))
 | 
						imageNames, err := filepath.Glob(filepath.Join(tmpDir, "media", "*"))
 | 
				
			||||||
	if err != nil {
 | 
						if err != nil {
 | 
				
			||||||
		return nil, fmt.Errorf("error getting docx images from temporary directory: %v", err)
 | 
							return nil, fmt.Errorf("error getting docx images from temporary directory: %v", err)
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user