docmachine-containers/utils/docs/build_pdf.old.py
Glenn Y. Rolland c2893c30a6 refactor(docker, utils): streamline Dockerfile and update PDF build script
This change removes unnecessary dependencies and optimizes the PDF build
process, enhancing maintainability and reducing potential errors.

- Removed `python3-pypandoc` from Dockerfile to minimize dependency
  footprint.
- Replaced `texlive-xetex` with `texlive` in Dockerfile for broader TeX
  support.
- Introduced `subprocess` in `build_pdf.py` to replace `pypandoc`,
  improving process control.
- Added `run_process_with_params` function to handle command execution,
  increasing code modularity and error handling.
- Created `build_pdf.old.py` as a backup of the original script for
  reference.

Signed-off-by: Glenn Y. Rolland <glenux@glenux.net>
2025-03-27 15:23:11 +01:00

71 lines
1.6 KiB
Python
Executable file

#!/usr/bin/env python
# Usage: ./build_pdf.old.py METADATA_FILE INPUT_FILE OUTPUT_FILE
import os
# import re
import sys
import pypandoc
def gx_usage():
    """Print the expected command-line invocation, then an empty line."""
    usage_line = f"Usage: {sys.argv[0]} METADATA_FILE INPUT_FILE OUTPUT_FILE"
    print(usage_line)
    print()
# Exactly three positional arguments are required. Stop right after showing
# the usage: continuing would either raise IndexError on the sys.argv lookups
# below or silently run with surplus arguments.
if len(sys.argv) != 4:
    gx_usage()
    sys.exit(1)

# First argument: metadata file, forwarded to pandoc via --metadata-file.
metadata_file = sys.argv[1]
if not os.path.exists(metadata_file):
    print(f"Metadata file not found: {metadata_file}")
    sys.exit(1)

# Second argument: the document to convert.
input_file = sys.argv[2]
if not os.path.exists(input_file):
    print(f"Input file not found: {input_file}")
    sys.exit(1)

# Third argument: destination file. An existing file is never overwritten.
output_file = sys.argv[3]
if os.path.exists(output_file):
    print(f"Output file already exists: {output_file}")
    sys.exit(1)

print(f"Metadata: {metadata_file}")
print(f"Input: {input_file}")
print(f"Output: {output_file}")

# Extra command-line options handed to pandoc (a list of arguments, one
# element per token).
# NOTE(review): the utils/docs/... paths are relative, so the script
# presumably has to be started from the repository root -- confirm.
pandoc_cmd = [
    "--verbose",
    "--toc",
    "--number-sections",
    "--include-in-header", "utils/docs/main.tex",
    "--metadata-file", metadata_file,
    # Optional layout tweaks, currently disabled:
    # "-V", "linkcolor:blue",
    # "-V", "geometry:a4paper",
    # "-V", "geometry:margin=1.8cm",
    "-V", "mainfont=DejaVu Serif",
    "-V", "monofont=Noto Sans Mono",
    "--pdf-engine=xelatex",
    "--resource-path=utils/docs",
    "--filter=./utils/docs/filter-nobg.hs",
]

# Convert the single input file to PDF, writing the result to output_file.
pypandoc.convert_file(
    input_file,
    "pdf",
    outputfile=output_file,
    extra_args=pandoc_cmd,
)

print(f"Conversion completed. Output saved to: {output_file}")
#