refactor(docker, utils): streamline Dockerfile and update PDF build script

This change removes unnecessary dependencies and optimizes the PDF build
process, enhancing maintainability and reducing potential errors.

- Removed `python3-pypandoc` from Dockerfile to minimize dependency
  footprint.
- Added `texlive` alongside `texlive-xetex` in Dockerfile for broader TeX
  support.
- Introduced `subprocess` in `build_pdf.py` to replace `pypandoc`,
  improving process control.
- Added `run_process_with_params` function to handle command execution,
  increasing code modularity and error handling.
- Created `build_pdf.old.py` as a backup of the original script for
  reference.

Signed-off-by: Glenn Y. Rolland <glenux@glenux.net>
This commit is contained in:
Glenn Y. Rolland 2025-03-27 15:23:11 +01:00
parent 7aabf1235e
commit c2893c30a6
3 changed files with 139 additions and 50 deletions

View file

@ -5,7 +5,7 @@ ENV DOCMACHINE_DOCS_ENABLE=1
ENV DOCMACHINE_SLIDES_ENABLE=1
RUN apt-get update \
&& apt-get install -y --no-install-recommends python3 python3-pip python-is-python3 pipenv python3-pypandoc \
&& apt-get install -y --no-install-recommends python3 python3-pip python-is-python3 pipenv \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* \
&& truncate -s 0 /var/log/*log
@ -19,7 +19,7 @@ RUN apt-get update \
# Tools for building pdfs
RUN apt-get update \
&& apt-get install -y --no-install-recommends make m4 chromium pandoc ghc libghc-pandoc-dev lmodern texlive-xetex texlive-fonts-extra texlive-fonts-recommended librsvg2-bin fonts-noto-mono \
&& apt-get install -y --no-install-recommends make m4 chromium pandoc ghc libghc-pandoc-dev lmodern texlive texlive-xetex texlive-fonts-extra texlive-fonts-recommended librsvg2-bin fonts-noto-mono \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* \
&& truncate -s 0 /var/log/*log

71
utils/docs/build_pdf.old.py Executable file
View file

@ -0,0 +1,71 @@
#!/usr/bin/env python
# Usage: ./build_pdf.old.py METADATA_FILE INPUT_FILE OUTPUT_FILE
import os
# import re
import sys
import pypandoc
def gx_usage():
    """Print the expected command-line invocation to stdout."""
    print(f"Usage: {sys.argv[0]} METADATA_FILE INPUT_FILE OUTPUT_FILE")
    print()


# Exactly three positional arguments are required. gx_usage() only prints,
# so exit explicitly: otherwise the sys.argv lookups below raise IndexError
# (too few arguments) or silently ignore the extras (too many).
if len(sys.argv) != 4:
    gx_usage()
    sys.exit(1)

# First argument: metadata file, forwarded to pandoc via --metadata-file.
metadata_file = sys.argv[1]
if not os.path.exists(metadata_file):
    print(f"Metadata file not found: {metadata_file}")
    sys.exit(1)

# Second argument: the document to convert.
input_file = sys.argv[2]
if not os.path.exists(input_file):
    print(f"Input file not found: {input_file}")
    sys.exit(1)

# Third argument: destination file. An existing file is never overwritten.
output_file = sys.argv[3]
if os.path.exists(output_file):
    print(f"Output file already exists: {output_file}")
    sys.exit(1)

print(f"Metadata: {metadata_file}")
print(f"Input: {input_file}")
print(f"Output: {output_file}")

# Extra pandoc options, as an argument list handed to pypandoc.
pandoc_cmd = [
    "--verbose",
    "--toc",
    "--number-sections",
    "--include-in-header", "utils/docs/main.tex",
    "--metadata-file", metadata_file,
    # "-V", "linkcolor:blue",
    # "-V", "geometry:a4paper",
    # "-V", "geometry:margin=1.8cm",
    "-V", "mainfont=DejaVu Serif",
    "-V", "monofont=Noto Sans Mono",
    "--pdf-engine=xelatex",
    "--resource-path=utils/docs",
    "--filter=./utils/docs/filter-nobg.hs",
]

# NOTE: disabled directory-walking variant, kept for reference only
# (it relies on an `input_dir` variable that this script does not define).
# from glob import glob
# input_files = [os.path.join(dp, f) for dp, dn, filenames in os.walk(input_dir) for f in filenames if re.search(r'^[0-9].*\.md$', f)]
# input_files.sort()

# Convert the single input file to PDF with the options above.
pypandoc.convert_file(
    input_file,
    "pdf",
    outputfile=output_file,
    extra_args=pandoc_cmd,
)

print(f"Conversion completed. Output saved to: {output_file}")
#

View file

@ -2,11 +2,10 @@
# Usage: ./build_pdf.py METADATA_FILE INPUT_FILE OUTPUT_FILE
import os
# import re
import os
import sys
import pypandoc
import subprocess
def gx_usage():
"""Show usage"""
@ -14,58 +13,77 @@ def gx_usage():
print()
if len(sys.argv) != 4:
gx_usage()
def run_process_with_params(command_params):
    """
    Run an external command and report its captured output.

    On success the captured stdout and stderr are printed and the function
    returns None. On failure the error is printed and the interpreter exits
    with a non-zero status, so that a calling build (make, CI) can detect
    that the command did not succeed.

    Args:
    - command_params (list): List of parameters to run the process.
    The first item in the list should be the command (executable).

    Raises:
    - SystemExit: with the child's exit status when the command fails,
    or with status 1 when the command cannot be started at all.
    """
    try:
        # check=True turns a non-zero exit status into CalledProcessError;
        # capture_output/text collect stdout and stderr as strings.
        result = subprocess.run(command_params, check=True, text=True, capture_output=True)
    except subprocess.CalledProcessError as e:
        print(f"An error occurred while running the process: {e}")
        print("Error Output:", e.stderr)
        # Propagate the failure instead of returning as if all went well.
        sys.exit(e.returncode)
    except OSError as e:
        # The executable is missing or cannot be executed.
        print(f"Unexpected error: {e}")
        sys.exit(1)

    # Only reached when the process ran successfully.
    print("Process output:", result.stdout)
    print("Process error output (if any):", result.stderr)
input_file = sys.argv[2]
if not os.path.exists(input_file):
print(f"Input file not found: {input_file}")
sys.exit(1)
# Script entry point: validate the three arguments, then run pandoc.
if __name__ == "__main__":
    # Exit explicitly after showing the usage text; falling through would
    # index sys.argv out of range (too few arguments) or ignore the extras.
    if len(sys.argv) != 4:
        gx_usage()
        sys.exit(1)

    # First argument: metadata file, forwarded via --metadata-file.
    metadata_file = sys.argv[1]
    if not os.path.exists(metadata_file):
        print(f"Metadata file not found: {metadata_file}")
        sys.exit(1)

    # Second argument: the markdown document to convert.
    input_file = sys.argv[2]
    if not os.path.exists(input_file):
        print(f"Input file not found: {input_file}")
        sys.exit(1)

    # Third argument: destination file. An existing file is never overwritten.
    output_file = sys.argv[3]
    if os.path.exists(output_file):
        print(f"Output file already exists: {output_file}")
        sys.exit(1)

    # Full pandoc command line as an argument list (executable first).
    pandoc_cmd = [
        "pandoc",
        "--verbose",
        "--toc",
        "--number-sections",
        "--include-in-header", "utils/docs/main.tex",
        "--metadata-file", metadata_file,
        # "-V", "linkcolor:blue",
        # "-V", "geometry:a4paper",
        # "-V", "geometry:margin=1.8cm",
        "-V", "mainfont=DejaVu Serif",
        "-V", "monofont=Noto Sans Mono",
        "--pdf-engine=xelatex",
        "--resource-path=utils/docs",
        "--filter=./utils/docs/filter-nobg.hs",
        "-f", "markdown",
        "-t", "pdf",
        "-o", output_file,
        input_file,
    ]

    print(f"Metadata: {metadata_file}")
    print(f"Input: {input_file}")
    print(f"Output: {output_file}")

    # Call the function to run the process
    run_process_with_params(pandoc_cmd)

    # The output file cannot pre-exist (checked above), so its presence is
    # the proof that pandoc produced it. Report success only in that case.
    if not os.path.exists(output_file):
        print(f"Conversion failed. Output file was not created: {output_file}")
        sys.exit(1)
    print(f"Conversion completed. Output saved to: {output_file}")
#