From c2893c30a6edfcc1d2b091ff4c2511363319d390 Mon Sep 17 00:00:00 2001 From: "Glenn Y. Rolland" <glenux@glenux.net> Date: Thu, 27 Mar 2025 15:23:11 +0100 Subject: [PATCH] refactor(docker, utils): streamline Dockerfile and update PDF build script This change removes unnecessary dependencies and optimizes the PDF build process, enhancing maintainability and reducing potential errors. - Removed `python3-pypandoc` from Dockerfile to minimize dependency footprint. - Added `texlive` alongside `texlive-xetex` in Dockerfile for broader TeX support. - Introduced `subprocess` in `build_pdf.py` to replace `pypandoc`, improving process control. - Added `run_process_with_params` function to handle command execution, increasing code modularity and error handling. - Created `build_pdf.old.py` as a backup of the original script for reference. Signed-off-by: Glenn Y. Rolland <glenux@glenux.net> --- docker/Dockerfile | 4 +- utils/docs/build_pdf.old.py | 71 ++++++++++++++++++++++ utils/docs/build_pdf.py | 114 +++++++++++++++++++++--------------- 3 files changed, 139 insertions(+), 50 deletions(-) create mode 100755 utils/docs/build_pdf.old.py diff --git a/docker/Dockerfile b/docker/Dockerfile index 3ce51a0..ed3fc04 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -5,7 +5,7 @@ ENV DOCMACHINE_DOCS_ENABLE=1 ENV DOCMACHINE_SLIDES_ENABLE=1 RUN apt-get update \ - && apt-get install -y --no-install-recommends python3 python3-pip python-is-python3 pipenv python3-pypandoc \ + && apt-get install -y --no-install-recommends python3 python3-pip python-is-python3 pipenv \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* \ && truncate -s 0 /var/log/*log @@ -19,7 +19,7 @@ RUN apt-get update \ # Tools for building pdfs RUN apt-get update \ - && apt-get install -y --no-install-recommends make m4 chromium pandoc ghc libghc-pandoc-dev lmodern texlive-xetex texlive-fonts-extra texlive-fonts-recommended librsvg2-bin fonts-noto-mono \ + && apt-get install -y 
--no-install-recommends make m4 chromium pandoc ghc libghc-pandoc-dev lmodern texlive texlive-xetex texlive-fonts-extra texlive-fonts-recommended librsvg2-bin fonts-noto-mono \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* \ && truncate -s 0 /var/log/*log diff --git a/utils/docs/build_pdf.old.py b/utils/docs/build_pdf.old.py new file mode 100755 index 0000000..e4ba81a --- /dev/null +++ b/utils/docs/build_pdf.old.py @@ -0,0 +1,71 @@ +#!/usr/bin/env python + +# Usage: ./build.py INPUT_DIR OUTPUT_FILE + +import os +# import re +import sys +import pypandoc + + +def gx_usage(): + """Show usage""" + print(f"Usage: {sys.argv[0]} METADATA_FILE INPUT_FILE OUTPUT_FILE") + print() + + +if len(sys.argv) != 4: + gx_usage() + +metadata_file = sys.argv[1] +if not os.path.exists(metadata_file): + print(f"Metadata file not found: {metadata_file}") + sys.exit(1) + +input_file = sys.argv[2] +if not os.path.exists(input_file): + print(f"Input file not found: {input_file}") + sys.exit(1) + +# Get second +output_file = sys.argv[3] +if os.path.exists(output_file): + print(f"Output file already exists: {output_file}") + sys.exit(1) + +print(f"Metadata: {metadata_file}") +print(f"Input: {input_file}") +print(f"Output: {output_file}") + +# Build the pandoc options as a string +pandoc_cmd = [ + "--verbose", + "--toc", + "--number-sections", + "--include-in-header", "utils/docs/main.tex", + "--metadata-file", metadata_file, + # "-V", "linkcolor:blue", + # "-V", "geometry:a4paper", + # "-V", "geometry:margin=1.8cm", + "-V", "mainfont=DejaVu Serif", + "-V", "monofont=Noto Sans Mono", + "--pdf-engine=xelatex", + "--resource-path=utils/docs", + "--filter=./utils/docs/filter-nobg.hs", +] + +# from glob import glob +# input_files = [os.path.join(dp, f) for dp, dn, filenames in os.walk(input_dir) for f in filenames if re.search(r'^[0-9].*\.md$', f)] +# input_files.sort() + +# Convert all markdown files in the chapters/ subdirectory. 
+pypandoc.convert_file( + input_file, + "pdf", + outputfile=output_file, + extra_args=pandoc_cmd, +) + +print(f"Conversion completed. Output saved to: {output_file}") + +# diff --git a/utils/docs/build_pdf.py b/utils/docs/build_pdf.py index e4ba81a..4313a2c 100755 --- a/utils/docs/build_pdf.py +++ b/utils/docs/build_pdf.py @@ -2,11 +2,10 @@ # Usage: ./build.py INPUT_DIR OUTPUT_FILE -import os # import re +import os import sys -import pypandoc - +import subprocess def gx_usage(): """Show usage""" @@ -14,58 +13,77 @@ def gx_usage(): print() -if len(sys.argv) != 4: - gx_usage() +def run_process_with_params(command_params): + """ + Runs a process with parameters provided in the command_params list. -metadata_file = sys.argv[1] -if not os.path.exists(metadata_file): - print(f"Metadata file not found: {metadata_file}") - sys.exit(1) + Args: + - command_params (list): List of parameters to run the process. + The first item in the list should be the command (executable). + """ + try: + # Run the process using subprocess.run (Python 3.5+) + result = subprocess.run(command_params, check=True, text=True, capture_output=True) + + # If the process ran successfully, print the output + print("Process output:", result.stdout) + print("Process error output (if any):", result.stderr) + except subprocess.CalledProcessError as e: + print(f"An error occurred while running the process: {e}") + print("Error Output:", e.stderr) + except Exception as e: + print(f"Unexpected error: {e}") -input_file = sys.argv[2] -if not os.path.exists(input_file): - print(f"Input file not found: {input_file}") - sys.exit(1) +# Example usage +if __name__ == "__main__": + # Define the parameters for the command + # Example: Running `ls -l /home` on a Unix-like system + if len(sys.argv) != 4: + gx_usage() -# Get second -output_file = sys.argv[3] -if os.path.exists(output_file): - print(f"Output file already exists: {output_file}") - sys.exit(1) + metadata_file = sys.argv[1] + if not 
os.path.exists(metadata_file): + print(f"Metadata file not found: {metadata_file}") + sys.exit(1) -print(f"Metadata: {metadata_file}") -print(f"Input: {input_file}") -print(f"Output: {output_file}") + input_file = sys.argv[2] + if not os.path.exists(input_file): + print(f"Input file not found: {input_file}") + sys.exit(1) -# Build the pandoc options as a string -pandoc_cmd = [ - "--verbose", - "--toc", - "--number-sections", - "--include-in-header", "utils/docs/main.tex", - "--metadata-file", metadata_file, - # "-V", "linkcolor:blue", - # "-V", "geometry:a4paper", - # "-V", "geometry:margin=1.8cm", - "-V", "mainfont=DejaVu Serif", - "-V", "monofont=Noto Sans Mono", - "--pdf-engine=xelatex", - "--resource-path=utils/docs", - "--filter=./utils/docs/filter-nobg.hs", -] + # Get second + output_file = sys.argv[3] + if os.path.exists(output_file): + print(f"Output file already exists: {output_file}") + sys.exit(1) -# from glob import glob -# input_files = [os.path.join(dp, f) for dp, dn, filenames in os.walk(input_dir) for f in filenames if re.search(r'^[0-9].*\.md$', f)] -# input_files.sort() + # Build the pandoc options as a string + pandoc_cmd = [ + "pandoc", + "--verbose", + "--toc", + "--number-sections", + "--include-in-header", "utils/docs/main.tex", + "--metadata-file", metadata_file, + # "-V", "linkcolor:blue", + # "-V", "geometry:a4paper", + # "-V", "geometry:margin=1.8cm", + "-V", "mainfont=DejaVu Serif", + "-V", "monofont=Noto Sans Mono", + "--pdf-engine=xelatex", + "--resource-path=utils/docs", + "--filter=./utils/docs/filter-nobg.hs", + "-f", "markdown", + "-t", "pdf", + "-o", output_file, + input_file + ] -# Convert all markdown files in the chapters/ subdirectory. -pypandoc.convert_file( - input_file, - "pdf", - outputfile=output_file, - extra_args=pandoc_cmd, -) + print(f"Metadata: {metadata_file}") + print(f"Input: {input_file}") + print(f"Output: {output_file}") -print(f"Conversion completed. 
Output saved to: {output_file}") + # Call the function to run the process + run_process_with_params(pandoc_cmd) #