From c2893c30a6edfcc1d2b091ff4c2511363319d390 Mon Sep 17 00:00:00 2001
From: "Glenn Y. Rolland" <glenux@glenux.net>
Date: Thu, 27 Mar 2025 15:23:11 +0100
Subject: [PATCH] refactor(docker, utils): streamline Dockerfile and update PDF
 build script

This change removes unnecessary dependencies and optimizes the PDF build
process, enhancing maintainability and reducing potential errors.

- Removed `python3-pypandoc` from Dockerfile to minimize dependency
  footprint.
- Added `texlive` alongside `texlive-xetex` in Dockerfile for broader TeX
  support.
- Introduced `subprocess` in `build_pdf.py` to replace `pypandoc`,
  improving process control.
- Added `run_process_with_params` function to handle command execution,
  increasing code modularity and error handling.
- Created `build_pdf.old.py` as a backup of the original script for
  reference.

Signed-off-by: Glenn Y. Rolland <glenux@glenux.net>
---
 docker/Dockerfile           |   4 +-
 utils/docs/build_pdf.old.py |  71 ++++++++++++++++++++++
 utils/docs/build_pdf.py     | 114 +++++++++++++++++++++---------------
 3 files changed, 139 insertions(+), 50 deletions(-)
 create mode 100755 utils/docs/build_pdf.old.py

diff --git a/docker/Dockerfile b/docker/Dockerfile
index 3ce51a0..ed3fc04 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -5,7 +5,7 @@ ENV DOCMACHINE_DOCS_ENABLE=1
 ENV DOCMACHINE_SLIDES_ENABLE=1
 
 RUN apt-get update \
- && apt-get install -y --no-install-recommends python3 python3-pip python-is-python3 pipenv python3-pypandoc \
+ && apt-get install -y --no-install-recommends python3 python3-pip python-is-python3 pipenv \
  && apt-get clean \
  && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* \
  && truncate -s 0 /var/log/*log
@@ -19,7 +19,7 @@ RUN apt-get update \
 
 # Tools for building pdfs
 RUN apt-get update \
- && apt-get install -y --no-install-recommends make m4 chromium pandoc ghc libghc-pandoc-dev lmodern texlive-xetex texlive-fonts-extra texlive-fonts-recommended librsvg2-bin fonts-noto-mono \
+ && apt-get install -y --no-install-recommends make m4 chromium pandoc ghc libghc-pandoc-dev lmodern texlive texlive-xetex texlive-fonts-extra texlive-fonts-recommended librsvg2-bin fonts-noto-mono \
  && apt-get clean \
  && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* \
  && truncate -s 0 /var/log/*log
diff --git a/utils/docs/build_pdf.old.py b/utils/docs/build_pdf.old.py
new file mode 100755
index 0000000..e4ba81a
--- /dev/null
+++ b/utils/docs/build_pdf.old.py
@@ -0,0 +1,71 @@
+#!/usr/bin/env python
+
+# Usage: ./build.py INPUT_DIR OUTPUT_FILE
+
+import os
+# import re
+import sys
+import pypandoc
+
+
+def gx_usage():
+    """Show usage"""
+    print(f"Usage: {sys.argv[0]} METADATA_FILE INPUT_FILE OUTPUT_FILE")
+    print()
+
+
+if len(sys.argv) != 4:
+    gx_usage()
+
+metadata_file = sys.argv[1]
+if not os.path.exists(metadata_file):
+    print(f"Metadata file not found: {metadata_file}")
+    sys.exit(1)
+
+input_file = sys.argv[2]
+if not os.path.exists(input_file):
+    print(f"Input file not found: {input_file}")
+    sys.exit(1)
+
+# Get second
+output_file = sys.argv[3]
+if os.path.exists(output_file):
+    print(f"Output file already exists: {output_file}")
+    sys.exit(1)
+
+print(f"Metadata: {metadata_file}")
+print(f"Input: {input_file}")
+print(f"Output: {output_file}")
+
+# Build the pandoc options as a string
+pandoc_cmd = [
+    "--verbose",
+    "--toc",
+    "--number-sections",
+    "--include-in-header", "utils/docs/main.tex",
+    "--metadata-file", metadata_file,
+    # "-V", "linkcolor:blue",
+    # "-V", "geometry:a4paper",
+    # "-V", "geometry:margin=1.8cm",
+    "-V", "mainfont=DejaVu Serif",
+    "-V", "monofont=Noto Sans Mono",
+    "--pdf-engine=xelatex",
+    "--resource-path=utils/docs",
+    "--filter=./utils/docs/filter-nobg.hs",
+]
+
+# from glob import glob
+# input_files = [os.path.join(dp, f) for dp, dn, filenames in os.walk(input_dir) for f in filenames if re.search(r'^[0-9].*\.md$', f)]
+# input_files.sort()
+
+# Convert all markdown files in the chapters/ subdirectory.
+pypandoc.convert_file(
+    input_file,
+    "pdf",
+    outputfile=output_file,
+    extra_args=pandoc_cmd,
+)
+
+print(f"Conversion completed. Output saved to: {output_file}")
+
+#
diff --git a/utils/docs/build_pdf.py b/utils/docs/build_pdf.py
index e4ba81a..4313a2c 100755
--- a/utils/docs/build_pdf.py
+++ b/utils/docs/build_pdf.py
@@ -2,11 +2,10 @@
 
 # Usage: ./build.py INPUT_DIR OUTPUT_FILE
 
-import os
 # import re
+import os
 import sys
-import pypandoc
-
+import subprocess
 
 def gx_usage():
     """Show usage"""
@@ -14,58 +13,77 @@ def gx_usage():
     print()
 
 
-if len(sys.argv) != 4:
-    gx_usage()
+def run_process_with_params(command_params):
+    """
+    Runs a process with parameters provided in the command_params list.
 
-metadata_file = sys.argv[1]
-if not os.path.exists(metadata_file):
-    print(f"Metadata file not found: {metadata_file}")
-    sys.exit(1)
+    Args:
+    - command_params (list): List of parameters to run the process. 
+      The first item in the list should be the command (executable).
+    """
+    try:
+        # Run the process; capture_output/text require Python 3.7+
+        result = subprocess.run(command_params, check=True, text=True, capture_output=True)
+        
+        # If the process ran successfully, print the output
+        print("Process output:", result.stdout)
+        print("Process error output (if any):", result.stderr)
+    except subprocess.CalledProcessError as e:
+        print(f"An error occurred while running the process: {e}")
+        print("Error Output:", e.stderr)
+    except Exception as e:
+        print(f"Unexpected error: {e}")
 
-input_file = sys.argv[2]
-if not os.path.exists(input_file):
-    print(f"Input file not found: {input_file}")
-    sys.exit(1)
+# Script entry point
+if __name__ == "__main__":
+    # Validate the command-line arguments before building the command:
+    # METADATA_FILE INPUT_FILE OUTPUT_FILE
+    if len(sys.argv) != 4:
+        gx_usage()
 
-# Get second
-output_file = sys.argv[3]
-if os.path.exists(output_file):
-    print(f"Output file already exists: {output_file}")
-    sys.exit(1)
+    metadata_file = sys.argv[1]
+    if not os.path.exists(metadata_file):
+        print(f"Metadata file not found: {metadata_file}")
+        sys.exit(1)
 
-print(f"Metadata: {metadata_file}")
-print(f"Input: {input_file}")
-print(f"Output: {output_file}")
+    input_file = sys.argv[2]
+    if not os.path.exists(input_file):
+        print(f"Input file not found: {input_file}")
+        sys.exit(1)
 
-# Build the pandoc options as a string
-pandoc_cmd = [
-    "--verbose",
-    "--toc",
-    "--number-sections",
-    "--include-in-header", "utils/docs/main.tex",
-    "--metadata-file", metadata_file,
-    # "-V", "linkcolor:blue",
-    # "-V", "geometry:a4paper",
-    # "-V", "geometry:margin=1.8cm",
-    "-V", "mainfont=DejaVu Serif",
-    "-V", "monofont=Noto Sans Mono",
-    "--pdf-engine=xelatex",
-    "--resource-path=utils/docs",
-    "--filter=./utils/docs/filter-nobg.hs",
-]
+    # Third argument: output path; refuse to overwrite an existing file
+    output_file = sys.argv[3]
+    if os.path.exists(output_file):
+        print(f"Output file already exists: {output_file}")
+        sys.exit(1)
 
-# from glob import glob
-# input_files = [os.path.join(dp, f) for dp, dn, filenames in os.walk(input_dir) for f in filenames if re.search(r'^[0-9].*\.md$', f)]
-# input_files.sort()
+    # Build the full pandoc command line as an argument list
+    pandoc_cmd = [
+        "pandoc",
+        "--verbose",
+        "--toc",
+        "--number-sections",
+        "--include-in-header", "utils/docs/main.tex",
+        "--metadata-file", metadata_file,
+        # "-V", "linkcolor:blue",
+        # "-V", "geometry:a4paper",
+        # "-V", "geometry:margin=1.8cm",
+        "-V", "mainfont=DejaVu Serif",
+        "-V", "monofont=Noto Sans Mono",
+        "--pdf-engine=xelatex",
+        "--resource-path=utils/docs",
+        "--filter=./utils/docs/filter-nobg.hs",
+        "-f", "markdown",
+        "-t", "pdf",
+        "-o", output_file,
+        input_file
+    ]
 
-# Convert all markdown files in the chapters/ subdirectory.
-pypandoc.convert_file(
-    input_file,
-    "pdf",
-    outputfile=output_file,
-    extra_args=pandoc_cmd,
-)
+    print(f"Metadata: {metadata_file}")
+    print(f"Input: {input_file}")
+    print(f"Output: {output_file}")
 
-print(f"Conversion completed. Output saved to: {output_file}")
+    # Call the function to run the process
+    run_process_with_params(pandoc_cmd)
 
 #