From fb2b85fcb8d1914d8a5b0867600b8295b5649b9e Mon Sep 17 00:00:00 2001 From: Glenn Date: Tue, 2 Jan 2024 12:32:00 +0100 Subject: [PATCH] feat: improve files filtering --- shard.lock | 4 ++ shard.yml | 2 + spec/cli_spec.cr | 10 ++++ spec/config_spec.cr | 0 spec/filelist_data/alice.txt | 6 ++ spec/filelist_data/bob.c | 7 +++ spec/filelist_spec.cr | 104 +++++++++++++++++++++++++++++++++++ spec/spec_helper.cr | 4 ++ src/cli.cr | 84 ++++++++++------------------ src/config.cr | 38 ++++++++++--- src/filelist.cr | 94 +++++++++++++++++++++++++++++++ 11 files changed, 288 insertions(+), 65 deletions(-) create mode 100644 spec/cli_spec.cr create mode 100644 spec/config_spec.cr create mode 100644 spec/filelist_data/alice.txt create mode 100644 spec/filelist_data/bob.c create mode 100644 spec/filelist_spec.cr create mode 100644 spec/spec_helper.cr create mode 100644 src/filelist.cr diff --git a/shard.lock b/shard.lock index fc4f3d1..8396546 100644 --- a/shard.lock +++ b/shard.lock @@ -4,3 +4,7 @@ shards: git: https://github.com/dscottboggs/magic.cr.git version: 1.1.0 + walk: + git: https://github.com/alexherbo2/walk.cr.git + version: 0.1.0+git.commit.765d758c0f966cccc98c1d81c7ccd0c1f71928e3 + diff --git a/shard.yml b/shard.yml index f2fd335..c159cca 100644 --- a/shard.yml +++ b/shard.yml @@ -12,6 +12,8 @@ authors: dependencies: magic: github: dscottboggs/magic.cr + walk: + github: alexherbo2/walk.cr # description: | # Short description of chatgpt-preloader diff --git a/spec/cli_spec.cr b/spec/cli_spec.cr new file mode 100644 index 0000000..6824930 --- /dev/null +++ b/spec/cli_spec.cr @@ -0,0 +1,10 @@ + +require "./spec_helper" +require "../src/cli" + +describe CodePreloader::Cli do + + it "works" do + # false.should eq(true) + end +end diff --git a/spec/config_spec.cr b/spec/config_spec.cr new file mode 100644 index 0000000..e69de29 diff --git a/spec/filelist_data/alice.txt b/spec/filelist_data/alice.txt new file mode 100644 index 0000000..94c13c8 --- /dev/null +++ b/spec/filelist_data/alice.txt @@ -0,0 +1,6 @@ +# Alice + +## Who is alice? + +## When is alice? + diff --git a/spec/filelist_data/bob.c b/spec/filelist_data/bob.c new file mode 100644 index 0000000..f117f9c --- /dev/null +++ b/spec/filelist_data/bob.c @@ -0,0 +1,7 @@ + +#include + +int main(int argc, char** argv) { + printf("Hello world\n"); + return 0; +} diff --git a/spec/filelist_spec.cr b/spec/filelist_spec.cr new file mode 100644 index 0000000..6ba1a59 --- /dev/null +++ b/spec/filelist_spec.cr @@ -0,0 +1,104 @@ + +require "./spec_helper" +require "../src/cli" + +alias FileList = CodePreloader::FileList + +describe CodePreloader::FileList do + + it "can be created empty" do + fl = FileList.new + end + + it "can be created with a list of directories" do + fl = FileList.new(["src/", "spec/"]) + end + + it "verifies that initial directories exists" do + expect_raises(FileList::NotADirectory) do + fl = FileList.new(["Alice", "Bob"]) + end + end + + it "can append extra sources" do + fl = FileList.new() + fl.add "spec/" + end + + it "verifies that appended directories exists" do + fl = FileList.new() + expect_raises(FileList::NotADirectory) do + fl.add "Alice" + end + end + + it "accept adding reject filters" do + fl = FileList.new() + fl.reject { |item| !!(item =~ /name/) } + end + + it "accept adding select filters" do + fl = FileList.new() + fl.select { |item| !!(item =~ /name/) } + end + + it "enumerates the files" do + fl = FileList.new() + fl.add("spec/filelist_data") + + files = Dir["spec/filelist_data/*"] + fl.each do |file| + files.should contain(file) + files = files - [file] + end + files.size.should eq(0) + end + + it "doesn't enumerate duplicate files" do + fl = FileList.new() + fl.add("spec/filelist_data") + fl.add("spec/filelist_data") + + files = [] of String + fl.each do |file| + files << file + end + files.size.should eq(files.uniq.size) + end + + it "doesn't enumerate files filtered out by select" do + fl = FileList.new() + fl.add("spec/filelist_data") + fl.select { |path| !!(path =~ /\.c$/) } + + files = Dir["spec/filelist_data/*.c"] + fl.each do |file| + files.should contain(file) + files = files - [file] + end + files.size.should eq(0) + end + + it "doesn't enumerate files filtered out by reject" do + fl = FileList.new() + fl.add("spec/filelist_data") + fl.reject { |path| !!(path =~ /\.txt$/) } + + files = Dir["spec/filelist_data/*.c"] + fl.each do |file| + files.should contain(file) + files = files - [file] + end + files.size.should eq(0) + + end + + it "export the files as an array" do + end + + it "doesn't export duplicate files" do + end + + it "doesn't export filtered out files" do + end +end diff --git a/spec/spec_helper.cr b/spec/spec_helper.cr new file mode 100644 index 0000000..13c6efb --- /dev/null +++ b/spec/spec_helper.cr @@ -0,0 +1,4 @@ + +require "spec" + +# require "../src/" diff --git a/src/cli.cr b/src/cli.cr index 3ac9bb0..747912b 100644 --- a/src/cli.cr +++ b/src/cli.cr @@ -6,6 +6,7 @@ require "option_parser" require "magic" require "./config" +require "./filelist" # The CodePreloader module organizes classes and methods related to preloading code files. module CodePreloader @@ -20,90 +21,61 @@ module CodePreloader @config.parse_arguments(args) end - # Executes the main functionality of the CLI application. def exec - header_prompt = "" - footer_prompt = "" - __header_prompt_file_path = @config.header_prompt_file_path - __footer_prompt_file_path = @config.footer_prompt_file_path - __output_file_path = @output_file_path - __repository_path_list = @config.repository_path_list + # get local values for typing + output_file_path = @output_file_path + repository_path_list = @config.repository_path_list + header_prompt_file_path = @config.header_prompt_file_path + footer_prompt_file_path = @config.footer_prompt_file_path - if !__header_prompt_file_path.nil? - STDERR.puts "Loading header prompt from: #{__header_prompt_file_path}" - header_prompt = File.read(__header_prompt_file_path) + filelist = FileList.new() + filelist.add(repository_path_list) + @config.ignore_list.each do |ignore_pattern| + filelist.reject { |path| !!(path =~ Regex.new(ignore_pattern)) } end - if !__footer_prompt_file_path.nil? - STDERR.puts "Loading footer prompt from: #{__footer_prompt_file_path}" - footer_prompt = File.read(__footer_prompt_file_path) + if !header_prompt_file_path.nil? + STDERR.puts "Loading header prompt from: #{header_prompt_file_path}" + header_prompt = File.read(header_prompt_file_path) end + if !footer_prompt_file_path.nil? + STDERR.puts "Loading footer prompt from: #{footer_prompt_file_path}" + footer_prompt = File.read(footer_prompt_file_path) + end - abort("@output_file_path should be non-nil here") if __output_file_path.nil? - abort("@repository_path should be non-empty here") if __repository_path_list.empty? + unless output_file_path.nil? || output_file_path.try(&.empty?) || (output_file_path != "-") + output_file = File.open(output_file_path, "w") + invalid_output_file = false + end invalid_output_file = true output_file = STDOUT - - unless __output_file_path.nil? || __output_file_path.try(&.empty?) || (__output_file_path != "-") - output_file = File.open(__output_file_path, "w") - invalid_output_file = false - end + header_prompt = "" + footer_prompt = "" output_file.puts header_prompt if @config.header_prompt_file_path STDERR.puts "Processing repository: #{@config.repository_path_list}" - __repository_path_list.each do |repository_path| - process_repository(repository_path, output_file) + filelist.each do |file_path| + process_file(file_path, output_file) end output_file.puts footer_prompt if @config.footer_prompt_file_path output_file.close if !invalid_output_file - STDERR.puts "Processing completed. Output written to: #{invalid_output_file ? "stdout" : __output_file_path}" + STDERR.puts "Processing completed. Output written to: #{invalid_output_file ? "stdout" : output_file_path}" rescue e : Exception STDERR.puts "An error occurred during execution: #{e.message}" exit(1) end - # Processes the specified repository and writes the output to a file. - def process_repository(repository_path : String, output_file : IO::FileDescriptor) - process_directory(repository_path, repository_path, output_file) - - rescue e : IO::Error - STDERR.puts "Error processing repository: #{e.message}" - exit(1) - end - - private def process_directory(root_path, dir_path : String, output_file : IO::FileDescriptor) - Dir.each_child(dir_path) do |child| - child_path = File.join(dir_path, child) - - ignores = ( - @config.ignore_list - .map{ |prefix| [prefix, File.expand_path(child_path) =~ /^#{File.expand_path(prefix)}/] } - .reject!{ |item| item[1].nil? } - ) - next if !ignores.empty? - - STDERR.puts "File: #{child_path}" - child_path = File.join(dir_path, child) - if File.directory?(child_path) - process_directory(root_path, child_path, output_file) - else - process_file(root_path, child_path, output_file) - end - end - end - - private def process_file(root_path : String, file_path : String, output_file : IO::FileDescriptor) - relative_file_path = file_path.sub(/^#{Regex.escape(root_path)}/, ".").lstrip + private def process_file(file_path : String, output_file : IO::FileDescriptor) fh = File.open(file_path) mime = Magic.mime_type.of(fh) - output_file.puts "@@ File \"#{relative_file_path}\" (Mime-Type: #{mime.inspect})" + output_file.puts "@@ File \"#{file_path}\" (Mime-Type: #{mime.inspect})" output_file.puts "" output_file.puts(fh.gets_to_end) output_file.puts "" diff --git a/src/config.cr b/src/config.cr index 99d139f..7025b2c 100644 --- a/src/config.cr +++ b/src/config.cr @@ -18,23 +18,43 @@ module CodePreloader OptionParser.parse(args) do |parser| parser.banner = "Usage: code-preloader [options] DIR1 ..." - parser.on("-c CONFIG_FILE", "--config=CONFIG_FILE", "Load parameters from CONFIG_FILE") do |config_file| + parser.on( + "-c CONFIG_FILE", + "--config=CONFIG_FILE", + "Load parameters from CONFIG_FILE" + ) do |config_file| load_config(config_file) end - parser.on("-i IGNORE_PATH", "--ignore=IGNORE_PATH", "Ignore file or directory") do |ignore_file| + parser.on( + "-i IGNORE_PATH", + "--ignore=IGNORE_PATH", + "Ignore file or directory" + ) do |ignore_file| @ignore_list << ignore_file end - parser.on("-o OUTPUT_FILE", "--output=OUTPUT_FILE", "Write output to OUTPUT_FILE") do |output_file| + parser.on( + "-o OUTPUT_FILE", + "--output=OUTPUT_FILE", + "Write output to OUTPUT_FILE" + ) do |output_file| @output_file_path = output_file end - parser.on("-H HEADER_PROMPT_FILE", "--header-prompt=HEADER_PROMPT_FILE", "Load header prompt from HEADER_PROMPT_FILE") do |header_prompt_file| + parser.on( + "-H HEADER_PROMPT_FILE", + "--header-prompt=HEADER_PROMPT_FILE", + "Load header prompt from HEADER_PROMPT_FILE" + ) do |header_prompt_file| @header_prompt_file_path = header_prompt_file end - parser.on("-F FOOTER_PROMPT_FILE", "--footer-prompt=FOOTER_PROMPT_FILE", "Load footer prompt from FOOTER_PROMPT_FILE") do |footer_prompt_file| + parser.on( + "-F FOOTER_PROMPT_FILE", + "--footer-prompt=FOOTER_PROMPT_FILE", + "Load footer prompt from FOOTER_PROMPT_FILE" + ) do |footer_prompt_file| @footer_prompt_file_path = footer_prompt_file end @@ -50,12 +70,12 @@ module CodePreloader end end - validate_arguments + validate end - private def validate_arguments + private def validate abort("Missing repository path.") if @repository_path_list.empty? - abort("Missing repository path.") if + STDERR.puts("Output file path not specified (using STDOUT)") if @output_file_path.nil? || @output_file_path.try(&.empty?) end @@ -78,7 +98,7 @@ module CodePreloader @header_prompt_file_path = root.header_prompt_file_path || @header_prompt_file_path @footer_prompt_file_path = root.footer_prompt_file_path || @footer_prompt_file_path - rescue ex + rescue ex : Exception STDERR.puts "Failed to load config file: #{ex.message}" exit(1) end diff --git a/src/filelist.cr b/src/filelist.cr new file mode 100644 index 0000000..b3cb50f --- /dev/null +++ b/src/filelist.cr @@ -0,0 +1,94 @@ + +require "walk" + +module CodePreloader + + # Manage a list of files + class FileList + + alias Filter = String -> Bool + + class NotADirectory < Exception + def initialize(path) + super(path.to_s) + end + end + + @sources : Array(String) + @filters_in : Array(Filter) + @filters_out : Array(Filter) + + def initialize(dirs = [] of String) + @sources = [] of String + @filters_in = [] of Filter + @filters_out = [] of Filter + dirs.each { |dir| self.add(dir) } + end + + def add(dirs : Array(String)) + dirs.each { |dir| add(dir) } + end + + def add(dir : String) + raise NotADirectory.new(dir) if !File.exists? dir + + @sources << dir + end + + def select(&filter : Filter) + @filters_in << filter + end + + def reject(&filter : Filter) + @filters_out << filter + end + + def each(&block) + # ensure we display files only once + seen = Set(String).new + + # walk each source + @sources.each do |dir| + walker = Walk::Down.new(dir) + + walker = walker.filter do |path| + is_dir = File.directory? path + keep = true + must_select = false + must_reject = false + + @filters_in.each do |filter_in| + must_select = must_select || filter_in.call(path.to_s) + end + keep = keep && must_select if @filters_in.any? + keep = keep || is_dir + + @filters_out.each do |filter_out| + must_reject = must_reject || filter_out.call(path.to_s) + end + keep = keep && !must_reject if @filters_out.any? + + keep + end + + walker.each do |path| + next if File.directory? path + + path = File.realpath(path) if File.symlink? path + next if seen.includes? path.to_s + + seen << path.to_s + yield path.to_s + end + end + end + + def to_a() + files = [] of String + self.each do |path| + files << path.to_s + end + files + end + end +end