feat: improve files filtering

This commit is contained in:
Glenn Y. Rolland 2024-01-02 12:32:00 +01:00
parent e0f7244db7
commit fb2b85fcb8
11 changed files with 288 additions and 65 deletions

View file

@ -4,3 +4,7 @@ shards:
git: https://github.com/dscottboggs/magic.cr.git git: https://github.com/dscottboggs/magic.cr.git
version: 1.1.0 version: 1.1.0
walk:
git: https://github.com/alexherbo2/walk.cr.git
version: 0.1.0+git.commit.765d758c0f966cccc98c1d81c7ccd0c1f71928e3

View file

@ -12,6 +12,8 @@ authors:
dependencies: dependencies:
magic: magic:
github: dscottboggs/magic.cr github: dscottboggs/magic.cr
walk:
github: alexherbo2/walk.cr
# description: | # description: |
# Short description of chatgpt-preloader # Short description of chatgpt-preloader

10
spec/cli_spec.cr Normal file
View file

@ -0,0 +1,10 @@
require "./spec_helper"
require "../src/cli"
# Placeholder spec group for CodePreloader::Cli.
# The single example has no active expectation (the only assertion is
# commented out), so it passes without exercising any Cli behavior.
describe CodePreloader::Cli do
it "works" do
# false.should eq(true)
end
end

0
spec/config_spec.cr Normal file
View file

View file

@ -0,0 +1,6 @@
# Alice
## Who is alice?
## When is alice?

7
spec/filelist_data/bob.c Normal file
View file

@ -0,0 +1,7 @@
#include <stdio.h>
/* Fixture program: prints "Hello world" followed by a newline and returns 0.
 * argc and argv are accepted but not used. */
int main(int argc, char** argv) {
printf("Hello world\n");
return 0;
}

104
spec/filelist_spec.cr Normal file
View file

@ -0,0 +1,104 @@
require "./spec_helper"
require "../src/cli"
# Short local name for the class under test.
alias FileList = CodePreloader::FileList

# Specs for CodePreloader::FileList: construction, source registration,
# select/reject filters, enumeration through `each`, and export through `to_a`.
# The enumeration specs rely on the fixture files under spec/filelist_data.
describe CodePreloader::FileList do
  it "can be created empty" do
    fl = FileList.new
  end

  it "can be created with a list of directories" do
    fl = FileList.new(["src/", "spec/"])
  end

  it "verifies that initial directories exists" do
    expect_raises(FileList::NotADirectory) do
      fl = FileList.new(["Alice", "Bob"])
    end
  end

  it "can append extra sources" do
    fl = FileList.new()
    fl.add "spec/"
  end

  it "verifies that appended directories exists" do
    fl = FileList.new()
    expect_raises(FileList::NotADirectory) do
      fl.add "Alice"
    end
  end

  it "accept adding reject filters" do
    fl = FileList.new()
    fl.reject { |item| !!(item =~ /name/) }
  end

  it "accept adding select filters" do
    fl = FileList.new()
    fl.select { |item| !!(item =~ /name/) }
  end

  it "enumerates the files" do
    fl = FileList.new()
    fl.add("spec/filelist_data")
    files = Dir["spec/filelist_data/*"]

    # Each yielded file must be expected, and is crossed off once seen;
    # an empty remainder means every expected file was yielded.
    fl.each do |file|
      files.should contain(file)
      files = files - [file]
    end
    files.size.should eq(0)
  end

  it "doesn't enumerate duplicate files" do
    fl = FileList.new()
    fl.add("spec/filelist_data")
    fl.add("spec/filelist_data")
    files = [] of String
    fl.each do |file|
      files << file
    end
    files.size.should eq(files.uniq.size)
  end

  it "doesn't enumerate files filtered out by select" do
    fl = FileList.new()
    fl.add("spec/filelist_data")
    fl.select { |path| !!(path =~ /\.c$/) }
    files = Dir["spec/filelist_data/*.c"]

    fl.each do |file|
      files.should contain(file)
      files = files - [file]
    end
    files.size.should eq(0)
  end

  it "doesn't enumerate files filtered out by reject" do
    fl = FileList.new()
    fl.add("spec/filelist_data")
    fl.reject { |path| !!(path =~ /\.txt$/) }
    files = Dir["spec/filelist_data/*.c"]

    fl.each do |file|
      files.should contain(file)
      files = files - [file]
    end
    files.size.should eq(0)
  end

  # The three `to_a` examples below mirror the `each` examples above;
  # they previously had empty bodies and therefore passed vacuously.
  it "export the files as an array" do
    fl = FileList.new()
    fl.add("spec/filelist_data")
    # Sorted on both sides: traversal order is not part of the contract checked here.
    fl.to_a.sort.should eq(Dir["spec/filelist_data/*"].sort)
  end

  it "doesn't export duplicate files" do
    fl = FileList.new()
    fl.add("spec/filelist_data")
    fl.add("spec/filelist_data")
    files = fl.to_a
    # Guard against a vacuous pass on an empty export.
    files.should_not be_empty
    files.size.should eq(files.uniq.size)
  end

  it "doesn't export filtered out files" do
    fl = FileList.new()
    fl.add("spec/filelist_data")
    fl.select { |path| !!(path =~ /\.c$/) }
    fl.to_a.sort.should eq(Dir["spec/filelist_data/*.c"].sort)
  end
end

4
spec/spec_helper.cr Normal file
View file

@ -0,0 +1,4 @@
require "spec"
# require "../src/"

View file

@ -6,6 +6,7 @@ require "option_parser"
require "magic" require "magic"
require "./config" require "./config"
require "./filelist"
# The CodePreloader module organizes classes and methods related to preloading code files. # The CodePreloader module organizes classes and methods related to preloading code files.
module CodePreloader module CodePreloader
@ -20,90 +21,61 @@ module CodePreloader
@config.parse_arguments(args) @config.parse_arguments(args)
end end
# Executes the main functionality of the CLI application. # Executes the main functionality of the CLI application.
def exec def exec
header_prompt = "" # get local values for typing
footer_prompt = "" output_file_path = @output_file_path
__header_prompt_file_path = @config.header_prompt_file_path repository_path_list = @config.repository_path_list
__footer_prompt_file_path = @config.footer_prompt_file_path header_prompt_file_path = @config.header_prompt_file_path
__output_file_path = @output_file_path footer_prompt_file_path = @config.footer_prompt_file_path
__repository_path_list = @config.repository_path_list
if !__header_prompt_file_path.nil? filelist = FileList.new()
STDERR.puts "Loading header prompt from: #{__header_prompt_file_path}" filelist.add(repository_path_list)
header_prompt = File.read(__header_prompt_file_path) @config.ignore_list.each do |ignore_pattern|
filelist.reject { |path| !!(path =~ Regex.new(ignore_pattern)) }
end end
if !__footer_prompt_file_path.nil? if !header_prompt_file_path.nil?
STDERR.puts "Loading footer prompt from: #{__footer_prompt_file_path}" STDERR.puts "Loading header prompt from: #{header_prompt_file_path}"
footer_prompt = File.read(__footer_prompt_file_path) header_prompt = File.read(header_prompt_file_path)
end end
if !footer_prompt_file_path.nil?
STDERR.puts "Loading footer prompt from: #{footer_prompt_file_path}"
footer_prompt = File.read(footer_prompt_file_path)
end
abort("@output_file_path should be non-nil here") if __output_file_path.nil? unless output_file_path.nil? || output_file_path.try(&.empty?) || (output_file_path != "-")
abort("@repository_path should be non-empty here") if __repository_path_list.empty? output_file = File.open(output_file_path, "w")
invalid_output_file = false
end
invalid_output_file = true invalid_output_file = true
output_file = STDOUT output_file = STDOUT
header_prompt = ""
unless __output_file_path.nil? || __output_file_path.try(&.empty?) || (__output_file_path != "-") footer_prompt = ""
output_file = File.open(__output_file_path, "w")
invalid_output_file = false
end
output_file.puts header_prompt if @config.header_prompt_file_path output_file.puts header_prompt if @config.header_prompt_file_path
STDERR.puts "Processing repository: #{@config.repository_path_list}" STDERR.puts "Processing repository: #{@config.repository_path_list}"
__repository_path_list.each do |repository_path| filelist.each do |file_path|
process_repository(repository_path, output_file) process_file(file_path, output_file)
end end
output_file.puts footer_prompt if @config.footer_prompt_file_path output_file.puts footer_prompt if @config.footer_prompt_file_path
output_file.close if !invalid_output_file output_file.close if !invalid_output_file
STDERR.puts "Processing completed. Output written to: #{invalid_output_file ? "stdout" : __output_file_path}" STDERR.puts "Processing completed. Output written to: #{invalid_output_file ? "stdout" : output_file_path}"
rescue e : Exception rescue e : Exception
STDERR.puts "An error occurred during execution: #{e.message}" STDERR.puts "An error occurred during execution: #{e.message}"
exit(1) exit(1)
end end
# Processes the specified repository and writes the output to a file. private def process_file(file_path : String, output_file : IO::FileDescriptor)
def process_repository(repository_path : String, output_file : IO::FileDescriptor)
process_directory(repository_path, repository_path, output_file)
rescue e : IO::Error
STDERR.puts "Error processing repository: #{e.message}"
exit(1)
end
private def process_directory(root_path, dir_path : String, output_file : IO::FileDescriptor)
Dir.each_child(dir_path) do |child|
child_path = File.join(dir_path, child)
ignores = (
@config.ignore_list
.map{ |prefix| [prefix, File.expand_path(child_path) =~ /^#{File.expand_path(prefix)}/] }
.reject!{ |item| item[1].nil? }
)
next if !ignores.empty?
STDERR.puts "File: #{child_path}"
child_path = File.join(dir_path, child)
if File.directory?(child_path)
process_directory(root_path, child_path, output_file)
else
process_file(root_path, child_path, output_file)
end
end
end
private def process_file(root_path : String, file_path : String, output_file : IO::FileDescriptor)
relative_file_path = file_path.sub(/^#{Regex.escape(root_path)}/, ".").lstrip
fh = File.open(file_path) fh = File.open(file_path)
mime = Magic.mime_type.of(fh) mime = Magic.mime_type.of(fh)
output_file.puts "@@ File \"#{relative_file_path}\" (Mime-Type: #{mime.inspect})" output_file.puts "@@ File \"#{file_path}\" (Mime-Type: #{mime.inspect})"
output_file.puts "" output_file.puts ""
output_file.puts(fh.gets_to_end) output_file.puts(fh.gets_to_end)
output_file.puts "" output_file.puts ""

View file

@ -18,23 +18,43 @@ module CodePreloader
OptionParser.parse(args) do |parser| OptionParser.parse(args) do |parser|
parser.banner = "Usage: code-preloader [options] DIR1 ..." parser.banner = "Usage: code-preloader [options] DIR1 ..."
parser.on("-c CONFIG_FILE", "--config=CONFIG_FILE", "Load parameters from CONFIG_FILE") do |config_file| parser.on(
"-c CONFIG_FILE",
"--config=CONFIG_FILE",
"Load parameters from CONFIG_FILE"
) do |config_file|
load_config(config_file) load_config(config_file)
end end
parser.on("-i IGNORE_PATH", "--ignore=IGNORE_PATH", "Ignore file or directory") do |ignore_file| parser.on(
"-i IGNORE_PATH",
"--ignore=IGNORE_PATH",
"Ignore file or directory"
) do |ignore_file|
@ignore_list << ignore_file @ignore_list << ignore_file
end end
parser.on("-o OUTPUT_FILE", "--output=OUTPUT_FILE", "Write output to OUTPUT_FILE") do |output_file| parser.on(
"-o OUTPUT_FILE",
"--output=OUTPUT_FILE",
"Write output to OUTPUT_FILE"
) do |output_file|
@output_file_path = output_file @output_file_path = output_file
end end
parser.on("-H HEADER_PROMPT_FILE", "--header-prompt=HEADER_PROMPT_FILE", "Load header prompt from HEADER_PROMPT_FILE") do |header_prompt_file| parser.on(
"-H HEADER_PROMPT_FILE",
"--header-prompt=HEADER_PROMPT_FILE",
"Load header prompt from HEADER_PROMPT_FILE"
) do |header_prompt_file|
@header_prompt_file_path = header_prompt_file @header_prompt_file_path = header_prompt_file
end end
parser.on("-F FOOTER_PROMPT_FILE", "--footer-prompt=FOOTER_PROMPT_FILE", "Load footer prompt from FOOTER_PROMPT_FILE") do |footer_prompt_file| parser.on(
"-F FOOTER_PROMPT_FILE",
"--footer-prompt=FOOTER_PROMPT_FILE",
"Load footer prompt from FOOTER_PROMPT_FILE"
) do |footer_prompt_file|
@footer_prompt_file_path = footer_prompt_file @footer_prompt_file_path = footer_prompt_file
end end
@ -50,12 +70,12 @@ module CodePreloader
end end
end end
validate_arguments validate
end end
private def validate_arguments private def validate
abort("Missing repository path.") if @repository_path_list.empty? abort("Missing repository path.") if @repository_path_list.empty?
abort("Missing repository path.") if
STDERR.puts("Output file path not specified (using STDOUT)") if @output_file_path.nil? || @output_file_path.try(&.empty?) STDERR.puts("Output file path not specified (using STDOUT)") if @output_file_path.nil? || @output_file_path.try(&.empty?)
end end
@ -78,7 +98,7 @@ module CodePreloader
@header_prompt_file_path = root.header_prompt_file_path || @header_prompt_file_path @header_prompt_file_path = root.header_prompt_file_path || @header_prompt_file_path
@footer_prompt_file_path = root.footer_prompt_file_path || @footer_prompt_file_path @footer_prompt_file_path = root.footer_prompt_file_path || @footer_prompt_file_path
rescue ex rescue ex : Exception
STDERR.puts "Failed to load config file: #{ex.message}" STDERR.puts "Failed to load config file: #{ex.message}"
exit(1) exit(1)
end end

94
src/filelist.cr Normal file
View file

@ -0,0 +1,94 @@
require "walk"
module CodePreloader
  # Holds a list of source paths together with select/reject predicates and
  # enumerates the files found beneath those sources, each one at most once.
  class FileList
    # Predicate receiving a path as a String and answering whether it matches.
    alias Filter = String -> Bool

    # Raised by `#add` when the given path does not exist.
    class NotADirectory < Exception
      def initialize(path)
        super(path.to_s)
      end
    end

    @sources : Array(String)
    @filters_in : Array(Filter)
    @filters_out : Array(Filter)

    # Starts with no filters and registers every entry of *dirs* through `#add`,
    # so a missing path raises `NotADirectory` at construction time.
    def initialize(dirs = [] of String)
      @sources = Array(String).new
      @filters_in = Array(Filter).new
      @filters_out = Array(Filter).new
      dirs.each do |entry|
        add(entry)
      end
    end

    # Registers several sources, one by one, in the given order.
    def add(dirs : Array(String))
      dirs.each do |entry|
        add(entry)
      end
    end

    # Registers a single source path.
    #
    # Raises `NotADirectory` when *dir* does not exist.
    # NOTE(review): the check is `File.exists?`, so an existing regular file is
    # accepted despite the exception's name — confirm whether that is intended.
    def add(dir : String)
      unless File.exists?(dir)
        raise NotADirectory.new(dir)
      end
      @sources.push(dir)
    end

    # Adds a "select" predicate. Once at least one is registered, a
    # non-directory path is kept only if some select predicate matches it.
    def select(&filter : Filter)
      @filters_in.push(filter)
    end

    # Adds a "reject" predicate. Any path (directory or not) matched by a
    # reject predicate is dropped by the walker filter.
    def reject(&filter : Filter)
      @filters_out.push(filter)
    end

    # Walks every registered source and yields each surviving file path as a
    # String. Directories are never yielded; symlinks are yielded as their
    # resolved real path; a path already yielded is skipped.
    def each(&block)
      # Paths already handed to the caller, shared across all sources.
      visited = Set(String).new

      @sources.each do |source|
        walker = Walk::Down.new(source)
        walker = walker.filter do |entry|
          directory = File.directory?(entry)
          selected = @filters_in.any? { |wanted| wanted.call(entry.to_s) }
          rejected = @filters_out.any? { |unwanted| unwanted.call(entry.to_s) }

          # Directories bypass the select filters (but not the reject
          # filters); with no select filter at all, everything is selected.
          (@filters_in.empty? || selected || directory) && !rejected
        end

        walker.each do |entry|
          next if File.directory?(entry)

          resolved = File.symlink?(entry) ? File.realpath(entry) : entry
          key = resolved.to_s

          # `add?` is false when the key was already present.
          next unless visited.add?(key)

          yield key
        end
      end
    end

    # Collects everything `#each` yields into a new Array(String).
    def to_a
      collected = Array(String).new
      each { |path| collected << path.to_s }
      collected
    end
  end
end