Source code for unravel.utilities.aggregate_files_recursively
#!/usr/bin/env python3
"""
Use ``utils_agg_files_rec`` (``agg_rec``) from UNRAVEL to recursively copy (or move) files matching a glob pattern.
Usage:
------
utils_agg_files_rec -p '<asterisk>.txt' [-s /path/to/source] [-d /path/to/destination] [--move] [-v]
"""
import shutil
from pathlib import Path
from rich.traceback import install
from rich import print
from unravel.core.help_formatter import RichArgumentParser, SuppressMetavar, SM
from unravel.core.config import Configuration
from unravel.core.utils import log_command, verbose_start_msg, verbose_end_msg
[docs]
def parse_args():
    """Build the command-line parser and parse the arguments.

    Returns:
        The namespace returned by ``parser.parse_args()``, carrying the
        ``pattern``, ``source``, ``destination``, ``move`` and ``verbose``
        attributes defined below.
    """
    parser = RichArgumentParser(formatter_class=SuppressMetavar, add_help=False, docstring=__doc__)

    reqs = parser.add_argument_group('Required arguments')
    reqs.add_argument('-p', '--pattern', help="The pattern to match files, e.g., '*.txt'", required=True, action=SM)

    opts = parser.add_argument_group('Optional arguments')
    opts.add_argument('-s', '--source', help='The source directory to search files in. Default: current working dir', default='.', action=SM)
    # A relative destination is joined onto the source directory by
    # find_and_copy_files(), so the default '.' means "the source dir", which
    # is the current working dir only when --source is left at its default.
    opts.add_argument('-d', '--destination', help='The destination directory to copy files to (relative paths are resolved against the source dir). Default: the source dir', default='.', action=SM)
    opts.add_argument('-m', '--move', help='Move files instead of copying. Default: False', action='store_true', default=False)

    general = parser.add_argument_group('General arguments')
    general.add_argument('-v', '--verbose', help='Increase verbosity. Default: False', action='store_true', default=False)

    return parser.parse_args()
[docs]
def find_and_copy_files(pattern, src_dir, dest_dir, move=False):
    """Recursively gather files matching ``pattern`` into a single directory.

    Files under ``src_dir`` whose names match ``pattern`` are copied (or moved)
    directly into ``dest_dir``, flattening the directory structure.

    Args:
        pattern: Glob pattern passed to ``Path.rglob`` (e.g. ``'*.txt'``).
        src_dir: Directory to search recursively.
        dest_dir: Directory that receives the files. A relative path is
            resolved against ``src_dir`` (not the current working directory).
            It is created if it does not exist.
        move: If True, move the files instead of copying them.

    Notes:
        * Files that already live in the destination directory are ignored.
        * When several matches share a basename, only the first one in sorted
          path order is processed; the others are left untouched.
        * A file is never overwritten: if the target already exists it is
          reported and skipped.
        * If the source directory does not exist, or nothing is left to
          process, a message is printed and ``SystemExit`` is raised via
          ``sys.exit()``.
    """
    import sys  # Local import: only needed for the early-exit paths below.

    src_dir = Path(src_dir).resolve()
    if not src_dir.is_dir():
        # Stop before the mkdir() below: with a relative destination it would
        # otherwise create the mistyped source directory as a side effect.
        print(f"\n [red1]Source directory not found:[/] [bold]{src_dir}[/]\n")
        sys.exit()

    dest_dir = Path(dest_dir)
    if not dest_dir.is_absolute():
        dest_dir = src_dir / dest_dir
    dest_dir = dest_dir.resolve()
    dest_dir.mkdir(parents=True, exist_ok=True)

    # Sort so that the choice among duplicate basenames does not depend on the
    # filesystem's directory iteration order.
    matched_files = sorted(p.resolve() for p in src_dir.rglob(pattern) if p.is_file())

    files_to_process = []
    seen_outputs = set()
    for file_path in matched_files:
        # Skip files that are already in the destination. When the destination
        # is the source root, only files directly in the root count; when it is
        # another directory, anything nested inside it counts.
        if dest_dir == src_dir:
            already_in_dest = file_path.parent == dest_dir
        else:
            already_in_dest = dest_dir in file_path.parents
        if already_in_dest:
            continue

        # Several matches may share a basename (e.g. the same file name in
        # different nested directories); only the first one is processed.
        out_path = dest_dir / file_path.name
        if out_path in seen_outputs:
            continue
        seen_outputs.add(out_path)
        files_to_process.append((file_path, out_path))

    if not files_to_process:
        print(f"\n [red1]No files found matching the pattern:[/] [bold]{pattern}[/] in {src_dir}\n")
        sys.exit()

    transfer = shutil.move if move else shutil.copy2
    n_copied = 0
    n_skipped = 0
    for file_path, out_path in files_to_process:
        if out_path.exists():
            # Never overwrite an existing file in the destination.
            print(f" [yellow]Skipping existing file:[/] {out_path}")
            n_skipped += 1
            continue
        transfer(str(file_path), str(out_path))
        n_copied += 1

    action = "Moved" if move else "Copied"
    print(f"\n{action} {n_copied} file(s) to: {dest_dir}")
    if n_skipped:
        print(f"Skipped {n_skipped} existing file(s).\n")
    else:
        print()
[docs]
@log_command
def main():
    """Command-line entry point: parse the arguments, then copy or move the matching files."""
    install()
    cli = parse_args()
    Configuration.verbose = cli.verbose

    verbose_start_msg()
    find_and_copy_files(
        pattern=cli.pattern,
        src_dir=cli.source,
        dest_dir=cli.destination,
        move=cli.move,
    )
    verbose_end_msg()
# Run the command-line entry point only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()