Source code for unravel.utilities.points_compressor

#!/usr/bin/env python3

"""
Use ``utils_points_compressor`` from UNRAVEL to pack or unpack point data in a CSV file or summarize the number of points per region.

Input:
    - CSV file with either unpacked (`x, y, z, Region_ID`) or packed (`x, y, z, Region_ID, count`) format.

Output:
    - CSV file with the desired packed or unpacked format.
    - Or save a summary CSV with the number of points per region.

Note:
    - Packing: Group points with the same coordinates and `Region_ID`, adding a `count` column.
    - Unpacking: Expand packed points back to individual rows based on the `count` column.
    - Summary: Output a CSV summarizing the number of points per region.
    - Use only one of the following options: -p, -u, -s.
    - The summary option can be used with either packed or unpacked data.

Usage:
------
    utils_points_compressor -i path/<asterisk>_points.csv [-p or -u or -s] [-v]
"""

import pandas as pd
from pathlib import Path
from rich import print
from rich.traceback import install

from unravel.core.help_formatter import RichArgumentParser, SuppressMetavar, SM
from unravel.core.config import Configuration
from unravel.core.utils import log_command, verbose_start_msg, verbose_end_msg, print_func_name_args_times, process_files_with_glob


def parse_args():
    """
    Build the command-line parser for ``utils_points_compressor`` and parse the arguments.

    Returns:
    --------
    The namespace returned by ``parser.parse_args()``, with the attributes
    ``input``, ``pack``, ``unpack``, ``summary``, and ``verbose``.
    """
    parser = RichArgumentParser(formatter_class=SuppressMetavar, add_help=False, docstring=__doc__)

    required = parser.add_argument_group('Required arguments')
    required.add_argument('-i', '--input', help="Path to the input CSV file or a glob pattern.", required=True, action=SM)

    # The three processing modes are plain boolean switches, so register them from a table
    mode_flags = (
        ('-p', '--pack', "Pack the points by grouping them."),
        ('-u', '--unpack', "Unpack the points by expanding them based on the `count` column."),
        ('-s', '--summary', 'Output a CSV summarizing the number of points per region.'),
    )
    optional = parser.add_argument_group('Optional arguments')
    for short_flag, long_flag, help_text in mode_flags:
        optional.add_argument(short_flag, long_flag, help=help_text, action='store_true')

    general = parser.add_argument_group('General arguments')
    general.add_argument('-v', '--verbose', help='Increase verbosity. Default: False', action='store_true', default=False)

    return parser.parse_args()
@print_func_name_args_times()
def pack_points(df):
    """
    Pack points by collapsing rows that share the same coordinates and `Region_ID`.

    Parameters:
    -----------
    df : pandas.DataFrame
        DataFrame with columns ['x', 'y', 'z', 'Region_ID']

    Returns:
    --------
    packed_df : pandas.DataFrame
        DataFrame with columns ['x', 'y', 'z', 'Region_ID', 'count'], where `count`
        is the number of input rows in each (x, y, z, Region_ID) group.
    """
    key_columns = ['x', 'y', 'z', 'Region_ID']

    # Number of rows per unique key combination
    occurrences = df.groupby(key_columns).size()

    # Move the group keys back into ordinary columns and name the tally
    return occurrences.reset_index(name='count')
@print_func_name_args_times()
def unpack_points(df):
    """
    Unpack points by expanding them based on the `count` column.

    Each input row is emitted `count` times, in the original row order, and the
    `count` column is removed from the result.

    Parameters:
    -----------
    df : pandas.DataFrame
        DataFrame with columns ['x', 'y', 'z', 'Region_ID', 'count']

    Returns:
    --------
    unpacked_df : pandas.DataFrame
        The input columns without `count` (e.g., ['x', 'y', 'z', 'Region_ID']),
        with a fresh 0..n-1 index.
    """
    # Look rows up through a fresh 0..n-1 index: label-based selection on the caller's
    # index would return every row sharing a label, over-expanding the result whenever
    # that index is not unique (e.g., after concatenating several DataFrames).
    rows = df.reset_index(drop=True)

    # Repeat each row label `count` times, then select the rows in that order
    repeated_labels = rows.index.repeat(rows['count'])
    unpacked_df = rows.loc[repeated_labels].drop(columns=['count']).reset_index(drop=True)
    return unpacked_df
@print_func_name_args_times()
def summarize_points(df):
    """
    Count the number of points in each region.

    Parameters:
    -----------
    df : pandas.DataFrame
        DataFrame with columns ['x', 'y', 'z', 'Region_ID'] (unpacked) or
        ['x', 'y', 'z', 'Region_ID', 'count'] (packed)

    Returns:
    --------
    summary_df : pandas.DataFrame
        DataFrame with columns ['Region_ID', 'count'] summarizing the number of points per region.

    Note:
    -----
    Row order differs by input format: packed input goes through `groupby`,
    unpacked input goes through `value_counts`.
    """
    if 'count' not in df.columns:
        # Unpacked data: every row is a single point, so tally rows per region
        region_counts = df['Region_ID'].value_counts().reset_index()
        region_counts.columns = ['Region_ID', 'count']
        return region_counts

    # Packed data: each row already represents `count` points, so add the counts up
    region_totals = df.groupby('Region_ID')['count'].sum()
    return region_totals.reset_index(name='count')
@print_func_name_args_times()
def points_compressor(file_path, pack=False, unpack=False, summary=False):
    """
    Pack, unpack, or summarize points in a CSV file.

    The result is written next to the input file, with `_packed`, `_unpacked`, or
    `_summary` appended to the file name stem (e.g., `a_points.csv` -> `a_points_packed.csv`).
    If more than one mode is set, the first of pack, unpack, summary is used.

    Parameters:
    -----------
    file_path : str
        Path to the input CSV file.
    pack : bool, optional
        Pack the points by grouping them.
    unpack : bool, optional
        Unpack the points by expanding them based on the `count` column.
    summary : bool, optional
        Output a CSV summarizing the number of points per region.

    Raises:
    -------
    ValueError
        If none of `pack`, `unpack`, or `summary` is set.
    """
    # Fail early with a clear message instead of reading the CSV and then crashing on a missing output path
    if not (pack or unpack or summary):
        raise ValueError("No action selected: set one of pack, unpack, or summary.")

    file_path = str(file_path)
    df = pd.read_csv(file_path)

    if pack:
        if 'count' in df.columns:
            print(f"\n [red1 bold]Skipping packing:[/] '{file_path}' is already packed.")
            return
        df = pack_points(df)
        tag = '_packed'
    elif unpack:
        if 'count' not in df.columns:
            print(f"\n [red1 bold]Skipping unpacking:[/] '{file_path}' is already unpacked.")
            return
        df = unpack_points(df)
        tag = '_unpacked'
    else:  # summary (guaranteed by the check above)
        df = summarize_points(df)
        tag = '_summary'

    # Build the output name from the file name only. A plain string replace of '.csv' would also
    # rewrite directory names containing '.csv', and would leave the path unchanged (overwriting
    # the input) when the file name has no lowercase '.csv' in it.
    source_path = Path(file_path)
    output_path = source_path.with_name(f"{source_path.stem}{tag}{source_path.suffix}")

    output_path.parent.mkdir(parents=True, exist_ok=True)
    df.to_csv(output_path, index=False)
    print(f"\n Points saved to {output_path}\n")
@log_command
def main():
    """
    Command-line entry point: parse the arguments and run `points_compressor`
    on the input given with -i/--input.
    """
    install()  # rich traceback handler
    args = parse_args()
    Configuration.verbose = args.verbose

    verbose_start_msg()

    # Forward the selected mode flags to points_compressor as keyword arguments
    mode_flags = {'pack': args.pack, 'unpack': args.unpack, 'summary': args.summary}
    process_files_with_glob(glob_pattern=args.input, processing_func=points_compressor, **mode_flags)

    verbose_end_msg()
# Run the command-line entry point only when this file is executed as a script
if __name__ == '__main__':
    main()