# Source code for unravel.utilities.points_compressor

#!/usr/bin/env python3

"""
Use ``utils_points_compressor`` (``points_compressor``) from UNRAVEL to pack or unpack point data in a CSV file or summarize the number of points per region.

Input:
    - CSV file with either unpacked (`x, y, z, Region_ID`) or packed (`x, y, z, Region_ID, count`) format.

Output:
    - CSV file with the desired packed or unpacked format.
    - Or save a summary CSV with the number of points per region.

Note:
    - Packing: Group points with the same coordinates and `Region_ID`, adding a `count` column.
    - Unpacking: Expand packed points back to individual rows based on the `count` column.
    - Summary: Output a CSV summarizing the number of points per region.
    - Use only one of the following options: -p, -u, -s.
    - The summary option can be used with either packed or unpacked data.

Usage:
------
    utils_points_compressor -i path/<asterisk>_points.csv [-p or -u or -s] [-v]
"""

import pandas as pd
from pathlib import Path
from rich import print
from rich.traceback import install

from unravel.core.help_formatter import RichArgumentParser, SuppressMetavar, SM
from unravel.core.config import Configuration
from unravel.core.utils import log_command, verbose_start_msg, verbose_end_msg, print_func_name_args_times, process_files_with_glob



[docs]
def parse_args():
    """
    Build the command-line parser for ``utils_points_compressor`` and parse the arguments.

    Returns:
    --------
    args : argparse.Namespace-like
        Parsed arguments with attributes ``input``, ``pack``, ``unpack``, ``summary``, and ``verbose``.
    """
    parser = RichArgumentParser(formatter_class=SuppressMetavar, add_help=False, docstring=__doc__)

    required_group = parser.add_argument_group('Required arguments')
    required_group.add_argument(
        '-i', '--input',
        help="Path to the input CSV file or a glob pattern.",
        required=True,
        action=SM,
    )

    # The three operating modes are all plain boolean switches; register them in the original order.
    mode_flags = (
        ('-p', '--pack', "Pack the points by grouping them."),
        ('-u', '--unpack', "Unpack the points by expanding them based on the `count` column."),
        ('-s', '--summary', 'Output a CSV summarizing the number of points per region.'),
    )
    optional_group = parser.add_argument_group('Optional arguments')
    for short_flag, long_flag, help_text in mode_flags:
        optional_group.add_argument(short_flag, long_flag, help=help_text, action='store_true')

    general_group = parser.add_argument_group('General arguments')
    general_group.add_argument(
        '-v', '--verbose',
        help='Increase verbosity. Default: False',
        action='store_true',
        default=False,
    )

    parsed = parser.parse_args()
    return parsed



[docs]
@print_func_name_args_times()
def pack_points(df):
    """
    Collapse duplicate points into single rows that carry an occurrence count.

    Parameters:
    -----------
    df : pandas.DataFrame
        DataFrame with columns ['x', 'y', 'z', 'Region_ID']

    Returns:
    --------
    packed_df : pandas.DataFrame
        DataFrame with columns ['x', 'y', 'z', 'Region_ID', 'count'], one row per
        unique (x, y, z, Region_ID) combination.
    """
    key_columns = ['x', 'y', 'z', 'Region_ID']

    # Number of rows sharing each unique key combination
    occurrences = df.groupby(key_columns).size()

    # Turn the grouped keys back into regular columns next to the new 'count' column
    packed_df = occurrences.to_frame(name='count').reset_index()
    return packed_df



[docs]
@print_func_name_args_times()
def unpack_points(df):
    """
    Unpack points by expanding them based on the `count` column.

    Each input row is emitted `count` times (rows with a count of 0 are dropped),
    preserving the original row order.

    Parameters:
    -----------
    df : pandas.DataFrame
        DataFrame with columns ['x', 'y', 'z', 'Region_ID', 'count'].
        `count` must hold non-negative integers.

    Returns:
    --------
    unpacked_df : pandas.DataFrame
        DataFrame with columns ['x', 'y', 'z', 'Region_ID'] and a fresh 0..n-1 index.
    """
    # Use a fresh positional index: label-based selection on the caller's index would
    # return every matching row for a duplicated label and inflate the result.
    rows = df.reset_index(drop=True)

    # Repeat each row label `count` times, then select the rows in that order
    repeated = rows.loc[rows.index.repeat(rows['count'])]

    unpacked_df = repeated.drop(columns=['count']).reset_index(drop=True)
    return unpacked_df



[docs]
@print_func_name_args_times()
def summarize_points(df):
    """
    Summarize points by counting the number of points per `Region_ID`.

    Packed and unpacked inputs that describe the same points produce the same
    summary, with rows ordered by ascending `Region_ID`.

    Parameters:
    -----------
    df : pandas.DataFrame
        DataFrame with columns ['x', 'y', 'z', 'Region_ID'] or ['x', 'y', 'z', 'Region_ID', 'count']

    Returns:
    --------
    summary_df : pandas.DataFrame
        DataFrame with columns ['Region_ID', 'count'] summarizing the number of points per region.
    """
    by_region = df.groupby('Region_ID')

    if 'count' in df.columns:
        # Packed input: each row stands for `count` points
        per_region = by_region['count'].sum()
    else:
        # Unpacked input: each row is one point. groupby (rather than value_counts)
        # keeps the row order identical to the packed branch.
        per_region = by_region.size()

    summary_df = per_region.reset_index(name='count')
    return summary_df



[docs]
@print_func_name_args_times()
def points_compressor(file_path, pack=False, unpack=False, summary=False):
    """
    Pack, unpack, or summarize points in a CSV file.

    The result is written next to the input as ``<name>_packed.csv``,
    ``<name>_unpacked.csv``, or ``<name>_summary.csv``. If more than one option is
    set, the first of pack, unpack, summary takes effect.

    Parameters:
    -----------
    file_path : str
        Path to the input CSV file.

    pack : bool, optional
        Pack the points by grouping them.

    unpack : bool, optional
        Unpack the points by expanding them based on the `count` column.

    summary : bool, optional
        Output a CSV summarizing the number of points per region.

    Returns:
    --------
    None
        Nothing is written when packing an already packed file or unpacking an
        already unpacked file; a message is printed instead.

    Raises:
    -------
    ValueError
        If none of `pack`, `unpack`, or `summary` is set.
    """
    # Fail fast with a clear message, before reading anything from disk
    if not (pack or unpack or summary):
        raise ValueError("No operation selected. Use one of: pack (-p), unpack (-u), or summary (-s).")

    file_path = str(file_path)
    df = pd.read_csv(file_path)

    if pack:
        if 'count' in df.columns:
            print(f"\n    [red1 bold]Skipping packing:[/] '{file_path}' is already packed.")
            return
        df = pack_points(df)
        suffix = '_packed'
    elif unpack:
        if 'count' not in df.columns:
            print(f"\n    [red1 bold]Skipping unpacking:[/] '{file_path}' is already unpacked.")
            return
        df = unpack_points(df)
        suffix = '_unpacked'
    else:
        df = summarize_points(df)
        suffix = '_summary'

    output_path = _suffixed_csv_path(file_path, suffix)
    output_path.parent.mkdir(parents=True, exist_ok=True)
    df.to_csv(output_path, index=False)
    print(f"\n    Points saved to {output_path}\n")


def _suffixed_csv_path(file_path, suffix):
    """Return the output path for `file_path` with `suffix` inserted before the file name's '.csv'."""
    source = Path(file_path)

    # Only the file name is rewritten, so a '.csv' inside a directory name is left alone.
    # Splitting on the last '.csv' keeps any trailing extension (e.g. '.csv.gz').
    head, csv_ext, tail = source.name.rpartition('.csv')
    if csv_ext:
        new_name = f"{head}{suffix}{csv_ext}{tail}"
    else:
        # No '.csv' in the name: build a new name so the input file is never overwritten
        new_name = f"{source.stem}{suffix}.csv"
    return source.with_name(new_name)




[docs]
@log_command
def main():
    """
    Command-line entry point.

    Installs the rich traceback handler, parses the arguments, sets the global
    verbosity, and hands the input pattern plus the selected operation flags to
    ``process_files_with_glob`` with ``points_compressor`` as the processing function.
    """
    install()
    cli_args = parse_args()
    Configuration.verbose = cli_args.verbose
    verbose_start_msg()

    # Operation flags forwarded unchanged as keyword arguments for points_compressor
    operation_flags = {
        'pack': cli_args.pack,
        'unpack': cli_args.unpack,
        'summary': cli_args.summary,
    }
    process_files_with_glob(
        glob_pattern=cli_args.input,
        processing_func=points_compressor,
        **operation_flags,
    )

    verbose_end_msg()



# Run the command-line entry point only when this file is executed as a script.
if __name__ == '__main__':
    main()