Source code for unravel.cluster_stats.stats_table

#!/usr/bin/env python3

"""
Use stats_table.py from UNRAVEL to recursively find and concatenate matching CSVs (e.g., to summarize cluster validation info).

Usage:
------
    path/stats_table.py -cp cluster_validation_info.csv -o cluster_validation_summary.csv
"""

import pandas as pd
from pathlib import Path
from glob import glob
from rich.traceback import install

from unravel.core.help_formatter import RichArgumentParser, SuppressMetavar, SM


def parse_args():
    parser = RichArgumentParser(formatter_class=SuppressMetavar, add_help=False, docstring=__doc__)

    opts = parser.add_argument_group('Optional arguments')
    opts.add_argument('-cp', '--csv_pattern', help="Pattern to match CSV files. Default: cluster_validation_info.csv", default='cluster_validation_info.csv', action=SM)
    opts.add_argument('-o', '--output', help='path/output.csv. Default: cluster_validation_summary.csv', default='cluster_validation_summary.csv', action=SM)

    return parser.parse_args()
def cluster_summary(csv_pattern, output):
    # Use glob to find all matching CSV files recursively
    csv_files = glob(f'**/{csv_pattern}', recursive=True)
    if not csv_files:
        print(f"No CSV files found matching the pattern {csv_pattern}.")
        return

    # Read and concatenate all matching CSV files
    concatenated_df = pd.concat([pd.read_csv(f) for f in csv_files])

    # Sort by the first and second columns if they exist
    if len(concatenated_df.columns) >= 2:
        concatenated_df = concatenated_df.sort_values(by=[concatenated_df.columns[0], concatenated_df.columns[1]])

    # Save the concatenated CSV file
    output = Path(output)
    output.parent.mkdir(parents=True, exist_ok=True)
    concatenated_df.to_csv(output, index=False)
def main():
    args = parse_args()
    cluster_summary(args.csv_pattern, args.output)
if __name__ == '__main__':
    install()
    main()
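
The module can also be used without the command line. The snippet below is a minimal sketch of calling cluster_summary() directly from Python; the pattern and output file name are illustrative, and it assumes the script is run from a directory tree containing matching CSVs.

from unravel.cluster_stats.stats_table import cluster_summary

# Recursively gather all cluster_validation_info.csv files under the current
# directory and write the concatenated, sorted table to a summary CSV.
cluster_summary('cluster_validation_info.csv', 'cluster_validation_summary.csv')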