Source code for unravel.tabular.key_value_to_table

#!/usr/bin/env python3

"""
Use ``tabular_key_value_to_table`` or ``kv_table`` from UNRAVEL to convert structured key-value data into a tabular format.

Input file format:
    - Format: <key><delimiter><value>, one pair per line or row
    - Groups of key-value pairs (separated by repeated first key) form rows in the output.
    - Example (txt or 2-col csv/xlsx):
    - cluster,1
    - Pearson correlation,-0.1567
    - p-value,0.2359
    - cluster,2
    - Pearson correlation,0.1376
    - p-value,0.4449

Output file format:
    - A tabular file (.csv or .xlsx) where each key becomes a column header, and each group forms a row.
    - Example:
    - cluster, Pearson_correlation, p-value
    - 1, -0.1567, 0.2359
    - 2, 0.1376, 0.4449

Usage:
------
    tabular_key_value_to_table -i input.csv [-o output.csv] [-d ,] [-v]
"""

from pathlib import Path
import pandas as pd
from rich import print
from rich.traceback import install

from unravel.core.help_formatter import RichArgumentParser, SuppressMetavar, SM
from unravel.core.config import Configuration 
from unravel.core.utils import log_command, verbose_start_msg, verbose_end_msg
from unravel.tabular.utils import load_tabular_file, save_tabular_file


def parse_args():
    """Build the command-line parser for this script and parse ``sys.argv``.

    Options:
        -i / --input        Path to the input file (required).
        -d / --delimiter    Delimiter for text input (default ``','``).
        -s / --skip_header  Skip the first row of .csv/.xlsx input (default False).
        -o / --output       Path to the output file (default None; the caller
                            derives a path from the input).
        -v / --verbose      Increase verbosity (default False).

    Returns
    -------
    argparse.Namespace-like object returned by ``parser.parse_args()``.
    """
    parser = RichArgumentParser(formatter_class=SuppressMetavar, add_help=False, docstring=__doc__)

    reqs = parser.add_argument_group('Required arguments')
    reqs.add_argument('-i', '--input', help='Path to the input file (.txt, .csv, or .xlsx)', required=True, action=SM)

    opts = parser.add_argument_group('Optional arguments')
    opts.add_argument('-d', '--delimiter', help="Delimiter for text input. Default: ','.", default=',', action=SM)
    # main() reads ``args.skip_header`` for .csv/.xlsx input, so the attribute
    # has to exist on the parsed namespace; default False keeps every row.
    opts.add_argument('-s', '--skip_header', help='Skip the first row of .csv/.xlsx input. Default: False', action='store_true', default=False)
    opts.add_argument('-o', '--output', help="Path to the output file (.csv or .xlsx). Default: input with .csv extension.", default=None, action=SM)

    general = parser.add_argument_group('General arguments')
    general.add_argument('-v', '--verbose', help='Increase verbosity. Default: False', action='store_true', default=False)

    return parser.parse_args()
def reshape_key_value_blocks(pairs: list[tuple[str, str]]) -> pd.DataFrame:
    """Reshape a list of key-value pairs into a table (one row per group).

    Keys are normalised by stripping surrounding whitespace and replacing
    spaces with underscores. A new row starts every time the (normalised)
    first key of ``pairs`` is seen again. Within a group, a repeated key
    overwrites the earlier value.

    Parameters
    ----------
    pairs : list of (key, value)
        Ordered key-value pairs. String values are stripped; non-string
        values (e.g. numbers read from a spreadsheet) are kept unchanged.

    Returns
    -------
    pd.DataFrame
        One column per distinct key and one row per group. Keys missing
        from a group are filled via ``fillna(pd.NA)``. Empty input yields
        an empty DataFrame.
    """
    def _normalize_key(raw_key) -> str:
        # str() guards against non-string keys coming from tabular input.
        return str(raw_key).strip().replace(" ", "_")

    structured_data = []
    current_row = {}

    # The group marker must be normalised exactly like the keys it is
    # compared against; otherwise a first key containing spaces or padding
    # never matches and every group collapses into a single row.
    first_key = _normalize_key(pairs[0][0]) if pairs else None

    for key, value in pairs:
        key = _normalize_key(key)
        if key == first_key and current_row:
            structured_data.append(current_row)
            current_row = {}
        # Only strings have .strip(); leave other value types untouched.
        current_row[key] = value.strip() if isinstance(value, str) else value

    if current_row:
        structured_data.append(current_row)

    return pd.DataFrame(structured_data).fillna(pd.NA)
@log_command
def main():
    """Convert a key-value input file into a table and save it.

    Steps:
        1. Parse command-line arguments and set ``Configuration.verbose``.
        2. Read key-value pairs:
           - ``.txt``: each line containing the delimiter is split once on it.
           - ``.csv`` / ``.xlsx``: loaded with ``load_tabular_file``; the table
             must have exactly two columns.
        3. Reshape the pairs with ``reshape_key_value_blocks``.
        4. Save to ``args.output``, or to the input path with a ``.xlsx``
           suffix for .xlsx input and ``.csv`` otherwise.

    Raises
    ------
    ValueError
        If a .csv/.xlsx input does not have exactly 2 columns, or the input
        extension is not .txt, .csv, or .xlsx.
    """
    install()
    args = parse_args()
    Configuration.verbose = args.verbose

    verbose_start_msg()

    # Load and parse key-value pairs
    input_path = Path(args.input)
    suffix = input_path.suffix.lower()

    if suffix == '.txt':
        lines = input_path.read_text(encoding='utf-8').strip().splitlines()
        # Lines without the delimiter are ignored; split only on the first
        # occurrence so values may themselves contain the delimiter.
        raw_pairs = [line.split(args.delimiter, 1) for line in lines if args.delimiter in line]
    elif suffix in ['.csv', '.xlsx']:
        df, _ = load_tabular_file(args.input)
        if df.shape[1] != 2:
            raise ValueError(f"Input file must have exactly 2 columns for key-value structure. Found {df.shape[1]}")
        # Tolerate a parser that does not define a skip_header option
        # (default: keep all rows) instead of raising AttributeError.
        if getattr(args, 'skip_header', False):
            df = df.iloc[1:]
        raw_pairs = list(df.itertuples(index=False, name=None))
    else:
        raise ValueError(f"Unsupported file format: {args.input}")

    df_out = reshape_key_value_blocks(raw_pairs)

    # Determine output path
    default_ext = '.xlsx' if suffix == '.xlsx' else '.csv'
    output_path = args.output or str(input_path.with_suffix(default_ext))

    save_tabular_file(df_out, output_path, index=False, verbose=args.verbose)

    verbose_end_msg()
# Run the command-line entry point only when executed as a script.
if __name__ == '__main__':
    main()