Source code for unravel.tabular.key_value_to_table

#!/usr/bin/env python3

"""
Use ``tabular_key_value_to_table`` or ``kv_table`` from UNRAVEL to convert structured key-value data into a tabular format.

Input file format:
    - Format: <key><delimiter><value>, one pair per line or row
    - Groups of key-value pairs (separated by repeated first key) form rows in the output.
    - Example (txt or 2-col csv/xlsx):
    - cluster,1
    - Pearson correlation,-0.1567
    - p-value,0.2359
    - cluster,2
    - Pearson correlation,0.1376
    - p-value,0.4449

Output file format:
    - A tabular file (.csv or .xlsx) where each key becomes a column header, and each group forms a row.
    - Example:
    - cluster, Pearson_correlation, p-value
    - 1, -0.1567, 0.2359
    - 2, 0.1376, 0.4449

Usage:
------
    tabular_key_value_to_table -i input.csv [-o output.csv] [-d ,] [-v]
"""

from pathlib import Path
import pandas as pd
from rich import print
from rich.traceback import install

from unravel.core.help_formatter import RichArgumentParser, SuppressMetavar, SM
from unravel.core.config import Configuration 
from unravel.core.utils import log_command, verbose_start_msg, verbose_end_msg
from unravel.tabular.utils import load_tabular_file, save_tabular_file



[docs]
def parse_args():
    """Build the command-line parser for this script and parse ``sys.argv``.

    Returns:
        argparse.Namespace-like object with the attributes ``input``,
        ``delimiter``, ``output``, ``skip_header`` and ``verbose``.
    """
    parser = RichArgumentParser(formatter_class=SuppressMetavar, add_help=False, docstring=__doc__)

    reqs = parser.add_argument_group('Required arguments')
    reqs.add_argument('-i', '--input', help='Path to the input file (.txt, .csv, or .xlsx)', required=True, action=SM)

    opts = parser.add_argument_group('Optional arguments')
    opts.add_argument('-d', '--delimiter', help="Delimiter for text input. Default: ','.", default=',', action=SM)
    opts.add_argument('-o', '--output', help="Path to the output file (.csv or .xlsx). Default: input with .csv extension.", default=None, action=SM)
    # main() reads args.skip_header for .csv/.xlsx input, so the option must be
    # declared here; otherwise accessing it raises AttributeError.
    opts.add_argument('-s', '--skip_header', help='Drop the first row of the loaded .csv/.xlsx table before parsing key-value pairs. Default: False', action='store_true', default=False)

    general = parser.add_argument_group('General arguments')
    general.add_argument('-v', '--verbose', help='Increase verbosity. Default: False', action='store_true', default=False)

    return parser.parse_args()




[docs]
def reshape_key_value_blocks(pairs: list[tuple[str, str]]) -> pd.DataFrame:
    """Reshape a list of key-value pairs into a table (one row per group).

    The first key in ``pairs`` marks the start of a group: every time it is
    seen again (after at least one pair has been collected) the current row is
    closed and a new one is started. Keys are normalised by stripping
    surrounding whitespace and replacing spaces with underscores; they become
    the column names of the result.

    Args:
        pairs: Sequence of ``(key, value)`` pairs in file order. Values that
            are strings are stripped; other values (e.g. numbers coming from
            a .csv/.xlsx table) are stored unchanged.

    Returns:
        A DataFrame with one row per group and one column per distinct key.
        An empty input yields an empty DataFrame.
    """
    def _normalize_key(raw_key) -> str:
        # Keys may arrive as non-strings from tabular input, so coerce first.
        return str(raw_key).strip().replace(" ", "_")

    structured_data = []
    current_row = {}
    # Normalise the group-start key the same way as every other key; comparing
    # a raw first key (e.g. "cluster id") against normalised keys
    # ("cluster_id") would never match and all groups would collapse into one.
    first_key = _normalize_key(pairs[0][0]) if pairs else None

    for key, value in pairs:
        key = _normalize_key(key)
        if key == first_key and current_row:
            structured_data.append(current_row)
            current_row = {}
        # Only strings have .strip(); leave numeric cells untouched.
        current_row[key] = value.strip() if isinstance(value, str) else value
    if current_row:
        structured_data.append(current_row)

    return pd.DataFrame(structured_data).fillna(pd.NA)




[docs]
@log_command
def main():
    """Convert a key-value input file into a table and save it.

    Reads the file given by ``-i``: a ``.txt`` file is split line by line on
    the delimiter, while ``.csv``/``.xlsx`` files are loaded with
    ``load_tabular_file`` and must have exactly two columns. The pairs are
    reshaped with ``reshape_key_value_blocks`` and written with
    ``save_tabular_file``.

    Raises:
        ValueError: If a .csv/.xlsx input does not have exactly two columns,
            or if the input extension is not .txt, .csv, or .xlsx.
    """
    install()
    args = parse_args()
    Configuration.verbose = args.verbose
    verbose_start_msg()

    # Load and parse key-value pairs
    input_path = Path(args.input)
    suffix = input_path.suffix.lower()

    if suffix == '.txt':
        lines = input_path.read_text(encoding='utf-8').strip().splitlines()
        # Split on the first delimiter only so values may contain it; lines
        # without the delimiter are skipped.
        raw_pairs = [line.split(args.delimiter, 1) for line in lines if args.delimiter in line]
    elif suffix in ['.csv', '.xlsx']:
        df, _ = load_tabular_file(args.input)
        if df.shape[1] != 2:
            raise ValueError(f"Input file must have exactly 2 columns for key-value structure. Found {df.shape[1]}")

        # Use getattr so a parser that does not declare skip_header does not
        # crash here with AttributeError; the row is kept by default.
        if getattr(args, 'skip_header', False):
            df = df.iloc[1:]
        raw_pairs = list(df.itertuples(index=False, name=None))

    else:
        raise ValueError(f"Unsupported file format: {args.input}")

    df_out = reshape_key_value_blocks(raw_pairs)

    # Determine output path
    # NOTE(review): with no -o, a .csv or .xlsx input yields a default output
    # path equal to the input path, so the input would be overwritten if
    # save_tabular_file writes to that path - confirm this is intended.
    default_ext = '.xlsx' if suffix == '.xlsx' else '.csv'
    output_path = args.output or str(input_path.with_suffix(default_ext))
    save_tabular_file(df_out, output_path, index=False, verbose=args.verbose)

    verbose_end_msg()



if __name__ == '__main__':
    main()