Source code for unravel.allen_institute.mapmysections.segmentation_summary

#!/usr/bin/env python3

"""
Use ``mms_seg_summary`` or ``mms_ss`` from UNRAVEL to summarize the prevalence of voxels for somata, endothelial cells, and astrocytes from Ilastik segmentations.

Note:
    - Designed for MMS segmentations of somata (label 1), endothelial cells (3), and astrocytes (4).
    - For each sample, voxel counts and proportions are computed for each cell type.
    - For example, if a sample has 1000 total segmented voxels, with 600 somatic voxels, 300 endothelial voxels, and 100 astroglia voxels, the proportions would be 0.6, 0.3, and 0.1 respectively.

Prereqs:
    - ``seg_ilastik`` outputs (e.g., MMS_seg/MMS_seg_1.nii.gz, MMS_seg/MMS_seg_3.nii.gz, MMS_seg/MMS_seg_4.nii.gz).

Output:
    - Per-sample CSVs saved to <sample>/MMS_seg/<sample>_segmentation_summary.csv
    - Columns: sample, somata_count, endothelial_count, astrocytes_count, total_count, somata_prop, endothelial_prop, astrocytes_prop

Next steps:
    - Use ``agg`` to aggregate results across samples and cd to the target directory.
    - Use ``concat_with_source`` to merge outputs across samples.
    - If most voxels are endothelial, the sample is likely enriched for endothelial cells (manually verify and revise cell type proportions as needed).
    - If most voxels are astroglial, the sample is likely enriched for astrocytes (manually verify and revise cell type proportions as needed).
    
Usage:
------
    mms_seg_summary [-s seg_dir] [-d path/to/dirs] [-p 'ID_*'] [-v]
"""

import numpy as np
from pathlib import Path
from rich import print
from rich.live import Live
from rich.traceback import install

from unravel.core.config import Configuration
from unravel.core.help_formatter import RichArgumentParser, SuppressMetavar, SM
from unravel.core.img_io import load_nii
from unravel.core.utils import log_command, verbose_start_msg, verbose_end_msg, get_samples, initialize_progress_bar



[docs]
def parse_args():
    parser = RichArgumentParser(formatter_class=SuppressMetavar, add_help=False, docstring=__doc__)

    opts = parser.add_argument_group('Optional arguments')
    opts.add_argument('-s', '--seg_dir', help="Segmentation directory relative to each sample dir. Default: 'MMS_seg'", default="MMS_seg", action=SM)

    general = parser.add_argument_group('General arguments')
    general.add_argument('-d', '--dirs', help='Paths to "ID*" dirs and/or dirs containing them (space-separated) for batch processing. Default: current dir', nargs='*', default=None, action=SM)
    general.add_argument('-p', '--pattern', help='Pattern for directories to process. Default: ID_*', default='ID_*', action=SM)
    general.add_argument('-v', '--verbose', help='Increase verbosity. Default: False', action='store_true', default=False)

    return parser.parse_args()


# TODO: Add an option(s) to specify which labels to include to make this more generalizable.


[docs]
def get_seg_voxel_counts(seg_folder, classes):
    counts_dict = {} 
    for label, name in classes.items(): # label: name (e.g., 1: 'somata')
        img_name = f"{seg_folder.name}_{label}.nii.gz"
        img_path = seg_folder / img_name
        if not img_path.exists():
            print(f"[red]Warning: {img_path} does not exist.")
            continue
        img = load_nii(img_path)
        counts_dict[name] = int(np.count_nonzero(img)) # name: count
    return counts_dict



[docs]
def compute_proportions(counts_dict):
    total_voxel_count = sum(counts_dict.values()) # Total voxel count across all classes (name: count)
    if total_voxel_count == 0:
        return {name: 0.0 for name in counts_dict}
    return {name: counts_dict[name] / total_voxel_count for name in counts_dict} # name: proportion



[docs]
def process_and_write_line(sample, seg_folder, output_csv, classes):
    counts_dict = get_seg_voxel_counts(seg_folder, classes) # name: count
    total_voxel_count = sum(counts_dict.values()) # Total voxel count across all classes
    proportions_dict = compute_proportions(counts_dict) # name: proportion
    output_csv.write(
        f"{sample},{counts_dict['somata']},{counts_dict['endothelial']},{counts_dict['astrocytes']},{total_voxel_count},{proportions_dict['somata']:.4f},{proportions_dict['endothelial']:.4f},{proportions_dict['astrocytes']:.4f}\n"
    )



[docs]
@log_command
def main():
    install()
    args = parse_args()
    Configuration.verbose = args.verbose
    verbose_start_msg()

    sample_paths = get_samples(args.dirs, args.pattern, args.verbose)

    progress, task_id = initialize_progress_bar(len(sample_paths), "[red]Processing samples...")
    with Live(progress):
        for sample_path in sample_paths:

            labels_dict = {1: 'somata', 3: 'endothelial', 4: 'astrocytes'}
            header = "sample,somata_count,endothelial_count,astrocytes_count,total_count,somata_prop,endothelial_prop,astrocytes_prop\n"

            seg_folder = sample_path / args.seg_dir
            output = sample_path / args.seg_dir / f"{sample_path.name}_segmentation_summary.csv"

            if output.exists():
                print(f"Skipping {sample_path}: '{output}' already exists.")
                continue
            with open(output, 'w') as output_csv:
                output_csv.write(header)
                process_and_write_line(sample_path.name, seg_folder, output_csv, labels_dict)

            progress.update(task_id, advance=1)

    verbose_end_msg()


if __name__ == '__main__':
    main()