Source code for unravel.allen_institute.mapmysections.segmentation_summary

#!/usr/bin/env python3

"""
Use ``mms_seg_summary`` or ``mms_ss`` from UNRAVEL to summarize the prevalence of voxels for somata, endothelial cells, and astrocytes from Ilastik segmentations.

Note:
    - Designed for MMS segmentations of somata (label 1), endothelial cells (3), and astrocytes (4).
    - For each sample, voxel counts and proportions are computed for each cell type.
    - For example, if a sample has 1000 total segmented voxels, with 600 somatic voxels, 300 endothelial voxels, and 100 astroglia voxels, the proportions would be 0.6, 0.3, and 0.1 respectively.

Prereqs:
    - ``seg_ilastik`` outputs (e.g., MMS_seg/MMS_seg_1.nii.gz, MMS_seg/MMS_seg_3.nii.gz, MMS_seg/MMS_seg_4.nii.gz).

Output:
    - Per-sample CSVs saved to <sample>/MMS_seg/<sample>_segmentation_summary.csv
    - Columns: sample, somata_count, endothelial_count, astrocytes_count, total_count, somata_prop, endothelial_prop, astrocytes_prop

Next steps:
    - Use ``agg`` to aggregate results across samples and cd to the target directory.
    - Use ``concat_with_source`` to merge outputs across samples.
    - If most voxels are endothelial, the sample is likely enriched for endothelial cells (manually verify and revise cell type proportions as needed).
    - If most voxels are astroglial, the sample is likely enriched for astrocytes (manually verify and revise cell type proportions as needed).
    
Usage:
------
    mms_seg_summary [-s seg_dir] [-d path/to/dirs] [-p 'ID_*'] [-v]
"""

import numpy as np
from pathlib import Path
from rich import print
from rich.live import Live
from rich.traceback import install

from unravel.core.config import Configuration
from unravel.core.help_formatter import RichArgumentParser, SuppressMetavar, SM
from unravel.core.img_io import load_nii
from unravel.core.utils import log_command, verbose_start_msg, verbose_end_msg, get_samples, initialize_progress_bar


def parse_args():
    """Define the command-line interface and return the parsed arguments.

    The module docstring is handed to the parser (``docstring=__doc__``), so the
    usage text at the top of this file is what the parser receives.

    Returns:
        The namespace produced by ``parser.parse_args()`` with the attributes
        ``seg_dir``, ``dirs``, ``pattern`` and ``verbose``.
    """
    parser = RichArgumentParser(
        formatter_class=SuppressMetavar,
        add_help=False,
        docstring=__doc__,
    )

    # Options specific to this command.
    optional_group = parser.add_argument_group('Optional arguments')
    optional_group.add_argument(
        '-s', '--seg_dir',
        help="Segmentation directory relative to each sample dir. Default: 'MMS_seg'",
        default="MMS_seg",
        action=SM,
    )

    # Sample-selection and verbosity options.
    general_group = parser.add_argument_group('General arguments')
    general_group.add_argument(
        '-d', '--dirs',
        help='Paths to "ID*" dirs and/or dirs containing them (space-separated) for batch processing. Default: current dir',
        nargs='*',
        default=None,
        action=SM,
    )
    general_group.add_argument(
        '-p', '--pattern',
        help='Pattern for directories to process. Default: ID_*',
        default='ID_*',
        action=SM,
    )
    general_group.add_argument(
        '-v', '--verbose',
        help='Increase verbosity. Default: False',
        action='store_true',
        default=False,
    )

    return parser.parse_args()
# TODO: Add option(s) to specify which labels to include, to make this more generalizable.
def get_seg_voxel_counts(seg_folder, classes):
    """Count the non-zero voxels of each class image found in a segmentation folder.

    For every ``label`` in ``classes`` the image
    ``<seg_folder>/<seg_folder.name>_<label>.nii.gz`` is looked up
    (e.g., ``MMS_seg/MMS_seg_1.nii.gz``). Images that are not on disk are
    reported with a warning and left out of the result.

    Args:
        seg_folder: Path of the segmentation directory; its ``name`` is used as
            the image file prefix.
        classes: Mapping of label -> class name (e.g., ``{1: 'somata'}``).

    Returns:
        dict: class name -> number of non-zero voxels, containing only the
        classes whose image exists.
    """
    voxel_counts = {}
    file_prefix = seg_folder.name

    for label in classes:
        nii_path = seg_folder / f"{file_prefix}_{label}.nii.gz"

        if nii_path.exists():
            # load_nii presumably returns the image data as an array -- confirm in unravel.core.img_io.
            voxel_counts[classes[label]] = int(np.count_nonzero(load_nii(nii_path)))
        else:
            print(f"[red]Warning: {nii_path} does not exist.")

    return voxel_counts
def compute_proportions(counts_dict):
    """Convert per-class voxel counts into fractions of the summed count.

    Args:
        counts_dict: Mapping of class name -> voxel count.

    Returns:
        dict: class name -> ``count / total`` with the same keys as
        ``counts_dict``. When the counts sum to zero every proportion is
        ``0.0``, which avoids dividing by zero.
    """
    grand_total = sum(counts_dict.values())

    if grand_total != 0:
        return {name: count / grand_total for name, count in counts_dict.items()}

    # Nothing was segmented: report 0.0 for every class.
    return dict.fromkeys(counts_dict, 0.0)
def process_and_write_line(sample, seg_folder, output_csv, classes):
    """Write one CSV row with the voxel counts and proportions of a sample.

    The row layout is ``sample, <count per class>, total, <proportion per class>``
    with the classes in the iteration order of ``classes``. For the default
    mapping ``{1: 'somata', 3: 'endothelial', 4: 'astrocytes'}`` this matches the
    header ``sample,somata_count,endothelial_count,astrocytes_count,total_count,
    somata_prop,endothelial_prop,astrocytes_prop``.

    ``get_seg_voxel_counts`` omits classes whose image is missing (after
    printing a warning). Those classes are written with a count of 0 here
    rather than raising ``KeyError``, which would otherwise abort after the
    header has been written and leave a header-only CSV behind.

    Args:
        sample: Sample identifier written in the first column.
        seg_folder: Path of the sample's segmentation directory.
        output_csv: Open, writable text file object that receives the row.
        classes: Mapping of label -> class name (e.g., ``{1: 'somata'}``).
    """
    class_names = list(classes.values())
    found_counts = get_seg_voxel_counts(seg_folder, classes)  # name: count (existing images only)

    # Fill in 0 for classes without an image so every column is always present.
    counts_dict = {name: found_counts.get(name, 0) for name in class_names}
    total_voxel_count = sum(counts_dict.values())
    proportions_dict = compute_proportions(counts_dict)  # name: proportion

    fields = [str(sample)]
    fields.extend(str(counts_dict[name]) for name in class_names)
    fields.append(str(total_voxel_count))
    fields.extend(f"{proportions_dict[name]:.4f}" for name in class_names)

    output_csv.write(",".join(fields) + "\n")
@log_command
def main():
    """Write a one-row segmentation summary CSV for every matching sample.

    For each sample directory returned by ``get_samples`` the summary is saved
    to ``<sample>/<seg_dir>/<sample>_segmentation_summary.csv``. A sample is
    skipped (with a message) when its summary already exists or when its
    segmentation directory is missing. The progress bar advances for every
    sample, including skipped ones, so it reaches completion.

    Raises:
        Exception: Any error raised while summarizing a sample is re-raised
            after the partially written CSV has been removed, so a later run
            does not skip that sample as already processed.
    """
    install()
    args = parse_args()
    Configuration.verbose = args.verbose

    verbose_start_msg()

    sample_paths = get_samples(args.dirs, args.pattern, args.verbose)

    # Loop invariants: label -> class name mapping and the matching CSV header.
    labels_dict = {1: 'somata', 3: 'endothelial', 4: 'astrocytes'}
    header = "sample,somata_count,endothelial_count,astrocytes_count,total_count,somata_prop,endothelial_prop,astrocytes_prop\n"

    progress, task_id = initialize_progress_bar(len(sample_paths), "[red]Processing samples...")
    with Live(progress):
        for sample_path in sample_paths:
            seg_folder = sample_path / args.seg_dir
            output = seg_folder / f"{sample_path.name}_segmentation_summary.csv"

            if output.exists():
                print(f"Skipping {sample_path}: '{output}' already exists.")
            elif not seg_folder.is_dir():
                # Without the directory the CSV cannot be created; warn and move on
                # instead of aborting the whole batch.
                print(f"[red]Warning: skipping {sample_path}: '{seg_folder}' does not exist.")
            else:
                try:
                    with open(output, 'w') as output_csv:
                        output_csv.write(header)
                        process_and_write_line(sample_path.name, seg_folder, output_csv, labels_dict)
                except Exception:
                    # Remove the incomplete CSV so the sample is not skipped on the next run.
                    if output.exists():
                        output.unlink()
                    raise

            # Advance for processed and skipped samples alike.
            progress.update(task_id, advance=1)

    verbose_end_msg()
# Run the command-line entry point only when this file is executed as a script.
if __name__ == '__main__':
    main()