Source code for unravel.allen_institute.abca.sunburst.sunburst

#!/usr/bin/env python3

"""
Use ``abca_sunburst`` or ``sb`` from UNRAVEL to generate a sunburst plot of cell type proportions across all ontological levels.

Prereqs: 
    - merfish_filter.py or RNAseq_expression.py + RNAseq_join_expression_data.py to generate the input cell metadata.
    
Outputs:
    - path/input_sunburst.csv and [WMB_sunburst_colors.csv (mouse) or WHB_sunburst_colors.csv (human) if --output_lut is provided]

Note:
    - LUT location: unravel/core/csvs/ABCA/

Next steps:
    - Use input_sunburst.csv to make a sunburst plot or regional volumes in Flourish Studio (https://app.flourish.studio/)
    - It can be pasted into the Data tab (categories columns = cell type columns, Size by = percent column)
    - Preview tab: Hierarchy -> Depth to 5, Colors -> paste hex codes from ..._sunburst_colors.csv into Custom overrides

Usage:
------ 
    abca_sunburst -i path/cell_metadata_filtered.csv [-s human] [-n] [-l] [-v]
"""

import numpy as np
import pandas as pd
import shutil
from pathlib import Path
from rich import print
from rich.traceback import install

from unravel.core.help_formatter import RichArgumentParser, SuppressMetavar, SM
from unravel.core.config import Configuration 
from unravel.core.utils import log_command, verbose_start_msg, verbose_end_msg


def parse_args():
    """Define and parse the command line arguments for ``abca_sunburst``.

    Returns
    -------
    argparse.Namespace
        Parsed arguments with the attributes ``input``, ``species``, ``neurons``,
        ``output_lut`` and ``verbose``.
    """
    parser = RichArgumentParser(formatter_class=SuppressMetavar, add_help=False, docstring=__doc__)

    reqs = parser.add_argument_group('Required arguments')
    reqs.add_argument('-i', '--input', help='path/cell_metadata_filtered.csv', required=True, action=SM)

    opts = parser.add_argument_group('Optional args')
    opts.add_argument('-s', '--species', help='Species to analyze ("mouse" or "human"). Default: mouse', default='mouse', action=SM)
    opts.add_argument('-n', '--neurons', help='Filter out non-neuronal cells. Default: False', action='store_true', default=False)
    # The LUT that gets copied depends on --species, so the help names both files.
    # Parentheses are used on purpose: square brackets could be read as markup by the rich-based help formatter.
    opts.add_argument('-l', '--output_lut', help='Output the ABCA color LUT if flag provided (WMB_sunburst_colors.csv for mouse, WHB_sunburst_colors.csv for human)', action='store_true')

    general = parser.add_argument_group('General arguments')
    general.add_argument('-v', '--verbose', help='Increase verbosity. Default: False', action='store_true', default=False)

    return parser.parse_args()
def filter_non_neuronal_cells(cells_df, species):
    """Filter out non-neuronal cells based on the species.

    Parameters
    ----------
    cells_df : pandas.DataFrame
        Cell metadata. Must contain a 'class' column for mouse or a
        'supercluster' column for human.
    species : str
        'mouse' or 'human' (case-insensitive).

    Returns
    -------
    pandas.DataFrame
        The rows of ``cells_df`` that are not classified as non-neuronal.

    Raises
    ------
    ValueError
        If ``species`` is neither 'mouse' nor 'human'.
    """
    species = species.lower()

    if species == 'mouse':
        # The first whitespace-delimited token of 'class' is parsed as an integer ID;
        # rows with an ID of 29 or lower are kept.
        class_ids = cells_df['class'].str.split().str[0].astype(int)
        return cells_df[class_ids <= 29]

    if species == 'human':
        nonneurons = [
            'Oligodendrocyte',
            'Committed oligodendrocyte precursor',
            'Oligodendrocyte precursor',
            'Astrocyte',
            'Ependymal',
            'Microglia',
            'Vascular',
            'Bergmann glia',
            'Fibroblast',
            'Choroid plexus',
        ]
        supercluster = cells_df['supercluster'].str.strip()
        # Match on the full name so that multi-word entries (e.g. 'Bergmann glia')
        # are recognised; comparing only the first word can never match them.
        full_name_match = supercluster.isin(nonneurons)
        # The first-word match is kept so labels that were filtered before still are.
        first_word_match = supercluster.str.split().str[0].isin(nonneurons)
        return cells_df[~(full_name_match | first_word_match)]

    raise ValueError(f"Unsupported species: {species}. Supported species are 'mouse' and 'human'.")
@log_command
def main():
    """Build the sunburst table of cell type percentages and save it as a CSV.

    Reads the cell metadata CSV given by ``--input``, optionally drops
    non-neuronal cells, computes the percentage of cells in each finest-level
    cell type ('subcluster' for human, 'cluster' for mouse), and writes one row
    per unique cell type hierarchy to ``<input stem>_sunburst.csv`` next to the
    input. With ``--output_lut``, the matching color LUT is copied to the same
    directory.

    Raises
    ------
    ValueError
        If ``--species`` is neither 'mouse' nor 'human'.
    """
    install()
    args = parse_args()
    Configuration.verbose = args.verbose

    verbose_start_msg()

    species = args.species.lower()
    input_path = Path(args.input)

    # Load only the cell type columns used for the chosen species
    if species == 'mouse':
        cells_df = pd.read_csv(args.input, usecols=['neurotransmitter', 'class', 'subclass', 'supertype', 'cluster'])
    elif species == 'human':
        cells_df = pd.read_csv(args.input, usecols=['neurotransmitter', 'supercluster', 'cluster', 'subcluster'])
    else:
        raise ValueError(f"Unsupported species: {args.species}. Supported species are 'mouse' and 'human'.")

    # Replace blank values in 'neurotransmitter' column with 'NA'
    cells_df['neurotransmitter'] = cells_df['neurotransmitter'].fillna('NA')

    if args.neurons:
        cells_df = filter_non_neuronal_cells(cells_df, species)

    # Group by the finest level of granularity (cluster or subcluster) to get the cell count per cell type
    fine_level_col = 'subcluster' if 'subcluster' in cells_df.columns else 'cluster'
    fine_df = cells_df.groupby(fine_level_col).size().reset_index(name='counts')
    fine_df = fine_df.sort_values('counts', ascending=False)

    # Convert the counts to a percentage of all counted cells, then drop the raw counts
    fine_df['percent'] = fine_df['counts'] / fine_df['counts'].sum() * 100
    fine_df = fine_df.drop(columns='counts')

    # Attach the percentage to every cell, then collapse to one row per unique hierarchy
    cells_df = cells_df.merge(fine_df, on=fine_level_col)
    cells_df = cells_df.drop_duplicates()
    cells_df = cells_df.sort_values('percent', ascending=False).reset_index(drop=True)

    # If human, insert a placeholder column ('.') after subcluster and before percent
    if species == 'human':
        cells_df.insert(cells_df.columns.get_loc('subcluster') + 1, '.', '.')

    print(f'\n{cells_df}\n')

    # Build the output name from the file stem rather than with str.replace('.csv', ...):
    # that would also rewrite '.csv' inside directory names and, for an input without a
    # '.csv' suffix, would yield the input path itself (overwriting the input file).
    output_path = input_path.with_name(f'{input_path.stem}_sunburst.csv')
    cells_df.to_csv(output_path, index=False)

    if args.output_lut:
        # species was validated above, so it is either 'mouse' or 'human' here
        lut_dir = Path(__file__).parent.parent.parent.parent.parent / 'unravel' / 'core' / 'csvs' / 'ABCA'
        lut_name = 'WMB_sunburst_colors.csv' if species == 'mouse' else 'WHB_sunburst_colors.csv'
        lut_path = lut_dir / lut_name
        shutil.copy(lut_path, input_path.parent / lut_path.name)

    verbose_end_msg()
# Run the command line entry point only when this file is executed as a script.
if __name__ == '__main__':
    main()