Source code for unravel.abca.sunburst.sunburst_filter
#!/usr/bin/env python3
"""
Use ``abca_sunburst_filter`` or ``sbf`` from UNRAVEL to filter ABCA sunburst expression data, keeping cells with high expression at any level (class, subclass, supertype, cluster).
Prereqs:
- ``abca_suburst_expression``
Notes:
- Use LUTs from ``abca_suburst_expression`` for coloring the sunburst plot.
Usage for first run:
--------------------
abca_sunburst_filter -i path/main_ABCA_sunburst_expression.csv -g geneX -o ABCA_sunburst_filtered/ [-n] [-c 10] [-t 6]
Usage to apply filtering from another dataset:
----------------------------------------------
abca_sunburst_filter -i path/_main_ABCA_sunburst_filter.csv -a path/secondary_ABCA_sunburst_expression.csv -g geneX -o new_output [-n] [-c 10] [-t 6]
"""
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import numpy as np
import pandas as pd
import shutil
from pathlib import Path
from rich import print
from rich.traceback import install
from unravel.core.help_formatter import RichArgumentParser, SuppressMetavar, SM
from unravel.core.config import Configuration
from unravel.core.utils import log_command, verbose_start_msg, verbose_end_msg
[docs]
def parse_args():
parser = RichArgumentParser(formatter_class=SuppressMetavar, add_help=False, docstring=__doc__)
reqs = parser.add_argument_group('Required arguments')
reqs.add_argument('-i', '--input', help='path/main_ABCA_sunburst_expression.csv (the primary file for determining filtering by expression)', required=True, action=SM)
opts = parser.add_argument_group('Optional args')
opts.add_argument('-t', '--threshold', help='Log2(CPM+1) threshold for percent gene expression (rec: use same thresh as for ABCA_sunburst_expression.py). Default: 6', default=6, type=float, action=SM)
opts.add_argument('-l', '--level', help='Level to filter on. Default: supertype', default='supertype', action=SM)
opts.add_argument('-o', '--output', help='Output dir path. Default: ABCA_sunburst_filtered_thr6_<level>', default=None, action=SM)
opts.add_argument('-a', '--apply_to', help='path/secondary_ABCA_sunburst_expression.csv (the secondary file to filter based on the primary file)', action=SM)
general = parser.add_argument_group('General arguments')
general.add_argument('-v', '--verbose', help='Increase verbosity. Default: False', action='store_true', default=False)
return parser.parse_args()
[docs]
@log_command
def main():
install()
args = parse_args()
Configuration.verbose = args.verbose
verbose_start_msg()
main_sunburst_exp_df = pd.read_csv(args.input)
print('\nMain sunburst expression data:')
print(main_sunburst_exp_df)
# Keep cells that have high mean expression (e.g., >= 6) of the gene of interest at the specified level
cell_mean_col = f'{args.level}_mean'
cells_df_filtered = main_sunburst_exp_df[
(main_sunburst_exp_df[cell_mean_col] >= args.threshold)
]
print('\nMain sunburst expression data after filtering:')
print(cells_df_filtered)
# Save the filtered results
if args.output:
output_dir = Path(args.output)
else:
output_dir = Path(f'ABCA_sunburst_filtered_thr{args.threshold}_{args.level}')
output_dir.mkdir(parents=True, exist_ok=True)
output_path = output_dir / str(Path(args.input).name).replace('.csv', f'_filtered.csv')
cells_df_filtered.to_csv(output_path, index=False)
# Copy the mean expression LUT
mean_lut = str(args.input).replace(f'expression_thr{args.threshold}.csv', 'mean_expression_lut.txt')
if Path(mean_lut).exists():
shutil.copy(mean_lut, output_dir)
if args.apply_to:
# Get list of clusters with high expression
filtered_clusters = cells_df_filtered['cluster'].unique()
print(f"\nClusters with high expression: {filtered_clusters}")
# Load the secondary dataset
secondary_sunburst_exp_df = pd.read_csv(args.apply_to)
print('\nSecondary sunburst expression data:')
print(secondary_sunburst_exp_df)
# Filter the secondary dataset
secondary_cells_df_filtered = secondary_sunburst_exp_df[secondary_sunburst_exp_df['cluster'].isin(filtered_clusters)]
print('\nSecondary sunburst expression data after filtering:')
print(secondary_cells_df_filtered)
# Save the filtered results
output_path = output_dir / str(Path(args.apply_to).name).replace('.csv', f'_filtered_with_{Path(args.input).name}')
secondary_cells_df_filtered.to_csv(output_path, index=False)
# Copy the mean expression LUT
if str(args.apply_to).endswith(f'expression_thr{args.threshold}.csv'): # scRNA-seq data
mean_lut = str(args.apply_to).replace(f'expression_thr{args.threshold}.csv', 'mean_expression_lut.txt')
elif str(args.apply_to).endswith(f'expression_summary.csv'): # MERFISH data
mean_lut = str(args.apply_to).replace(f'expression_summary.csv', 'mean_expression_lut.txt')
if Path(mean_lut).exists():
shutil.copy(mean_lut, output_dir)
verbose_end_msg()
if __name__ == '__main__':
main()