% Journal article record (repository export). Author names are stored in "Last, First" form;
% the note keeps the funding / contribution / copyright statements exactly once.
@article{b167788ad3f04d548d5e8e05fa3bd776,
  author    = {Do, Van Hoan and Elbassioni, Khaled and Canzar, Stefan},
  title     = {{Sphetcher}: Spherical Thresholding Improves Sketching of Single-Cell Transcriptomic Heterogeneity},
  journal   = {iScience},
  year      = {2020},
  month     = jun,
  day       = {26},
  volume    = {23},
  number    = {6},
  doi       = {10.1016/j.isci.2020.101126},
  issn      = {2589-0042},
  publisher = {Elsevier Inc.},
  language  = {British English},
  keywords  = {Bioinformatics, Data Analysis, Transcriptomics},
  abstract  = {The massive size of single-cell RNA sequencing datasets often exceeds the capability of current computational analysis methods to solve routine tasks such as detection of cell types. Recently, geometric sketching was introduced as an alternative to uniform subsampling. It selects a subset of cells (the sketch) that evenly cover the transcriptomic space occupied by the original dataset, to accelerate downstream analyses and highlight rare cell types. Here, we propose algorithm Sphetcher that makes use of the thresholding technique to efficiently pick representative cells within spheres (as opposed to the typically used equal-sized boxes) that cover the entire transcriptomic space. We show that the spherical sketch computed by Sphetcher constitutes a more accurate representation of the original transcriptomic landscape. Our optimization scheme allows to include fairness aspects that can encode prior biological or experimental knowledge. We show how a fair sampling can inform the inference of the trajectory of human skeletal muscle myoblast differentiation.},
  note      = {Funding Information: V.H.D. was supported by a Deutsche Forschungsgemeinschaft fellowship through the Graduate School of Quantitative Biosciences Munich. We thank the Hemberg Group at the Sanger Institute for providing gene counts for datasets muraro, klein, zeisel, and chen, and the authors of Hie et al. (2019) for providing datasets zeiselCNS, saunders, and the dataset of umbilical cord blood cells. All authors conceived the algorithm. V.H.D. performed the computational experiments. All authors wrote the manuscript. The authors declare no competing interests. Publisher Copyright: {\textcopyright} 2020 The Authors},
}