% Ling et al. (2022): ConQuR, batch-effect removal for microbiome data via
% conditional quantile regression. Nature Communications 13(1).
% The note field keeps the funding/acknowledgement statement once (the export
% had it duplicated verbatim) followed by the publisher copyright line.
@article{cb6fb2da5ce340149c2525448ea25209,
  author    = {Ling, Wodan and Lu, Jiuyao and Zhao, Ni and Lulla, Anju and Plantinga, Anna M. and Fu, Weijia and Zhang, Angela and Liu, Hongjiao and Song, Hoseung and Li, Zhigang and Chen, Jun and Randolph, Timothy W. and Koay, Wei Li A. and White, James R. and Launer, Lenore J. and Fodor, Anthony A. and Meyer, Katie A. and Wu, Michael C.},
  title     = {Batch effects removal for microbiome data via conditional quantile regression},
  journal   = {Nature Communications},
  year      = {2022},
  month     = dec,
  volume    = {13},
  number    = {1},
  issn      = {2041-1723},
  doi       = {10.1038/s41467-022-33071-9},
  language  = {English (US)},
  publisher = {Nature Publishing Group},
  abstract  = {Batch effects in microbiome data arise from differential processing of specimens and can lead to spurious findings and obscure true signals. Strategies designed for genomic data to mitigate batch effects usually fail to address the zero-inflated and over-dispersed microbiome data. Most strategies tailored for microbiome data are restricted to association testing or specialized study designs, failing to allow other analytic goals or general designs. Here, we develop the Conditional Quantile Regression (ConQuR) approach to remove microbiome batch effects using a two-part quantile regression model. ConQuR is a comprehensive method that accommodates the complex distributions of microbial read counts by non-parametric modeling, and it generates batch-removed zero-inflated read counts that can be used in and benefit usual subsequent analyses. We apply ConQuR to simulated and real microbiome datasets and demonstrate its advantages in removing batch effects while preserving the signals of interest.},
  note      = {Funding Information: This work was supported in part by R01 GM129512 (M.C.W.) and R01 HL155417 (M.C.W.). The Coronary Artery Risk Development in Young Adults Study (CARDIA) is supported by contracts HHSN268201800003I, HHSN268201800004I, HHSN268201800005I, HHSN268201800006I, and HHSN268201800007I from the National Heart, Lung, and Blood Institute (NHLBI). The HIVRC data used in this study are from work that was supported by the HIV Microbiome Re-analysis Consortium. The authors thank Drs. Susan A. Tuddenham and Cynthia L. Sears for their support and review of the manuscript, also thank Dr. Khalil G. Ghanem and all members on the HIV Microbiome Re-analysis Consortium for collecting and processing the HIVRC dataset. Finally, we thank Dr. Ying Zhou for helping generate simulation plots. Publisher Copyright: {\textcopyright} 2022, The Author(s).},
}