% Conference paper (CIBCB 2018). Cleaned from a repository export: the redundant
% series field (identical to booktitle) was dropped, proper nouns and acronyms
% are brace-protected, and percent signs in the abstract are escaped.
@inproceedings{fe2d16a2aa67457c8b0d7f545ae20839,
  author    = {Aljouie, Abdulrhman and Roshan, Usman and Patel, Nihir},
  title     = {Cross-validation and cross-study validation of kidney cancer with machine learning and whole exome sequences from the {National} {Cancer} {Institute}},
  booktitle = {2018 {IEEE} Conference on Computational Intelligence in Bioinformatics and Computational Biology, {CIBCB} 2018},
  pages     = {1--6},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  year      = {2018},
  month     = jul,
  day       = {5},
  doi       = {10.1109/CIBCB.2018.8404967},
  language  = {English (US)},
  note      = {Publisher Copyright: {\textcopyright} 2018 IEEE.; 2018 IEEE Conference on Computational Intelligence in Bioinformatics and Computational Biology, CIBCB 2018 ; Conference date: 30-05-2018 Through 02-06-2018},
  abstract  = {Accurate cancer risk prediction from genetic and environment variables is a key problem in medicine. One approach is to use somatic mutations which could potentially be used in early detection and prevention. SNP based studies are the most common ones utilizing this approach, however most studies lack a cross-study validation component across at least two independent studies. Here we explore the cross-validation and cross-study validation of predicting kidney cancer case and controls with SNPs obtained from whole exome sequences at the National Cancer Institute. From the Genomics Data Commons portal we obtained aligned whole exome sequences of two different kidney cancer studies: 110 cases and controls of KIRP for renal papillary cell carcinoma and 34 cases and controls of KICH for kidney chromophobe cell carcinoma. We performed a rigorous quality control procedure to obtain SNPs and rank them with feature selection. On top ranked SNPs we find the support vector machine to obtain a cross-validation accuracy of 71\% (with 10 SNPs) and 72\% (with 20 SNPs) in KIRP and KICH respectively. We then learn a model on KIRP and with 10 SNPs achieve an accuracy of 66\% on the KICH samples. Our work shows that we can predict kidney chromophobe carcinoma from a kidney papillary carcinoma dataset with better than a random classification which would have 50\% accuracy. In continuing work we are expanding these sample sizes and extending cross-study to other kidney cancer datasets in the NCI GDC portal.},
}