% Conference paper presenting GeneScout, an HMM-based system for predicting
% gene structures in vertebrate genomic DNA (ICDM 2002). The citation key is
% an exporter-generated identifier and is kept as-is so existing \cite commands
% continue to resolve.
@inproceedings{72f73a23b067462580cc4f58d89e55c1,
title = "Mining Genes in {DNA} Using {GeneScout}",
abstract = "In this paper, we present a new system, called GeneScout, for predicting gene structures in vertebrate genomic DNA. The system contains specially designed hidden Markov models (HMMs) for detecting functional sites including protein-translation start sites, mRNA splicing junction donor and acceptor sites, etc. Our main hypothesis is that, given a vertebrate genomic DNA sequence S, it is always possible to construct a directed acyclic graph G such that the path for the actual coding region of S is in the set of all paths on G. Thus, the gene detection problem is reduced to that of analyzing the paths in the graph G. A dynamic programming algorithm is used to find the optimal path in G. The proposed system is trained using an expectation-maximization (EM) algorithm and its performance on vertebrate gene prediction is evaluated using the 10-way cross-validation method. Experimental results show the good performance of the proposed system and its complementarity to a widely used gene detection system.",
keywords = "Bioinformatics, Data mining, Gene finding, Hidden Markov models, Knowledge discovery",
author = "Yin, Michael M. and Wang, Jason T. L.",
year = "2002",
language = "English (US)",
isbn = "0769517544",
series = "Proceedings - {IEEE} International Conference on Data Mining, {ICDM}",
pages = "733--736",
booktitle = "Proceedings - 2002 {IEEE} International Conference on Data Mining, {ICDM} 2002",
note = "2nd IEEE International Conference on Data Mining, ICDM '02 ; Conference date: 09-12-2002 Through 12-12-2002",
}