% Journal article: Wang et al., "Complementary classification approaches for
% protein sequences", Protein Engineering 9(5), 1996.
% The `note` field is typeset by standard styles, so LaTeX special characters
% in it (e.g. `&`) must stay escaped. `abstract`, `keywords` and `language`
% are ignored by the classic BibTeX styles and kept here as metadata only.
@article{d4cd1b68348c49169a52cf8331756422,
  author    = {Wang, Jason T. L. and Marr, Thomas G. and Shasha, Dennis and Shapiro, Bruce A. and Chirn, Gung Wei and Lee, T. Y.},
  title     = {Complementary classification approaches for protein sequences},
  journal   = {Protein Engineering},
  volume    = {9},
  number    = {5},
  pages     = {381--386},
  year      = {1996},
  month     = may,
  publisher = {Oxford University Press},
  issn      = {0269-2139},
  doi       = {10.1093/protein/9.5.381},
  language  = {English (US)},
  keywords  = {Block searching, Protein classification, Protein database, SWISS-PROT, Statistical approaches},
  abstract  = {We have studied five methods of protein classification and have applied them to the 768 groups of related proteins in the PROSITE catalog. Four of these methods are based on searching a database of blocks, and the other uses the frequently occurring motifs found in the protein families combined with a fingerprint technique. Our experimental results show that the block-based methods perform well when taking into account the probability of amino acids occurring in a block. Furthermore, the five methods give information that is complementary to each other. Thus, using the five methods together, one can obtain high confidence classifications (if the results agree) or suggest alternative hypotheses (if the results disagree). We also list those proteins whose current families documented in the PROSITE catalog differ from those suggested by our results. There are remarkably few of them, which is a testimony to the quality of PROSITE.},
  note      = {Funding Information: The authors thank the anonymous referees for their constructive criticism and useful comments which helped to improve both the quality and presentation of this paper. We also thank Dr Eugene Koonin for offering programs to expand blocks and calculate weight matrices; Dr Steven Henikoff and Jorja Henikoff for offering programs to generate PROSITE groups and for detailed explanation regarding the calculation of the weight matrix used by the BLOCKS server, and Professor David Haussler for pointing out an error concerning Dirichlet mixture densities in an early version of the paper. This work was supported, in part, by the National Science Foundation under grants IRI-8901699, CCR-9103953, IRI-9224601 and IRI-9224602, by the Office of Naval Research under grants N00014-90-J-1110, N00014-91-J-1472 and N00014-92-J-1719, by the New Jersey Institute of Technology under grant number 421280 and by a grant from the AT\&T Foundation. T.G.M. was supported by the Department of Energy under grant DE-FG02-91ER61190 and by the National Institutes of Health under grant 1R01-HG0020301A1.},
}