% ICASSP 2000 conference paper on HMM-based dialogue scene indexing.
% Author names use the unambiguous "Last, First" form; "Markov" is brace-protected
% so that sentence-casing styles keep its capital.
% NOTE(review): "address" holds a country taken from the export, not a publisher
% city or the conference venue -- confirm before relying on it.
@inproceedings{265d2111fa1b43cb97279abea7339ae5,
  author    = {Alatan, A. Aydin and Akansu, Ali N. and Wolf, Wayne},
  title     = {Comparative analysis of hidden {Markov} models for multi-modal dialogue scene indexing},
  booktitle = {Image and Multidimensional Signal Processing; Multimedia Signal Processing},
  series    = {ICASSP, IEEE International Conference on Acoustics, Speech and Signal Processing - Proceedings},
  pages     = {2401--2404},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  year      = {2000},
  doi       = {10.1109/ICASSP.2000.859325},
  language  = {English (US)},
  note      = {Publisher Copyright: {\textcopyright} 2000 IEEE.; 25th IEEE International Conference on Acoustics, Speech, and Signal Processing, ICASSP 2000 ; Conference date: 05-06-2000 Through 09-06-2000},
  abstract  = {A class of audio-visual content is segmented into dialogue scenes using the state transitions of a novel hidden Markov model (HMM). Each shot is classified using both audio track and visual content to determine the state/scene transitions of the model. After simulations with circular and left-to-right HMM topologies, it is observed that both are performing very good with multi-modal inputs. Moreover, for circular topology, the comparisons between different training and observation sets show that audio and face information together gives the most consistent results among different observation sets.},
}