% Conference paper (TAI '93) on classifying office documents by matching
% layout structure trees (L-S-Trees) against retained samples.
% Cleaned from an automated export: the placeholder editor ("Anon") and the
% series field that merely repeated the booktitle were dropped, the publisher
% was reduced to its name, and the proceedings edition/acronym were moved from
% the note into booktitle. Conference dates are read as day-month-year.
@inproceedings{efbd67c9e23844c2846413ca8e71da64,
  author    = {Hao, Xiaolong and Wang, Jason T. and Bieber, Michael P. and Ng, Peter A.},
  title     = {Tool for classifying office documents},
  booktitle = {Proceedings of the 5th International Conference on Tools with Artificial Intelligence ({TAI} '93)},
  year      = {1993},
  month     = nov,
  pages     = {427--434},
  publisher = {IEEE},
  isbn      = {0818642009},
  language  = {english},
  note      = {Conference dates: 8--11 November 1993},
  abstract  = {This paper presents the design of a tool for classifying office documents. We represent a document's layout structure using an ordered labeled tree, called the 'layout structure tree' (L-S-Tree), based on a nested segmentation procedure. The tool uses a sample-based approach for learning where concepts are learned by retaining samples and new documents are classified by matching their L-S-Trees with samples. The matching process involves both computing the edit distance between two trees using a previously developed pattern matching toolkit, and calculating the degree of conceptual closeness between the documents and samples. Our experimental results show that the tool is capable of classifying various types of office documents, even with very few samples in the sample base.},
}