% Conference paper: Wang, Thint and Al-Rubaie, WI-IAT 2012 Workshops.
% Cleaned-up auto-export: the citation key is kept as-is so existing \cite
% commands continue to resolve. The "series" field was dropped because it
% merely repeated the booktitle and is not a real publication series.
@inproceedings{9f97338c4b8a40c2a1c68a1f11a23bcd,
  author    = {Wang, Di and Thint, Marcus and Al-Rubaie, Ahmad},
  title     = {Semi-supervised latent {Dirichlet} allocation and its application for document classification},
  booktitle = {Proceedings of the 2012 {IEEE/WIC/ACM} International Conference on Web Intelligence and Intelligent Agent Technology Workshops, {WI-IAT} 2012},
  year      = {2012},
  pages     = {306--310},
  doi       = {10.1109/WI-IAT.2012.211},
  isbn      = {9780769548807},
  language  = {British English},
  keywords  = {Latent Dirichlet allocation (LDA), natural language processing, semi-supervised LDA, semi-supervised learning, supervised learning, unsupervised learning},
  abstract  = {Latent Dirichlet Allocation (LDA) is an unsupervised topic modeling method widely applied in natural language processing. However, standard LDA does not permit the use of supervised labels to incorporate expert knowledge into the learning procedure. This paper describes a semi-supervised LDA (ssLDA) method that supports multiple-topic labels per document, to incorporate available expert knowledge during the model construction. This improvement enables the alignment of resulting model with human expectations for topic modeling and extraction. We apply ssLDA to document classification problem on benchmark datasets. We investigate and compare how the size of training set and proportion of supervised data affect the final model structure and improve the prediction accuracy.},
  note      = {2012 IEEE/WIC/ACM International Conference on Web Intelligence and Intelligent Agent Technology Workshops, WI-IAT 2012 ; Conference date: 04-12-2012 Through 07-12-2012},
}