% Conference paper by Ruta, Cen and Damiani in the MLDM 2017 proceedings (Springer LNCS).
% Cleaned from an auto-exported record: names are in "Last, First" form, the percent
% sign in the abstract is escaped so the field is safe when a style prints it, and
% the address holds the publisher's city rather than a country.
@inproceedings{b7e9f026e85f432b9b9ffd21a5cfcdbd,
  author    = {Ruta, Dymitr and Cen, Ling and Damiani, Ernesto},
  title     = {Summarization-guided greedy optimization of machine learning model},
  booktitle = {Machine Learning and Data Mining in Pattern Recognition -- 13th International Conference, {MLDM} 2017, Proceedings},
  editor    = {Perner, Petra},
  series    = {Lecture Notes in Computer Science},
  publisher = {Springer International Publishing},
  address   = {Cham},
  year      = {2017},
  pages     = {306--321},
  doi       = {10.1007/978-3-319-62416-7_22},
  isbn      = {9783319624150},
  language  = {British English},
  keywords  = {Backward-forward search, Big data, Data summarization, Feature selection, Machine learning, Meta-learning},
  note      = {Publisher Copyright: {\textcopyright} Springer International Publishing AG 2017.; 13th International Conference on Machine Learning and Data Mining in Pattern Recognition, MLDM 2017 ; Conference date: 15-07-2017 Through 20-07-2017},
  abstract  = {Immense amounts of unstructured data account for up to 90\% of all human generated data, yet the attempts to extract significant value from it with Machine Learning (ML) and Big Data (BD) technologies yield limited successes. We propose a generic approach to deep data summarization and subsequent automated ML design optimization to extract maximum predictive value from big data. Knowledge summarization is a central component of the proposed methodology and we argue that coupled with strictly linear modeling complexity, hierarchical decomposition and optimized model design may define a backbone of the new platform for automated and scalable construction of robust ML models. We consider ML build process as data journeys through the layers of modeling that consistently follow the same patterns of data summarization and transformation at the subsequent layers of abstraction. In such framework we argue that the robust construction of the ML model can be achieved through hierarchical greedy optimization of the links between connected ML model components. We demonstrate several case studies of deep data summarization and automated ML model design on text, numerical time series and images data. We point out that application awareness allows to deepen data summarizations while maintaining or improving its predictive value.},
}