% Conference paper in the FedCSIS 2023 proceedings (entry cleaned after auto-export).
% - editor list de-duplicated (each editor listed once).
% - redundant series field (identical to booktitle) dropped so the proceedings
%   title is not printed twice.
% NOTE(review): address holds a country, not a publisher city -- confirm and replace.
% NOTE(review): surname "Slezak" may need diacritics -- confirm against the proceedings.
@inproceedings{b415b8ff67db46e4a4a6f7836e8f6858,
  author    = {Ruta, Dymitr and Liu, Ming and Cen, Ling},
  title     = {Beating Gradient Boosting: Target-Guided Binning for Massively Scalable Classification in Real-Time},
  booktitle = {Proceedings of the 18th Conference on Computer Science and Intelligence Systems, FedCSIS 2023},
  editor    = {Ganzha, Maria and Maciaszek, Leszek and Paprzycki, Marcin and Slezak, Dominik},
  pages     = {1301--1306},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  year      = {2023},
  doi       = {10.15439/2023F7166},
  language  = {British English},
  note      = {Publisher Copyright: {\textcopyright} 2023 Polish Information Processing Society.; 18th Conference on Computer Science and Intelligence Systems, FedCSIS 2023 ; Conference date: 17-09-2023 Through 20-09-2023},
  abstract  = {Gradient Boosting (GB) consistently outperforms other ML predictors especially in the context of binary classification based on multi-modal data of different forms and types. Its newest efficient implementations, including XGBoost, LGBM and CATBoost, push GB even further ahead with fast GPU-accelerated compute engine and optimized handling of categorical features. In an attempt to beat GB in both the performance and processing speed we propose a new simple yet fast and robust classification model based on predictive binning. At first all features undergo massively parallelized binning into a unified ordinally compressed risk representation, independently optimized to maximize the AUC score against the target. The resultant array of summarized micro-predictors, resembling 0-depth decision trees, directly expressing ordinally represented target risk, are then passed through the greedy feature selection to compose a robust wide-margin voting classifier, whose performance can beat GB while the extreme build and execution speed along with highly compressed representation welcomes extreme data sizes and realtime applicability. The model has been applied to detect cyber-security attacks on IoT devices within FedCSIS'2023 Challenge and scored 2nd place with the AUC ≈ 1, leaving behind all the latest GB variants in performance and speed.},
}