% Conference paper in the proceedings of the 2019 IEEE International Conference on Big Data.
% Cleaned export: personal names are in "Last, First" form without escaped braces,
% and the "series" field was dropped because it only repeated "booktitle" verbatim.
@inproceedings{29cb0e52259e4e49b74025af02ddb0f7,
  author    = {Vu, Quang Hieu and Ruta, Dymitr and Cen, Ling},
  title     = {Gradient boosting decision trees for cyber security threats detection based on network events logs},
  booktitle = {Proceedings - 2019 {IEEE} International Conference on Big Data, Big Data 2019},
  editor    = {Baru, Chaitanya and Huan, Jun and Khan, Latifur and Hu, Xiaohua Tony and Ak, Ronay and Tian, Yuanyuan and Barga, Roger and Zaniolo, Carlo and Lee, Kisung and Ye, Yanfang Fanny},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  pages     = {5921--5928},
  year      = {2019},
  month     = dec,
  doi       = {10.1109/BigData47090.2019.9006061},
  language  = {British English},
  note      = {Publisher Copyright: {\textcopyright} 2019 IEEE.; 2019 IEEE International Conference on Big Data, Big Data 2019 ; Conference date: 09-12-2019 Through 12-12-2019},
  abstract  = {The rapid expansion of Internet of Things (IoT) quickly evolves towards a connected network of AI-enabled, smart, multi-sensory devices that generate, consume and exchange enormous amounts of data. Stimulated by the omnipresent cloud-services and ever widening data bandwidth they aspire to control every aspect of our lives from transportation to our health, significantly increasing our reliance on the web-based services and their security. Already millions of cyber security alerts are generated every day and trigger increasingly costly investigations of the Security Operations Centres (SOC). In order to make their operation more efficient security warnings need to be reliably detected and classified based on various levels of severity, scale of potential damage or an ability to defend. We have responded to this challenge in the context of IEEE BigData Cup 2019 focused on predicting cyber security threats that require attention based on detailed logs of network activity leading to the security alert. We have developed a hybrid supervised learning ensemble model combining several state-of-the-art Extreme Gradient Boosting algorithms. Specifically, xGBoost and LightGBM model versions have been built on separate sets of features extracted from the raw logs of network events preceding the alerts and then synergically aggregated. Models' diversity imposed by algorithmic differences, complementary feature subsets, and individually optimized hyperparameters, combined with robust stratified cross-validation scheme resulted with the best true alerts detection rate yielding the AUC score in excess of 0.93, that outperformed all other 248 competing teams.},
}