Destacadas
Díaz-Verdejo, Jesús E.; Estepa Alonso, Rafael; Estepa Alonso, Antonio; Muñoz-Calle, F. J.; Madinabeitia, German
Building a large, realistic and labeled HTTP URI dataset for anomaly-based intrusion detection systems: Biblio-US17 Artículo de revista En preparación
En: Cybersecurity, En preparación, ISSN: 2523-3246.
@article{Biblio24,
title = {Building a large, realistic and labeled HTTP URI dataset for anomaly-based intrusion detection systems: Biblio-US17 },
author = {Jesús E. {Díaz-Verdejo} and {Estepa Alonso}, Rafael and {Estepa Alonso}, Antonio and F. J. Muñoz-Calle and German
Madinabeitia},
doi = {https://doi.org/10.1186/s42400‑024‑00336‑3},
issn = {2523-3246},
year = {2024},
date = {2024-12-11},
urldate = {2024-12-11},
journal = {Cybersecurity},
abstract = {This paper introduces Biblio-US17, a labeled dataset collected over 6 months from the log files of a popular public website at the University of Seville. It contains 47 million records, each including the method, uniform resource identifier (URI) and associated response code and size of every request received by the web server. Records have been classified as either normal or attack using a comprehensive semi-automated process, which involved signature-based detection, assisted inspection of URIs vocabulary, and substantial expert manual supervision. Unlike comparable datasets, this one offers a genuine real-world perspective on the normal operation of an active website, along with an unbiased proportion of actual attacks (i.e., non-synthetic). This makes it ideal for evaluating and comparing anomalybased approaches in a realistic environment. Its extensive size and duration also make it valuable for addressing challenges like data shift and insufficient training. This paper describes the collection and labeling processes, dataset structure, and most relevant properties. We also include an example of an application for assessing the performance of a simple anomaly detector. Biblio-US17, now available to the scientific community, can also be used to model the URIs used by current web servers.},
keywords = {},
pubstate = {forthcoming},
tppubtype = {article}
}
Lara, Agustín; Estepa, Antonio; Estepa, Rafael; Díaz-Verdejo, Jesús E.; Mayor, Vicente
Anomaly-based Intrusion Detection System for smart lighting Artículo de revista
En: Internet of Things, vol. 28, pp. 101427, 2024, ISSN: 2542-6605.
@article{LARA2024101427,
title = {Anomaly-based Intrusion Detection System for smart lighting},
author = {Agustín Lara and Antonio Estepa and Rafael Estepa and Jesús E. Díaz-Verdejo and Vicente Mayor},
url = {https://www.sciencedirect.com/science/article/pii/S2542660524003688},
doi = {https://doi.org/10.1016/j.iot.2024.101427},
issn = {2542-6605},
year = {2024},
date = {2024-01-01},
urldate = {2024-01-01},
journal = {Internet of Things},
volume = {28},
pages = {101427},
abstract = {Smart Lighting Systems (SLS) are essential to smart cities, offering enhanced energy efficiency and public safety. However, they are susceptible to security threats, potentially leading to safety risks and service disruptions, making the protection of this infrastructure critical. This paper presents an anomaly-based Intrusion Detection System (IDS) designed for a real-world operational SLS. As commercial deployments vary in components, protocols, and functionalities, IDSs must be tailored to the specific characteristics of each deployment to perform effectively. Our anomaly-based IDS has been defined based on the properties of the available data and the types of attacks we aim to detect, offering both explainability and low complexity. The proposed system identifies anomalies in seven features of network traffic and in the telemetry data received at the central control (O&M) server. For the latter, we designed three customized detectors to identify abnormal data points, persistent deviations in street lamp power consumption, and abnormal power value based on the time of day. Validation with real-world data and simulated attacks demonstrates the effectiveness of our approach. Network attacks (e.g., DoS, scanning) were detected by at least one of the seven flow-related anomaly detectors, while simulated data poisoning attacks and operational technology (OT) issues were detected with nearly 90% accuracy. The datasets used in this work are publicly available and may serve as reference for the design of future IDSs. While our detectors were designed specifically for our dataset, the variables examined and vulnerabilities addressed are common in most commercial SLSs.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Walabonso Lara, Agustín; Mayor, Vicente; Estepa Alonso, Rafael; Estepa Alonso, Antonio; Díaz-Verdejo, Jesús E.
Smart home anomaly-based IDS: Architecture proposal and case study Artículo de revista
En: Internet of Things, vol. 22, pp. 100773, 2023, ISSN: 2542-6605.
@article{Lara2023,
title = {Smart home anomaly-based IDS: Architecture proposal and case study},
author = { {Walabonso Lara}, Agustín and Vicente Mayor and {Estepa Alonso}, Rafael and {Estepa Alonso} , Antonio and Jesús E. {Díaz-Verdejo}},
url = {https://linkinghub.elsevier.com/retrieve/pii/S2542660523000963},
doi = {10.1016/J.IOT.2023.100773},
issn = {2542-6605},
year = {2023},
date = {2023-07-01},
urldate = {2023-07-01},
journal = {Internet of Things},
volume = {22},
pages = {100773},
publisher = {Elsevier},
abstract = {The complexity and diversity of the technologies involved in the Internet of Things (IoT) challenge the generalization of security solutions based on anomaly detection, which should fit the particularities of each context and deployment and allow for performance comparison. In this work, we provide a flexible architecture based on building blocks suited for detecting anomalies in the network traffic and the application-layer data exchanged by IoT devices in the context of Smart Home. Following this architecture, we have defined a particular Intrusion Detector System (IDS) for a case study that uses a public dataset with the electrical consumption of 21 home devices over one year. In particular, we have defined ten Indicators of Compromise (IoC) to detect network attacks and two anomaly detectors to detect false command or data injection attacks. We have also included a signature-based IDS (Snort) to extend the detection range to known attacks. We have reproduced eight network attacks (e.g., DoS, scanning) and four False Command or Data Injection attacks to test our IDS performance. The results show that all attacks were successfully detected by our IoCs and anomaly detectors with a false positive rate lower than 0.3%. Signature detection was able to detect only 4 out of 12 attacks. Our architecture and the IDS developed can be a reference for developing future IDS suited to different contexts or use cases. Given that we use a public dataset, our contribution can also serve as a baseline for comparison with new techniques that improve detection performance.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Díaz-Verdejo, Jesús E.; Estepa Alonso, Rafael; Estepa Alonso, Antonio; Madinabeitia, German
A critical review of the techniques used for anomaly detection of HTTP-based attacks: taxonomy, limitations and open challenges Artículo de revista
En: Computers and Security, vol. 124, pp. 102997, 2023, ISSN: 01674048.
@article{Diaz-Verdejo2023,
title = {A critical review of the techniques used for anomaly detection of HTTP-based attacks: taxonomy, limitations and open challenges},
author = {Jesús E. Díaz-Verdejo and {Estepa Alonso}, Rafael and {Estepa Alonso}, Antonio and German Madinabeitia},
doi = {10.1016/j.cose.2022.102997},
issn = {01674048},
year = {2023},
date = {2023-01-01},
urldate = {2023-01-01},
journal = {Computers and Security},
volume = {124},
pages = {102997},
abstract = {Intrusion Detection Systems (IDSs) and Web Application Firewalls (WAFs) offer a crucial layer of defense that allows organizations to detect cyberattacks on their web servers. Academic research overwhelmingly suggests using anomaly detection techniques to improve the performance of these defensive systems. However, analyzing and comparing the wide range of solutions in the scientific literature is challenging since they are typically presented as isolated (unrelated) contributions, and their results cannot be generalized. We believe that this impairs the industry's adoption of academic results and the advancement of research in this field. This paper aims to shed light on the literature on anomaly-based detection of attacks that use HTTP request messages. We define a novel framework for anomaly detection based on six data processing steps grouped into two sequential phases: preprocessing and classification. Based on this framework, we provide a taxonomy and critical review of the techniques surveyed, emphasizing their limitations and applicability. Future approaches should take advantage of the syntax and semantics of the Uniform Resource Locator (URL), be scalable, and address their obsolescence. These aspects are frequently overlooked in the literature and pose a significant challenge in the current era of web services. For better comparability, authors should use adequate public datasets, follow a thorough methodology, and use appropriate metrics that fully show the pros and cons of the approach.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Díaz-Verdejo, J. E.; Muñoz-Calle, F. J.; Estepa Alonso, A.; Estepa Alonso, R.; Madinabeitia, G.
On the Detection Capabilities of Signature-Based Intrusion Detection Systems in the Context of Web Attacks Artículo de revista
En: Applied Sciences, vol. 12, no 2, pp. 852, 2022, ISSN: 20763417.
@article{Diaz-Verdejo2022,
title = {On the Detection Capabilities of Signature-Based Intrusion Detection Systems in the Context of Web Attacks},
author = {J. E. Díaz-Verdejo and F. J. Muñoz-Calle and {Estepa Alonso}, A. and {Estepa Alonso}, R. and G. Madinabeitia},
url = {https://www.mdpi.com/2076-3417/12/2/852/htm https://www.mdpi.com/2076-3417/12/2/852},
doi = {10.3390/app12020852},
issn = {20763417},
year = {2022},
date = {2022-01-01},
urldate = {2022-01-01},
journal = {Applied Sciences},
volume = {12},
number = {2},
pages = {852},
publisher = {Multidisciplinary Digital Publishing Institute},
abstract = {Signature-based Intrusion Detection Systems (SIDS) play a crucial role within the arsenal of security components of most organizations. They can find traces of known attacks in the network traffic or host events for which patterns or signatures have been pre-established. SIDS include standard packages of detection rulesets, but only those rules suited to the operational environment should be activated for optimal performance. However, some organizations might skip this tuning process and instead activate default off-the-shelf rulesets without understanding its implications and trade-offs. In this work, we help gain insight into the consequences of using predefined rulesets in the performance of SIDS. We experimentally explore the performance of three SIDS in the context of web attacks. In particular, we gauge the detection rate obtained with predefined subsets of rules for Snort, ModSecurity and Nemesida using seven attack datasets. We also determine the precision and rate of alert generated by each detector in a real-life case using a large trace from a public webserver. Results show that the maximum detection rate achieved by the SIDS under test is insufficient to protect systems effectively and is lower than expected for known attacks. Our results also indicate that the choice of predefined settings activated on each detector strongly influences its detection capability and false alarm rate. Snort and ModSecurity scored either a very poor detection rate (activating the less-sensitive predefined ruleset) or a very poor precision (activating the full ruleset). We also found that using various SIDS for a cooperative decision can improve the precision or the detection rate, but not both. Consequently, it is necessary to reflect upon the role of these open-source SIDS with default configurations as core elements for protection in the context of web attacks. Finally, we provide an efficient method for systematically determining which rules deactivate from a ruleset to significantly reduce the false alarm rate for a target operational environment. We tested our approach using Snort’s ruleset in our real-life trace, increasing the precision from 0.015 to 1 in less than 16 h of work.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Díaz-Verdejo, Jesús E.; Estepa, Antonio; Estepa, Rafael; Madinabeitia, German; Muñoz-Calle, Fco Javier
A methodology for conducting efficient sanitization of HTTP training datasets Artículo de revista
En: Future Generation Computer Systems, vol. 109, pp. 67–82, 2020, ISSN: 0167739X.
@article{Diaz-Verdejo2020,
title = {A methodology for conducting efficient sanitization of HTTP training datasets},
author = {Jesús E. Díaz-Verdejo and Antonio Estepa and Rafael Estepa and German Madinabeitia and Fco Javier Muñoz-Calle},
url = {https://linkinghub.elsevier.com/retrieve/pii/S0167739X19322629},
doi = {10.1016/j.future.2020.03.033},
issn = {0167739X},
year = {2020},
date = {2020-08-01},
urldate = {2020-08-01},
journal = {Future Generation Computer Systems},
volume = {109},
pages = {67--82},
publisher = {Elsevier B.V.},
abstract = {The performance of anomaly-based intrusion detection systems depends on the quality of the datasets used to form normal activity profiles. Suitable datasets should include high volumes of real-life data free from attack instances. On account of this requirement, obtaining quality datasets from collected data requires a process of data sanitization that may be prohibitive if done manually, or uncertain if fully automated. In this work, we propose a sanitization approach for obtaining datasets from HTTP traces suited for training, testing, or validating anomaly-based attack detectors. Our methodology has two sequential phases. In the first phase, we clean known attacks from data using a pattern-based approach that relies on tools that detect URI-based known attacks. In the second phase, we complement the result of the first phase by conducting assisted manual labeling systematically and efficiently, setting the focus of expert examination not on the raw data (which would be millions of URIs), but on the set of words that compose the URIs. This dramatically downsizes the volume of data that requires expert discernment, making manual sanitization of large datasets feasible. We have applied our method to sanitize a trace that includes 45 million requests received by the library web server of the University of Seville. We were able to generate clean datasets in less than 84 h with only 33 h of manual supervision. We have also applied our method to some public benchmark datasets, confirming that attacks unnoticed by signature-based detectors can be discovered in a reduced time span.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
García-Teodoro, P.; Díaz-Verdejo, J.; Maciá-Fernández, G.; Vázquez, E.
Anomaly-based network intrusion detection: Techniques, systems and challenges Artículo de revista
En: Computers and Security, vol. 28, no 1-2, pp. 18–28, 2009, ISSN: 01674048.
@article{Garcia-Teodoro2009,
title = {Anomaly-based network intrusion detection: Techniques, systems and challenges},
author = {P. García-Teodoro and J. Díaz-Verdejo and G. Maciá-Fernández and E. Vázquez},
doi = {10.1016/j.cose.2008.08.003},
issn = {01674048},
year = {2009},
date = {2009-01-01},
urldate = {2009-01-01},
journal = {Computers and Security},
volume = {28},
number = {1-2},
pages = {18--28},
abstract = {The Internet and computer networks are exposed to an increasing number of security threats. With new types of attacks appearing continually, developing flexible and adaptive security oriented approaches is a severe challenge. In this context, anomaly-based network intrusion detection techniques are a valuable technology to protect target systems and networks against malicious activities. However, despite the variety of such methods described in the literature in recent years, security tools incorporating anomaly detection functionalities are just starting to appear, and several important problems remain to be solved. This paper begins with a review of the most well-known anomaly-based intrusion detection techniques. Then, available platforms, systems under development and research projects in the area are presented. Finally, we outline the main challenges to be dealt with for the wide scale deployment of anomaly-based intrusion detectors, with special emphasis on assessment issues. textcopyright 2008 Elsevier Ltd. All rights reserved.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}