{"id":1337,"date":"2023-04-10T12:16:31","date_gmt":"2023-04-10T10:16:31","guid":{"rendered":"https:\/\/dtstc.ugr.es\/neus-cslab\/?page_id=1337"},"modified":"2025-07-28T17:42:22","modified_gmt":"2025-07-28T15:42:22","slug":"ds-biblio","status":"publish","type":"page","link":"https:\/\/dtstc.ugr.es\/neus-cslab\/recursos\/ds-biblio\/","title":{"rendered":"Biblio-US17 &#8211; Dataset HTTP"},"content":{"rendered":"\t\t<div data-elementor-type=\"wp-page\" data-elementor-id=\"1337\" class=\"elementor elementor-1337\">\n\t\t\t\t\t\t<section class=\"elementor-section elementor-top-section elementor-element elementor-element-90ef653 elementor-section-boxed elementor-section-height-default elementor-section-height-default\" data-id=\"90ef653\" data-element_type=\"section\" data-e-type=\"section\">\n\t\t\t\t\t\t<div class=\"elementor-container elementor-column-gap-default\">\n\t\t\t\t\t<div class=\"elementor-column elementor-col-33 elementor-top-column elementor-element elementor-element-223fe18\" data-id=\"223fe18\" data-element_type=\"column\" data-e-type=\"column\">\n\t\t\t<div class=\"elementor-widget-wrap elementor-element-populated\">\n\t\t\t\t\t\t<div class=\"elementor-element elementor-element-b057abc elementor-widget elementor-widget-image\" data-id=\"b057abc\" data-element_type=\"widget\" data-e-type=\"widget\" data-widget_type=\"image.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t<img fetchpriority=\"high\" decoding=\"async\" width=\"700\" height=\"371\" src=\"https:\/\/dtstc.ugr.es\/neus-cslab\/wp-content\/uploads\/2023\/04\/datsshet_for_dataset.jpg\" class=\"attachment-large size-large wp-image-1982\" alt=\"\" srcset=\"https:\/\/dtstc.ugr.es\/neus-cslab\/wp-content\/uploads\/2023\/04\/datsshet_for_dataset.jpg 700w, https:\/\/dtstc.ugr.es\/neus-cslab\/wp-content\/uploads\/2023\/04\/datsshet_for_dataset-300x159.jpg 300w\" sizes=\"(max-width: 700px) 100vw, 700px\" 
\/>\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-f0e3064 elementor-widget elementor-widget-progress\" data-id=\"f0e3064\" data-element_type=\"widget\" data-e-type=\"widget\" data-widget_type=\"progress.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t\t<span class=\"elementor-title\" id=\"elementor-progress-bar-f0e3064\">\n\t\t\t\tProgreso\t\t\t<\/span>\n\t\t\n\t\t<div aria-labelledby=\"elementor-progress-bar-f0e3064\" class=\"elementor-progress-wrapper\" role=\"progressbar\" aria-valuemin=\"0\" aria-valuemax=\"100\" aria-valuenow=\"100\">\n\t\t\t<div class=\"elementor-progress-bar\" data-max=\"100\">\n\t\t\t\t<span class=\"elementor-progress-text\"><\/span>\n\t\t\t\t\t\t\t\t\t<span class=\"elementor-progress-percentage\">100%<\/span>\n\t\t\t\t\t\t\t<\/div>\n\t\t<\/div>\n\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t<\/div>\n\t\t\t\t<div class=\"elementor-column elementor-col-66 elementor-top-column elementor-element elementor-element-b824485\" data-id=\"b824485\" data-element_type=\"column\" data-e-type=\"column\">\n\t\t\t<div class=\"elementor-widget-wrap elementor-element-populated\">\n\t\t\t\t\t\t<div class=\"elementor-element elementor-element-a3ee6be elementor-widget elementor-widget-heading\" data-id=\"a3ee6be\" data-element_type=\"widget\" data-e-type=\"widget\" data-widget_type=\"heading.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t<h3 class=\"elementor-heading-title elementor-size-default\">BIBLIO-US17: Dataset HTTP<\/h3>\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-01e4fb5 elementor-position-inline-start elementor-view-default elementor-mobile-position-block-start elementor-widget elementor-widget-icon-box\" data-id=\"01e4fb5\" data-element_type=\"widget\" data-e-type=\"widget\" data-widget_type=\"icon-box.default\">\n\t\t\t\t<div 
class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t<div class=\"elementor-icon-box-wrapper\">\n\n\t\t\t\t\t\t<div class=\"elementor-icon-box-icon\">\n\t\t\t\t<span  class=\"elementor-icon\">\n\t\t\t\t<i aria-hidden=\"true\" class=\"fas fa-barcode\"><\/i>\t\t\t\t<\/span>\n\t\t\t<\/div>\n\t\t\t\n\t\t\t\t\t\t<div class=\"elementor-icon-box-content\">\n\n\t\t\t\t\t\t\t\t\t<h3 class=\"elementor-icon-box-title\">\n\t\t\t\t\t\t<span  >\n\t\t\t\t\t\t\tTipo contribuci\u00f3n\/resultado\t\t\t\t\t\t<\/span>\n\t\t\t\t\t<\/h3>\n\t\t\t\t\n\t\t\t\t\t\t\t\t\t<p class=\"elementor-icon-box-description\">\n\t\t\t\t\t\tDataset p\u00fablico\t\t\t\t\t<\/p>\n\t\t\t\t\n\t\t\t<\/div>\n\t\t\t\n\t\t<\/div>\n\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-b39df19 elementor-position-inline-start elementor-view-default elementor-mobile-position-block-start elementor-widget elementor-widget-icon-box\" data-id=\"b39df19\" data-element_type=\"widget\" data-e-type=\"widget\" data-widget_type=\"icon-box.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t<div class=\"elementor-icon-box-wrapper\">\n\n\t\t\t\t\t\t<div class=\"elementor-icon-box-icon\">\n\t\t\t\t<span  class=\"elementor-icon\">\n\t\t\t\t<i aria-hidden=\"true\" class=\"fas fa-file-contract\"><\/i>\t\t\t\t<\/span>\n\t\t\t<\/div>\n\t\t\t\n\t\t\t\t\t\t<div class=\"elementor-icon-box-content\">\n\n\t\t\t\t\t\t\t\t\t<h3 class=\"elementor-icon-box-title\">\n\t\t\t\t\t\t<span  >\n\t\t\t\t\t\t\tDescripci\u00f3n\t\t\t\t\t\t<\/span>\n\t\t\t\t\t<\/h3>\n\t\t\t\t\n\t\t\t\t\t\t\t\t\t<p class=\"elementor-icon-box-description\">\n\t\t\t\t\t\tDataset de peticiones HTTP reales etiquetadas para entrenamiento y validaci\u00f3n de AIDS y WAF<br>Incluye 47 millones de peticiones normales \/ ataques \/ err\u00f3neas <br> <a 
href=\"https:\/\/doi.org\/10.12795\/11441\/148254\">doi: 10.12795\/11441\/148254<\/a>\t\t\t\t\t<\/p>\n\t\t\t\t\n\t\t\t<\/div>\n\t\t\t\n\t\t<\/div>\n\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-7ac9ca7 elementor-position-right elementor-vertical-align-bottom elementor-widget elementor-widget-image-box\" data-id=\"7ac9ca7\" data-element_type=\"widget\" data-e-type=\"widget\" data-widget_type=\"image-box.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t<div class=\"elementor-image-box-wrapper\"><figure class=\"elementor-image-box-img\"><img decoding=\"async\" width=\"360\" height=\"103\" src=\"https:\/\/dtstc.ugr.es\/neus-cslab\/wp-content\/uploads\/2022\/03\/descarga.png\" class=\"attachment-full size-full wp-image-767\" alt=\"\" srcset=\"https:\/\/dtstc.ugr.es\/neus-cslab\/wp-content\/uploads\/2022\/03\/descarga.png 360w, https:\/\/dtstc.ugr.es\/neus-cslab\/wp-content\/uploads\/2022\/03\/descarga-300x86.png 300w\" sizes=\"(max-width: 360px) 100vw, 360px\" \/><\/figure><div class=\"elementor-image-box-content\"><p class=\"elementor-image-box-description\">Este resultado ha sido parcialmente financiado por  MCIN\/ AEI\/10.13039\/501100011033\/<\/p><\/div><\/div>\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-f0a8411 elementor-position-right elementor-vertical-align-bottom elementor-widget elementor-widget-image-box\" data-id=\"f0a8411\" data-element_type=\"widget\" data-e-type=\"widget\" data-widget_type=\"image-box.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t<div class=\"elementor-image-box-wrapper\"><figure class=\"elementor-image-box-img\"><img decoding=\"async\" width=\"777\" height=\"243\" src=\"https:\/\/dtstc.ugr.es\/neus-cslab\/wp-content\/uploads\/2023\/04\/feder-ja.png\" 
class=\"attachment-full size-full wp-image-2001\" alt=\"\" srcset=\"https:\/\/dtstc.ugr.es\/neus-cslab\/wp-content\/uploads\/2023\/04\/feder-ja.png 777w, https:\/\/dtstc.ugr.es\/neus-cslab\/wp-content\/uploads\/2023\/04\/feder-ja-300x94.png 300w, https:\/\/dtstc.ugr.es\/neus-cslab\/wp-content\/uploads\/2023\/04\/feder-ja-768x240.png 768w\" sizes=\"(max-width: 777px) 100vw, 777px\" \/><\/figure><div class=\"elementor-image-box-content\"><p class=\"elementor-image-box-description\">Este resultado ha sido parcialmente financiado por  FEDER\/ Junta de Andaluc\u00eda<\/p><\/div><\/div>\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t<\/section>\n\t\t\t\t<section class=\"elementor-section elementor-top-section elementor-element elementor-element-54a7d00 elementor-section-boxed elementor-section-height-default elementor-section-height-default\" data-id=\"54a7d00\" data-element_type=\"section\" data-e-type=\"section\">\n\t\t\t\t\t\t<div class=\"elementor-container elementor-column-gap-default\">\n\t\t\t\t\t<div class=\"elementor-column elementor-col-100 elementor-top-column elementor-element elementor-element-b90ccbd\" data-id=\"b90ccbd\" data-element_type=\"column\" data-e-type=\"column\">\n\t\t\t<div class=\"elementor-widget-wrap elementor-element-populated\">\n\t\t\t\t\t\t<div class=\"elementor-element elementor-element-6369844 elementor-widget elementor-widget-heading\" data-id=\"6369844\" data-element_type=\"widget\" data-e-type=\"widget\" data-widget_type=\"heading.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t<h2 class=\"elementor-heading-title elementor-size-default\">Presentaci\u00f3n<\/h2>\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-be03ef0 elementor-widget elementor-widget-text-editor\" data-id=\"be03ef0\" data-element_type=\"widget\" data-e-type=\"widget\" data-widget_type=\"text-editor.default\">\n\t\t\t\t<div 
class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t\t\t<p style=\"text-align: justify; text-indent: 20pt; line-height: 150%; font-family: 'Lato',serif; font-style: normal; font-weight: 300; justify-content: align;\">El desarrollo y evaluaci\u00f3n de sistemas de detecci\u00f3n de intrusos basados en anomal\u00edas (AIDS) requiere de <b>conjuntos de datos de entrenamiento<\/b> adecuados. Este nuevo conjunto de datos disponible para la comunidad cient\u00edfica\u00a0 tiene como objetivo posibilitar su uso en el contexto de los sistemas web.<\/p>\n<p style=\"text-align: justify; text-indent: 20pt; line-height: 150%; font-family: 'Lato',serif; font-style: normal; font-weight: 300; justify-content: align;\">Ha sido recopilado a partir de las trazas del <b>servidor web de la biblioteca de la Universidad de Sevilla<\/b> (Espa\u00f1a) y sus principales caracter\u00edsticas son:<\/p>\n<p style=\"text-align: justify; text-indent: -20pt; line-height: normal; margin: 5.0pt 20pt 1pt 40pt;\">&#8211; <b>Real<\/b>: ha sido adquirido a partir de la operaci\u00f3n real del servidor e incluye todas las peticiones recibidas.<\/p>\n<p style=\"text-align: justify; text-indent: -20pt; line-height: normal; margin: 5.0pt 20pt 1pt 40pt;\">&#8211; <b>Gran volumen de datos y lapso de tiempo<\/b>: incluye m\u00e1s de 47 millones de solicitudes HTTP recibidas por el servidor web p\u00fablico durante 6 meses.<\/p>\n<p style=\"text-align: justify; text-indent: -20pt; line-height: normal; margin: 5.0pt 20pt 1pt 40pt;\">&#8211; <b>Etiquetado<\/b> de peticiones: hemos seguido un proceso semiautom\u00e1tico para etiquetar cada registro en el conjunto de datos, tomando como punto de partida la clasificaci\u00f3n por varios SIDS.\nSe proporcionan as\u00ed peticiones leg\u00edtimas junto con <b>ataques reales<\/b> (es decir, en el mismo contexto del servidor web).<\/p>\n&nbsp;\n<p style=\"text-align: justify; text-indent: 20pt; line-height: 150%; font-family: 'Lato',serif; font-style: normal; 
font-weight: 300; justify-content: align;\">El conjunto de datos obtenido puede utilizarse como <b><em>ground-truth<\/em><\/b> para el <b>entrenamiento y evaluaci\u00f3n<\/b> de sistemas de detecci\u00f3n de intrusiones a los servidores web. Este dataset presenta un alto potencial, ya que, adem\u00e1s de las caracter\u00edsticas mencionadas previamente, corresponde a un servicio altamente din\u00e1mico (amplio uso de <em> queries<\/em> desplegado con par\u00e1metros) con tecnolog\u00eda actual.<\/p>\n<p style=\"text-align: justify; text-indent: 20pt; line-height: 150%; font-family: 'Lato',serif; font-style: normal; font-weight: 300; justify-content: align;\">Para evitar problemas de privacidad, hemos restringido la informaci\u00f3n en el conjunto de datos al M\u00e9todo, recurso (URI) y c\u00f3digo de respuesta para cada solicitud, suprimiendo marcas temporales (pero manteniendo el orden secuencial), direcciones IP y reemplazando elementos sensibles en el URI.<\/p>\n<p style=\"text-align: justify; text-indent: 20pt; line-height: 150%; font-family: 'Lato',serif; font-style: normal; font-weight: 300; justify-content: align;\"><\/p>\t\t\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t<\/section>\n\t\t\t\t<section class=\"elementor-section elementor-top-section elementor-element elementor-element-72936aa elementor-section-boxed elementor-section-height-default elementor-section-height-default\" data-id=\"72936aa\" data-element_type=\"section\" data-e-type=\"section\">\n\t\t\t\t\t\t<div class=\"elementor-container elementor-column-gap-default\">\n\t\t\t\t\t<div class=\"elementor-column elementor-col-100 elementor-top-column elementor-element elementor-element-ed6c3ad\" data-id=\"ed6c3ad\" data-element_type=\"column\" data-e-type=\"column\">\n\t\t\t<div class=\"elementor-widget-wrap elementor-element-populated\">\n\t\t\t\t\t\t<div class=\"elementor-element elementor-element-4e38c3e elementor-widget elementor-widget-heading\" 
data-id=\"4e38c3e\" data-element_type=\"widget\" data-e-type=\"widget\" data-widget_type=\"heading.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t<h2 class=\"elementor-heading-title elementor-size-default\">Procedimiento<\/h2>\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-7a7338e elementor-widget elementor-widget-text-editor\" data-id=\"7a7338e\" data-element_type=\"widget\" data-e-type=\"widget\" data-widget_type=\"text-editor.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t\t\t<span style=\"color: #038daa; font-size: 120%;\"><strong>Adquisici\u00f3n del dataset<\/strong><\/span>\n<p style=\"text-align: justify; text-indent: 20pt; line-height: 150%; font-family: 'Lato',serif; font-style: normal; font-weight: 300; justify-content: align;\">Se ha adquirido todo el tr\u00e1fico a los servicios HTTP y HTTPS durante 6 meses a partir de los archivos de traza del servidor Apache. El tr\u00e1fico capturado se organiza en archivos diarios por servicio etiquetados con el n\u00famero de mes y d\u00eda. Se han agrupado estos archivos por d\u00eda y se ha a\u00f1adido un identificador \u00fanico a cada l\u00ednea. 
Los datos m\u00e1s relevantes de la adquisici\u00f3n son:<\/p>\n\n<table style=\"width: auto !important; font-size: 1.2rem; border-color=rgb(3,141,170);box-sizing: border-box; margin-bottom: 0px; border: 2px solid #038daa; margin-left: auto; margin-right: auto;\">\n<thead style=\"background-color: #038daa; color: #ffffff; border-width: 1px; border-spacing: 0px; padding: 0px; padding-right: 5px; padding-left: 5px;\">\n<tr>\n<th style=\"padding: 0px; padding-left: 5px; padding-right: 5px; text-align: left;\"><b>Bloque<\/b><\/th>\n<th style=\"padding: 0px; text-align: center;\"><b>Vol (GB)<\/b><\/th>\n<th style=\"padding: 0px; padding-left: 5px; text-align: center;\"><b># arch <\/b><\/th>\n<th style=\"padding: 0px; padding-left: 5px; text-align: center;\"><b># l\u00edneas<\/b><\/th>\n<th style=\"padding: 0px; padding-left: 5px; text-align: center;\"><b>#Uris <\/b><\/th>\n<\/tr>\n<\/thead>\n<tbody style=\"padding: 0px;\">\n<tr>\n<td style=\"padding: 0px; padding-left: 5px; padding-right: 5px; text-align: left; background-color: #dddddd; border: 1px solid #038daa;\"><b>access_log<\/b><\/td>\n<td style=\"padding: 0px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">4.15<\/td>\n<td style=\"padding: 0px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">198<\/td>\n<td style=\"padding: 0px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">34 573 623<\/td>\n<td style=\"padding: 0px; text-align: right; padding-right: 5px; border: 1px solid #038daa; padding-left: 5px;\">34 074 832<\/td>\n<\/tr>\n<tr>\n<td style=\"padding: 0px; padding-left: 5px; padding-right: 5px; text-align: left; background-color: #dddddd; border: 1px solid #038daa;\"><b>ssl_access_log<\/b><\/td>\n<td style=\"padding: 0px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">1.17<\/td>\n<td style=\"padding: 0px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">172<\/td>\n<td style=\"padding: 0px; text-align: 
right; padding-right: 5px; border: 1px solid #038daa;\">13 328 700<\/td>\n<td style=\"padding: 0px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">13 328 164<\/td>\n<\/tr>\n<tr>\n<td style=\"padding: 0px; padding-left: 5px; padding-right: 5px; text-align: left; background-color: #dddddd; border: 1px solid #038daa;\"><b>apachelog (ambos)<\/b><\/td>\n<td style=\"padding: 0px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">5.99<\/td>\n<td style=\"padding: 0px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">198<\/td>\n<td style=\"padding: 0px; text-align: right; padding-right: 5px; border: 1px solid #038daa; padding-left: 5px;\">47 902 323<\/td>\n<td style=\"padding: 0px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">47 402 996<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n&nbsp;\n\n&nbsp;\n<table style=\"margin-left: auto; margin-right: auto; width: auto !important; font-size: 1.2rem; border-color=#ffffff;box-sizing: border-box; margin-bottom: 0px; border: 0px solid #038daa;\">\n<tbody>\n<tr style=\"align: center;\">\n<td style=\"align: center; border: 0;\"><img loading=\"lazy\" decoding=\"async\" class=\"aligncenter\" src=\"https:\/\/dtstc.ugr.es\/neus-cslab\/wp-content\/uploads\/2023\/04\/timeline-300x204.png\" alt=\"\" width=\"300\" height=\"204\" \/><\/td>\n<td style=\"text-align: center; border: 0;\"><\/td>\n<td style=\"align: center; border: 0;\"><img loading=\"lazy\" decoding=\"async\" class=\"aligncenter\" src=\"https:\/\/dtstc.ugr.es\/neus-cslab\/wp-content\/uploads\/2023\/04\/cr-300x217.png\" alt=\"\" width=\"300\" height=\"217\" \/><\/td>\n<\/tr>\n<tr style=\"align: center; border: 0px;\">\n<td style=\"align: center; text-align: center; border: 0;\">Evoluci\u00f3n temporal del n\u00famero de peticiones capturadas por servicio<\/td>\n<td style=\"text-align: center; border: 0;\"><\/td>\n<td style=\"text-align: center; border: 0;\">Distribuci\u00f3n de peticiones por c\u00f3digo de 
respuesta<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n&nbsp;\n\n&nbsp;\n\n<span style=\"color: #038daa; font-size: 120%;\"><b>Limpieza y etiquetado<\/b><\/span>\n<p style=\"text-align: justify; text-indent: 20pt; line-height: 150%; font-family: 'Lato',serif; font-style: normal; font-weight: 300; justify-content: align;\">Se han categorizado los registros (Normal \/ TP \/ FP \/ etc.) siguiendo el m\u00e9todo publicado en:<\/p>\n\n<center><div class=\"teachpress_pub_list\"><form name=\"tppublistform\" method=\"get\"><a name=\"tppubs\" id=\"tppubs\"><\/a><\/form><div class=\"teachpress_publication_list\"><div class=\"tp_publication tp_publication_article\"><div class=\"tp_pub_info\"><p class=\"tp_pub_author\"> D\u00edaz-Verdejo, Jes\u00fas E.;  Estepa, Antonio;  Estepa, Rafael;  Madinabeitia, German;  Mu\u00f1oz-Calle, Fco Javier<\/p><p class=\"tp_pub_title\"><a class=\"tp_title_link\" onclick=\"teachpress_pub_showhide('274','tp_links')\" style=\"cursor:pointer;\">A methodology for conducting efficient sanitization of HTTP training datasets<\/a> <span class=\"tp_pub_type tp_  article\">Art\u00edculo de revista<\/span> <\/p><p class=\"tp_pub_additional\"><span class=\"tp_pub_additional_in\">En: <\/span><span class=\"tp_pub_additional_journal\">Future Generation Computer Systems, <\/span><span class=\"tp_pub_additional_volume\">vol. 109, <\/span><span class=\"tp_pub_additional_pages\">pp. 
67\u201382, <\/span><span class=\"tp_pub_additional_year\">2020<\/span>, <span class=\"tp_pub_additional_issn\">ISSN: 0167739X<\/span>.<\/p><p class=\"tp_pub_menu\"><span class=\"tp_abstract_link\"><a id=\"tp_abstract_sh_274\" class=\"tp_show\" onclick=\"teachpress_pub_showhide('274','tp_abstract')\" title=\"Mostrar resumen\" style=\"cursor:pointer;\">Resumen<\/a><\/span> | <span class=\"tp_resource_link\"><a id=\"tp_links_sh_274\" class=\"tp_show\" onclick=\"teachpress_pub_showhide('274','tp_links')\" title=\"Mostrar enlaces y recursos\" style=\"cursor:pointer;\">Enlaces<\/a><\/span> | <span class=\"tp_bibtex_link\"><a id=\"tp_bibtex_sh_274\" class=\"tp_show\" onclick=\"teachpress_pub_showhide('274','tp_bibtex')\" title=\"Mostrar entrada BibTeX \" style=\"cursor:pointer;\">BibTeX<\/a><\/span><\/p><div class=\"tp_bibtex\" id=\"tp_bibtex_274\" style=\"display:none;\"><div class=\"tp_bibtex_entry\"><pre>@article{Diaz-Verdejo2020,<br \/>\r\ntitle = {A methodology for conducting efficient sanitization of HTTP training datasets},<br \/>\r\nauthor = {Jes\u00fas E. D\u00edaz-Verdejo and Antonio Estepa and Rafael Estepa and German Madinabeitia and Fco Javier Mu\u00f1oz-Calle},<br \/>\r\nurl = {https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0167739X19322629},<br \/>\r\ndoi = {10.1016\/j.future.2020.03.033},<br \/>\r\nissn = {0167739X},<br \/>\r\nyear  = {2020},<br \/>\r\ndate = {2020-08-01},<br \/>\r\nurldate = {2020-08-01},<br \/>\r\njournal = {Future Generation Computer Systems},<br \/>\r\nvolume = {109},<br \/>\r\npages = {67--82},<br \/>\r\npublisher = {Elsevier B.V.},<br \/>\r\nabstract = {The performance of anomaly-based intrusion detection systems depends on the quality of the datasets used to form normal activity profiles. Suitable datasets should include high volumes of real-life data free from attack instances. 
On account of this requirement, obtaining quality datasets from collected data requires a process of data sanitization that may be prohibitive if done manually, or uncertain if fully automated. In this work, we propose a sanitization approach for obtaining datasets from HTTP traces suited for training, testing, or validating anomaly-based attack detectors. Our methodology has two sequential phases. In the first phase, we clean known attacks from data using a pattern-based approach that relies on tools that detect URI-based known attacks. In the second phase, we complement the result of the first phase by conducting assisted manual labeling systematically and efficiently, setting the focus of expert examination not on the raw data (which would be millions of URIs), but on the set of words that compose the URIs. This dramatically downsizes the volume of data that requires expert discernment, making manual sanitization of large datasets feasible. We have applied our method to sanitize a trace that includes 45 million requests received by the library web server of the University of Seville. We were able to generate clean datasets in less than 84 h with only 33 h of manual supervision. We have also applied our method to some public benchmark datasets, confirming that attacks unnoticed by signature-based detectors can be discovered in a reduced time span.},<br \/>\r\nkeywords = {},<br \/>\r\npubstate = {published},<br \/>\r\ntppubtype = {article}<br \/>\r\n}<br \/>\r\n<\/pre><\/div><p class=\"tp_close_menu\"><a class=\"tp_close\" onclick=\"teachpress_pub_showhide('274','tp_bibtex')\">Cerrar<\/a><\/p><\/div><div class=\"tp_abstract\" id=\"tp_abstract_274\" style=\"display:none;\"><div class=\"tp_abstract_entry\">The performance of anomaly-based intrusion detection systems depends on the quality of the datasets used to form normal activity profiles. Suitable datasets should include high volumes of real-life data free from attack instances. 
On account of this requirement, obtaining quality datasets from collected data requires a process of data sanitization that may be prohibitive if done manually, or uncertain if fully automated. In this work, we propose a sanitization approach for obtaining datasets from HTTP traces suited for training, testing, or validating anomaly-based attack detectors. Our methodology has two sequential phases. In the first phase, we clean known attacks from data using a pattern-based approach that relies on tools that detect URI-based known attacks. In the second phase, we complement the result of the first phase by conducting assisted manual labeling systematically and efficiently, setting the focus of expert examination not on the raw data (which would be millions of URIs), but on the set of words that compose the URIs. This dramatically downsizes the volume of data that requires expert discernment, making manual sanitization of large datasets feasible. We have applied our method to sanitize a trace that includes 45 million requests received by the library web server of the University of Seville. We were able to generate clean datasets in less than 84 h with only 33 h of manual supervision. 
We have also applied our method to some public benchmark datasets, confirming that attacks unnoticed by signature-based detectors can be discovered in a reduced time span.<\/div><p class=\"tp_close_menu\"><a class=\"tp_close\" onclick=\"teachpress_pub_showhide('274','tp_abstract')\">Cerrar<\/a><\/p><\/div><div class=\"tp_links\" id=\"tp_links_274\" style=\"display:none;\"><div class=\"tp_links_entry\"><ul class=\"tp_pub_list\"><li><i class=\"fas fa-globe\"><\/i><a class=\"tp_pub_list\" href=\"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0167739X19322629\" title=\"https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0167739X19322629\" target=\"_blank\">https:\/\/linkinghub.elsevier.com\/retrieve\/pii\/S0167739X19322629<\/a><\/li><li><i class=\"ai ai-doi\"><\/i><a class=\"tp_pub_list\" href=\"https:\/\/dx.doi.org\/10.1016\/j.future.2020.03.033\" title=\"DOI de seguimiento:10.1016\/j.future.2020.03.033\" target=\"_blank\">doi:10.1016\/j.future.2020.03.033<\/a><\/li><\/ul><\/div><p class=\"tp_close_menu\"><a class=\"tp_close\" onclick=\"teachpress_pub_showhide('274','tp_links')\">Cerrar<\/a><\/p><\/div><\/div><\/div><\/div><\/div><\/center>\n<p style=\"text-align: justify; text-indent: 20pt; line-height: 150%; font-family: 'Lato',serif; font-style: normal; font-weight: 300; justify-content: align;\">Por\ntanto, se han preprocesado y normalizado las URI, anonimizando la\ninformaci\u00f3n sensible. Se ha procedido a la detecci\u00f3n de ataques mediante SIDS a partir de la herramienta <a href=\"\/neus-cslab\/recursos\/inspectorlog\/\"><b>Inspectorlog<\/b><\/a> desarrollada por\nnuestro grupo. Se han usado reglas Talos+ETOpen (M2), nemesida y CRS en\nconfiguraci\u00f3n PL1 y PL2. Se han supervisado y marcado como TP o FP todas\nlas detecciones. 
Finalmente, en la fase siguiente se ha analizado el vocabulario y\na\u00f1adido etiquetas para marcar ataques adicionales no detectados por los SIDS y registros que incumplen las normas de aplicaci\u00f3n a los URI (fuera de especificaci\u00f3n, OOS).<\/p>\t\t\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t<\/section>\n\t\t\t\t<section class=\"elementor-section elementor-top-section elementor-element elementor-element-bc444ca elementor-section-boxed elementor-section-height-default elementor-section-height-default\" data-id=\"bc444ca\" data-element_type=\"section\" data-e-type=\"section\">\n\t\t\t\t\t\t<div class=\"elementor-container elementor-column-gap-default\">\n\t\t\t\t\t<div class=\"elementor-column elementor-col-100 elementor-top-column elementor-element elementor-element-66dee80\" data-id=\"66dee80\" data-element_type=\"column\" data-e-type=\"column\">\n\t\t\t<div class=\"elementor-widget-wrap elementor-element-populated\">\n\t\t\t\t\t\t<div class=\"elementor-element elementor-element-490b890 elementor-widget elementor-widget-heading\" data-id=\"490b890\" data-element_type=\"widget\" data-e-type=\"widget\" data-widget_type=\"heading.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t<h2 class=\"elementor-heading-title elementor-size-default\">Dataset - Archivos y formatos<\/h2>\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-e2bec7b elementor-widget elementor-widget-text-editor\" data-id=\"e2bec7b\" data-element_type=\"widget\" data-e-type=\"widget\" data-widget_type=\"text-editor.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t\t\t<p><span style=\"color: #038daa; font-size: 150%;\">Dataset<strong> Biblio-US17<\/strong><\/span><\/p><table><tbody><tr><td width=\"70%\"><p style=\"text-align: justify; text-indent: 20pt; line-height: 150%; font-family: 'Lato',serif; font-style: normal; font-weight: 300; justify-content: 
align;\">El dataset se organiza en una estructura en \u00e1rbol (subdirectorios) cada uno conteniendo diferentes tipos de archivos o conjuntos. Se proporcionan 5 conjuntos de archivos y dos esquemas de particionado. Los archivos de las diferentes particiones deben ser generados a partir del dataset utilizando el script proporcionado.<\/p><p style=\"text-align: justify; text-indent: 20pt; line-height: 150%; font-family: 'Lato',serif; font-style: normal; font-weight: 300; justify-content: align;\">Los conjuntos de archivos (subdirectorios) incluidos son:<\/p><p style=\"text-align: justify; text-indent: -20pt; line-height: 150%; font-family: 'Lato',serif; font-style: normal; font-weight: 300; justify-content: align; margin: 5.0pt 20pt 1pt 80pt;\">&#8211; Archivos <b>raw<\/b>: Archivos de entrada. Contienen las peticiones consideradas v\u00e1lidas tras el preprocesado obtenidas directamente a partir de las capturas. Se encuentran anonimizadas.<\/p><p style=\"text-align: justify; text-indent: -20pt; line-height: 150%; font-family: 'Lato',serif; font-style: normal; font-weight: 300; justify-content: align; margin: 5.0pt 20pt 1pt 80pt;\">&#8211; Archivos <b>labels<\/b>: Contienen las etiquetas asignadas durante el an\u00e1lisis.<\/p><p style=\"text-align: justify; text-indent: -20pt; line-height: 150%; font-family: 'Lato',serif; font-style: normal; font-weight: 300; justify-content: align; margin: 5.0pt 20pt 1pt 80pt;\">&#8211; Archivos <b>SID<\/b>: Contienen informaci\u00f3n sobre las alertas generadas por los SIDS.<\/p><p style=\"text-align: justify; text-indent: -20pt; line-height: 150%; font-family: 'Lato',serif; font-style: normal; font-weight: 300; justify-content: align; margin: 5.0pt 20pt 1pt 80pt;\">&#8211; Archivos <b>clean<\/b>: Contienen las peticiones consideradas limpias tras la sanitizaci\u00f3n. 
Este es el dataset a utilizar como tr\u00e1fico normal.<\/p><p style=\"text-align: justify; text-indent: -20pt; line-height: 150%; font-family: 'Lato',serif; font-style: normal; font-weight: 300; justify-content: align; margin: 5.0pt 20pt 1pt 80pt;\">&#8211; Archivos <strong>attack<\/strong>: Peticiones clasificadas como ataques (s\u00f3lo LVL1 -indubitados-).<\/p><p>\u00a0<\/p><p style=\"text-align: justify; text-indent: 20pt; line-height: 150%; font-family: 'Lato',serif; font-style: normal; font-weight: 300; justify-content: align;\">Los registros en cada conjunto se organizan en archivos por d\u00edas denominados<\/p><center><span style=\"color: #038daa; font-size: 100%;\"><strong>biblio-2017-&lt;mm&gt;-&lt;dd&gt;.&lt;ext&gt;<\/strong><\/span><\/center><p>\u00a0<\/p><p style=\"text-align: justify; text-indent: 20pt; line-height: 150%; font-family: 'Lato',serif; font-style: normal; font-weight: 300; justify-content: align;\">siendo <em>&lt;mm&gt;<\/em> el n\u00famero del mes, <em>&lt;dd&gt;<\/em> el d\u00eda y <em>&lt;ext&gt;<\/em> una extensi\u00f3n relacionada con el tipo (y formato) de contenido:<\/p><p style=\"text-align: justify; text-indent: -20pt; line-height: 150%; font-family: 'Lato',serif; font-style: normal; font-weight: 300; justify-content: align; margin: 5.0pt 20pt 1pt 80pt;\"><em>.raw<\/em> para archivos RAW<\/p><p style=\"text-align: justify; text-indent: -20pt; line-height: 150%; font-family: 'Lato',serif; font-style: normal; font-weight: 300; justify-content: align; margin: 5.0pt 20pt 1pt 80pt;\"><em>.lbl<\/em> para archivos LABEL<\/p><p style=\"text-align: justify; text-indent: -20pt; line-height: 150%; font-family: 'Lato',serif; font-style: normal; font-weight: 300; justify-content: align; margin: 5.0pt 20pt 1pt 80pt;\"><em>.cl<\/em> para archivos CLEAN<\/p><p style=\"text-align: justify; text-indent: -20pt; line-height: 150%; font-family: 'Lato',serif; font-style: normal; font-weight: 300; justify-content: align; margin: 5.0pt 20pt 1pt 
80pt;\"><em>.sid<\/em> para archivos SID<\/p><p style=\"text-align: justify; text-indent: -20pt; line-height: 150%; font-family: 'Lato',serif; font-style: normal; font-weight: 300; justify-content: align; margin: 5.0pt 20pt 1pt 80pt;\"><em>.att<\/em> para archivos ATTACK<\/p><p>\u00a0<\/p><\/td><td width=\"30%\"><img decoding=\"async\" class=\"alignnone size-full wp-image-2909\" src=\"\/neus-cslab\/wp-content\/uploads\/2023\/08\/biblio-us17-dir-134x300-2.png\" alt=\"\" width=\"200\" srcset=\"https:\/\/dtstc.ugr.es\/neus-cslab\/wp-content\/uploads\/2023\/08\/biblio-us17-dir-134x300-2.png 543w, https:\/\/dtstc.ugr.es\/neus-cslab\/wp-content\/uploads\/2023\/08\/biblio-us17-dir-134x300-2-194x300.png 194w\" sizes=\"(max-width: 543px) 100vw, 543px\" \/><\/td><\/tr><\/tbody><\/table><p><span style=\"color: #038daa; font-size: 120%;\"><strong>Indexaci\u00f3n<br \/><\/strong><\/span><\/p><p style=\"text-align: justify; text-indent: 20pt; line-height: 150%; font-family: 'Lato',serif; font-style: normal; font-weight: 300; justify-content: align;\">Las peticiones se identifican un\u00edvocamente mediante un c\u00f3digo asignado a cada una de las entradas de los archivos de traza originales que permite su localizaci\u00f3n en los mismos. 
El formato del identificador es:<\/p><center><span style=\"color: #038daa; font-size: 120%;\"><b>[MM-DD-Fnnnnnn] <\/b><\/span><\/center><p style=\"text-align: justify; text-indent: 20pt; line-height: 150%; font-family: 'Lato',serif; font-style: normal; font-weight: 300; justify-content: align;\">siendo <i>MM-DD<\/i> el n\u00famero de mes y d\u00eda del archivo original, <i>F<\/i> el tipo de tr\u00e1fico (A: HTTP, S: HTTPS) y <i>nnnnnn<\/i> el n\u00famero de orden de la petici\u00f3n en el archivo de traza original.<\/p><p style=\"text-align: justify; text-indent: 20pt; line-height: 150%; font-family: 'Lato',serif; font-style: normal; font-weight: 300; justify-content: align;\">Todos y cada uno de los registros de todos los archivos contienen el identificador de la petici\u00f3n original asociada.<\/p><p><!--- FORMATOS --><\/p><p><span style=\"color: #038daa; font-size: 150%;\"><strong>Formatos de los archivos<\/strong><\/span><\/p><p style=\"text-align: justify; text-indent: 20pt; line-height: 150%; font-family: 'Lato',serif; font-style: normal; font-weight: 300; justify-content: align;\">Cada archivo contiene registros, cada uno compuesto por un conjunto de <b>campos delimitados por tabuladores<\/b>. Cada registro corresponde a una l\u00ednea y comienza siempre por un identificador. Los campos dependen del tipo de registro\/archivo:<\/p><p><!--- RAW ---><\/p><p><span style=\"color: #038daa; font-size: 120%; margin: 5.0pt 20pt 1pt 40pt;\"><strong>&#8211; Archivos RAW, CLEAN y ATTACK<br \/><\/strong><\/span><\/p><p style=\"text-align: justify; text-indent: 20pt; line-height: 150%; font-family: 'Lato',serif; font-style: normal; font-weight: 300; justify-content: align;\">Los archivos de tipo RAW, CLEAN y ATTACK contienen las peticiones incorporadas en el dataset. 
Cada l\u00ednea corresponde a campos seleccionados extra\u00eddos de las trazas del servidor Apache precedidos de su identificador.<br \/><!-- Cada una de las peticiones ha sido etiquetada con un identificador \u00fanico con el formato: <i>[MM-DD-Fnnnnnn]<\/i> siendo <i>MM-DD<\/i> el n\u00famero de mes y d\u00eda del archivo original, <i>F<\/i> el tipo de tr\u00e1fico (A: HTTTP, S: HTTPS) y <i>nnnnnn<\/i> el n\u00famero de orden de la petici\u00f3n en el archivo de traza original.--><\/p><p style=\"text-align: justify; text-indent: 20pt; line-height: 150%; font-family: 'Lato',serif; font-style: normal; font-weight: 300; justify-content: align;\">Cada registro consta de un conjunto de <b>campos delimitados por tabuladores<\/b>:<\/p><center><span style=\"color: #038daa; font-size: 120%;\"><b>[MM-DD-Fnnnnnn] METHOD URI PROTOCOL\u00bb RESP_CODE RESP_SIZE<\/b><\/span><\/center><p style=\"text-align: justify; text-indent: 20pt; line-height: 150%; font-family: 'Lato',serif; font-style: normal; font-weight: 300; justify-content: align;\">Ejemplo de registro:<\/p><center><i><br \/>[02-18-A001234] GET \/2003\/padron.html HTTP\/1.1&#8243; 200 11800<\/i><\/center><p><br \/><!-- LABEL --><br \/><span style=\"color: #038daa; font-size: 120%; margin: 5.0pt 20pt 1pt 40pt;\"><strong>&#8211; Archivos LABEL<\/strong><\/span><\/p><p style=\"text-align: justify; text-indent: 20pt; line-height: 150%; font-family: 'Lato',serif; font-style: normal; font-weight: 300; justify-content: align;\">Cada l\u00ednea contiene el conjunto de etiquetas asignadas al registro identificado en el primer campo. Los campos se encuentran delimitados por tabuladores. 
El formato y valores posibles se muestran en la tabla siguiente:<\/p><p><!---- AQUI TABLA --><\/p><table style=\"align: center; width: auto !important; font-size: 1.2rem; border-color=rgb(3,141,170);box-sizing: border-box; margin-bottom: 0px; border: 2px solid #038daa; margin-left: auto; margin-right: auto;\"><thead style=\"background-color: #038daa; color: #ffffff; border-width: 1px; border-spacing: 0px; padding: 0px; padding-right: 5px; padding-left: 5px;\"><tr style=\"background-color: #ffffff; border-color: #ffffff; border: 1px; color: #038daa; padding: 0px;\"><td style=\"border: 1px; padding: 0px;\">\u00a0<\/td><td style=\"padding: 0px;\">\u00a0<\/td><td style=\"background-color: #dddddd; text-align: center; padding: 0px;\" colspan=\"4\">Detecci\u00f3n SIDS<\/td><td style=\"background-color: #cccccc; text-align: center; padding: 0px;\">Supervisi\u00f3n SIDS<\/td><td style=\"background-color: #ffdc00; text-align: center; padding: 0px;\" colspan=\"2\">An\u00e1lisis segmentos<\/td><\/tr><tr style=\"font-size: 1.4rem;\"><th style=\"padding: 0px; padding-left: 5px; padding-right: 5px; text-align: left; background-color: #000000; font-size: 1.0rem;\"><b>Registro<\/b><\/th><th style=\"padding: 0px; padding-left: 5px; padding-right: 5px; text-align: center;\"><b>URI_ID<\/b><\/th><th style=\"padding: 0px; padding-left: 5px; padding-right: 5px; text-align: center;\"><b>IL_M2<\/b><\/th><th style=\"padding: 0px; padding-left: 5px; padding-right: 5px; text-align: center;\"><b>IL_NEM<\/b><\/th><th style=\"padding: 0px; padding-left: 5px; padding-right: 5px; text-align: center;\"><b>MS_PL1<\/b><\/th><th style=\"padding: 0px; padding-left: 5px; padding-right: 5px; text-align: center;\"><b>MS_PL2<\/b><\/th><th style=\"padding: 0px; padding-left: 5px; padding-right: 5px; text-align: center;\"><b>ManualTP<\/b><\/th><th style=\"padding: 0px; padding-left: 5px; padding-right: 5px; text-align: center;\"><b>Phase2TP<\/b><\/th><th style=\"padding: 0px; padding-left: 5px; 
padding-right: 5px; text-align: center;\"><b>OOS<\/b><\/th><\/tr><\/thead><tbody style=\"padding: 0px;\"><tr style=\"color: #038daa;\"><td style=\"padding: 0px; padding-left: 5px; padding-right: 5px; text-align: left; background-color: #dddddd; border: 1px solid #038daa;\"><b>Valores<b><\/b><\/b><\/td><td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">[MM-DD-Fnnnnnn]<\/td><td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">0 &#8211; No detec.<\/td><td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">0 &#8211; No detec.<\/td><td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">0 &#8211; No detec.<\/td><td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">0 &#8211; No detec.<\/td><td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">-1 &#8211; No etiquetado<\/td><td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">-1 &#8211; No etiquetado<\/td><td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">0 &#8211; Normal<\/td><\/tr><tr><td style=\"padding: 0px; padding-left: 5px; padding-right: 5px; text-align: left; background-color: #ffffff; border: 0px solid #038daa;\">\u00a0<\/td><td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 0px solid #038daa;\">\u00a0<\/td><td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">1 &#8211; Detectado<\/td><td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">1 &#8211; Detectado<\/td><td style=\"padding: 0px; 
padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">1 &#8211; Detectado<\/td><td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">1 &#8211; Detectado<\/td><td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">0 &#8211; Falso Positivo<\/td><td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">1 &#8211; Ataque LVL1<\/td><td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">1 &#8211; OOS RFC<\/td><\/tr><tr><td style=\"padding: 0px; padding-left: 5px; padding-right: 5px; text-align: left; background-color: #ffffff; border: 0px solid #038daa;\">\u00a0<\/td><td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 0px solid #038daa;\">\u00a0<\/td><td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 0px solid #038daa;\">\u00a0<\/td><td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 0px solid #038daa;\">\u00a0<\/td><td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 0px solid #038daa;\">\u00a0<\/td><td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 0px solid #038daa;\">\u00a0<\/td><td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">1 &#8211; Ataque LVL1<\/td><td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">2 &#8211; Ataque LVL2<\/td><td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">2 &#8211; OOS Cod<\/td><\/tr><tr><td style=\"padding: 0px; padding-left: 5px; padding-right: 5px; text-align: left; background-color: #ffffff; border: 
0px solid #038daa;\">\u00a0<\/td><td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 0px solid #038daa;\">\u00a0<\/td><td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 0px solid #038daa;\">\u00a0<\/td><td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 0px solid #038daa;\">\u00a0<\/td><td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 0px solid #038daa;\">\u00a0<\/td><td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 0px solid #038daa;\">\u00a0<\/td><td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">2 &#8211; Ataque LVL2<\/td><td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">3 &#8211; Ataque LVL3<\/td><td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">3 &#8211; OOS Fmt<\/td><\/tr><tr><td style=\"padding: 0px; padding-left: 5px; padding-right: 5px; text-align: left; background-color: #ffffff; border: 0px solid #038daa;\">\u00a0<\/td><td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 0px solid #038daa;\">\u00a0<\/td><td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 0px solid #038daa;\">\u00a0<\/td><td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 0px solid #038daa;\">\u00a0<\/td><td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 0px solid #038daa;\">\u00a0<\/td><td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 0px solid #038daa;\">\u00a0<\/td><td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 0px solid #038daa;\">\u00a0<\/td><td 
style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">4 &#8211; Ataque LVL4<\/td><td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">4 &#8211; OOS Sem<\/td><\/tr><tr><td style=\"padding: 0px; padding-left: 5px; padding-right: 5px; text-align: left; background-color: #dddddd; border: 1px solid #038daa;\"><b>Ejemplo<b><\/b><\/b><\/td><td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">[02-18-A001234]<\/td><td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">0<\/td><td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">1<\/td><td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">0<\/td><td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">1<\/td><td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">0<\/td><td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">-1<\/td><td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">2<\/td><\/tr><\/tbody><\/table><p><!--- FIN TABLA --><\/p><p style=\"text-align: justify; text-indent: -20pt; line-height: 150%; font-family: 'Lato',serif; font-style: normal; font-weight: 300; justify-content: align; margin: 5.0pt 20pt 1pt 40pt;\">-Un valor 1\/0 en una etiqueta de detecci\u00f3n SIDS indica que el detector correspondiente s\u00ed\/no ha generado alertas.<\/p><p style=\"text-align: justify; text-indent: -20pt; line-height: 150%; font-family: 'Lato',serif; font-style: normal; font-weight: 300; justify-content: align; margin: 5.0pt 20pt 1pt 40pt;\">-Un valor 
-1 en una etiqueta significa que no se ha asignado valor por no corresponderle o no haberse procesado ese campo (valor por defecto).<\/p><p style=\"text-align: justify; text-indent: -20pt; line-height: 150%; font-family: 'Lato',serif; font-style: normal; font-weight: 300; justify-content: align; margin: 5.0pt 20pt 1pt 40pt;\">-Un valor 1-4 en las etiquetas ManualTP y Phase2TP implica que se ha etiquetado el URI como ataque del nivel correspondiente y, an\u00e1logamente, un valor 1-4 en el campo OOS indica que el URI no es conforme y codifica el motivo.<\/p><p><!---- INICIO TABLA ---><\/p><table style=\"align: center; width: auto !important; font-size: 1.2rem; border-color=rgb(3,141,170);box-sizing: border-box; margin-bottom: 0px; border: 2px solid #038daa; margin-left: auto; margin-right: auto;\"><thead style=\"background-color: #038daa; color: #ffffff; border-width: 1px; border-spacing: 0px; padding: 0px; padding-right: 5px; padding-left: 5px;\"><tr><td style=\"padding: 0px; padding-left: 5px; padding-right: 5px; text-align: center;\" colspan=\"2\"><b>Ataques<\/b><\/td><td style=\"background-color: #ffffff; border-bottom-width: 0px;\">\u00a0<\/td><td style=\"padding: 0px; padding-left: 5px; padding-right: 5px; text-align: center;\" colspan=\"2\"><b>OOS<\/b><\/td><\/tr><tr style=\"font-size: 1.4rem;\"><th style=\"padding: 0px; padding-left: 5px; padding-right: 5px; text-align: center;\" width=\"10\"><b>ETIQ.<\/b><\/th><th style=\"padding: 0px; padding-left: 5px; padding-right: 5px; text-align: left;\" width=\"250\"><b>Expl.<\/b><\/th><th style=\"padding: 0px; padding-left: 5px; padding-right: 5px; text-align: left; background-color: #ffffff; border-bottom-width: 0px;\" width=\"10\">\u00a0<\/th><th style=\"padding: 0px; padding-left: 5px; padding-right: 5px; text-align: left;\"><b>ETIQ.<\/b><\/th><th style=\"padding: 0px; padding-left: 5px; padding-right: 5px; text-align: left;\"><b>Expl.<\/b><\/th><\/tr><\/thead><tbody style=\"padding: 0px;\"><tr><td 
style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">1<\/td><td style=\"padding: 0px; padding-left: 5px; text-align: left; padding-right: 5px; border: 1px solid #038daa;\">Indubitados<\/td><td style=\"padding: 0px; padding-left: 5px; text-align: left; padding-right: 5px; border: 1px solid #038daa; border-width: 0px; border-bottom-width: 0px;\">\u00a0<\/td><td style=\"padding: 0px; padding-left: 5px; text-align: left; padding-right: 5px; border: 1px solid #038daa;\">1<\/td><td style=\"padding: 0px; padding-left: 5px; text-align: left; padding-right: 5px; border: 1px solid #038daa;\">Incumplen RFC 3986<\/td><\/tr><tr><td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">2<\/td><td style=\"padding: 0px; padding-left: 5px; text-align: left; padding-right: 5px; border: 1px solid #038daa;\">Dependientes del contexto \/ aplicaci\u00f3n<\/td><td style=\"padding: 0px; padding-left: 5px; text-align: left; padding-right: 5px; border: 1px solid #038daa; border-width: 0px;\">\u00a0<\/td><td style=\"padding: 0px; padding-left: 5px; text-align: left; padding-right: 5px; border: 1px solid #038daa;\">2<\/td><td style=\"padding: 0px; padding-left: 5px; text-align: left; padding-right: 5px; border: 1px solid #038daa;\">Errores codificaci\u00f3n caracteres extendidos\/caracteres no permitidos<\/td><\/tr><tr><td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">3<\/td><td style=\"padding: 0px; padding-left: 5px; text-align: left; padding-right: 5px; border: 1px solid #038daa;\">Percent encoding<\/td><td style=\"padding: 0px; padding-left: 5px; text-align: left; padding-right: 5px; border: 1px solid #038daa; border-width: 0px;\">\u00a0<\/td><td style=\"padding: 0px; padding-left: 5px; text-align: left; padding-right: 5px; border: 1px solid #038daa;\">3<\/td><td style=\"padding: 0px; padding-left: 5px; text-align: 
left; padding-right: 5px; border: 1px solid #038daa;\">Uso de &#8216;\/\/&#8217; al inicio del URI<\/td><\/tr><tr><td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">4<\/td><td style=\"padding: 0px; padding-left: 5px; text-align: left; padding-right: 5px; border: 1px solid #038daa;\">DoS<\/td><td style=\"padding: 0px; padding-left: 5px; text-align: left; padding-right: 5px; border: 1px solid #038daa; border-width: 0px;\">\u00a0<\/td><td style=\"padding: 0px; padding-left: 5px; text-align: left; padding-right: 5px; border: 1px solid #038daa;\">4<\/td><td style=\"padding: 0px; padding-left: 5px; text-align: left; padding-right: 5px; border: 1px solid #038daa;\">Otros \/ errores sem\u00e1nticos<\/td><\/tr><\/tbody><\/table><p><!-- FIN TABLA LABEL --><\/p><p style=\"text-align: justify; text-indent: 20pt; line-height: 150%; font-family: 'Lato',serif; font-style: normal; font-weight: 300; justify-content: align;\"><b>Los registros RAW que no activan ninguna etiqueta <\/b>(valores por defecto, en azul en la tabla) <b>no generan registro LABEL asociado. <\/b>Por tanto, los registros RAW para los que no exista registro LABEL se entiende que toman los valores por defecto (en azul en la tabla).<\/p><p style=\"text-align: justify; text-indent: 20pt; line-height: 150%; font-family: 'Lato',serif; font-style: normal; font-weight: 300; justify-content: align;\">Es importante indicar que, dado el procesamiento en fases, no se activan todas las etiquetas posibles para cada registro. Si un registro es etiquetado como ataque en ManualTP, no se realizan procesamientos posteriores sobre \u00e9l. 
Por tanto, no se activar\u00e1 ninguna etiqueta de la fase de an\u00e1lisis de segmentos.<\/p><p><span style=\"color: #038daa; font-size: 120%; margin: 5.0pt 20pt 1pt 40pt;\"><strong>&#8211; Archivos SID<\/strong><\/span><\/p><p style=\"text-align: justify; text-indent: 20pt; line-height: 150%; font-family: 'Lato',serif; font-style: normal; font-weight: 300; justify-content: align;\">Cada l\u00ednea contiene informaci\u00f3n sobre las alertas generadas por el registro identificado en el primer campo. Los campos se encuentran delimitados por tabuladores. El formato es:<\/p><center><span style=\"color: #038daa; font-size: 120%;\"><b>[MM-DD-Fnnnnnn] SID DET<\/b><\/span><\/center><p style=\"text-align: justify; text-indent: 20pt; line-height: 150%; font-family: 'Lato',serif; font-style: normal; font-weight: 300; justify-content: align;\">siendo SID el identificador de la regla activada y DET el c\u00f3digo del detector asociado, seg\u00fan:<\/p><p><!---- INICIO TABLA ---><\/p><table style=\"width: auto !important; font-size: 1.2rem; border-color=rgb(3,141,170);box-sizing: border-box; margin-bottom: 0px; border: 2px solid #038daa; margin-left: auto; margin-right: auto;\"><thead style=\"background-color: #038daa; color: #ffffff; border-width: 1px; border-spacing: 0px; padding: 0px; padding-right: 5px; padding-left: 5px;\"><tr style=\"font-size: 1.4rem;\"><th style=\"padding: 0px; padding-left: 5px; padding-right: 5px; text-align: center;\"><b>DET<\/b><\/th><th style=\"padding: 0px; padding-left: 5px; padding-right: 5px; text-align: left;\"><b>Detector <\/b><\/th><th style=\"padding: 0px; padding-left: 5px; padding-right: 5px; text-align: left;\"><b>Reglas <\/b><\/th><th style=\"padding: 0px; padding-left: 5px; padding-right: 5px; text-align: left;\"><b>Observaciones<\/b><\/th><\/tr><\/thead><tbody style=\"padding: 0px;\"><tr><td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">1<\/td><td style=\"padding: 0px; 
padding-left: 5px; text-align: left; padding-right: 5px; border: 1px solid #038daa;\">Snort<\/td><td style=\"padding: 0px; padding-left: 5px; text-align: left; padding-right: 5px; border: 1px solid #038daa;\">Talos+ETOpen &#8211; Marzo de 2022<\/td><td style=\"padding: 0px; padding-left: 5px; text-align: left; padding-right: 5px; border: 1px solid #038daa;\">Sids 1024-899999 (Talos) y 2000000-2999999 (ETOpen)<\/td><\/tr><tr><td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">2<\/td><td style=\"padding: 0px; padding-left: 5px; text-align: left; padding-right: 5px; border: 1px solid #038daa;\">Nemesida<\/td><td style=\"padding: 0px; padding-left: 5px; text-align: left; padding-right: 5px; border: 1px solid #038daa;\">Nemesida (p\u00fablicas) &#8211; Nov. 2021<\/td><td style=\"padding: 0px; padding-left: 5px; text-align: left; padding-right: 5px; border: 1px solid #038daa;\">Sids originales renumerados &gt; 3000000<\/td><\/tr><tr><td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">3<\/td><td style=\"padding: 0px; padding-left: 5px; text-align: left; padding-right: 5px; border: 1px solid #038daa;\">ModSecurity<\/td><td style=\"padding: 0px; padding-left: 5px; text-align: left; padding-right: 5px; border: 1px solid #038daa;\">CRS3.3.2 (<b>PL1<\/b>) &#8211; Abril de 2022<\/td><td style=\"padding: 0px; padding-left: 5px; text-align: left; padding-right: 5px; border: 1px solid #038daa;\">Sids 900000-999999<\/td><\/tr><tr><td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">4<\/td><td style=\"padding: 0px; padding-left: 5px; text-align: left; padding-right: 5px; border: 1px solid #038daa;\">ModSecurity<\/td><td style=\"padding: 0px; padding-left: 5px; text-align: left; padding-right: 5px; border: 1px solid #038daa;\">CRS3.3.2 (<b>PL2<\/b>) &#8211; Abril de 2022<\/td><td style=\"padding: 0px; 
padding-left: 5px; text-align: left; padding-right: 5px; border: 1px solid #038daa;\">Sids 900000-999999<\/td><\/tr><\/tbody><\/table><p><br \/><span style=\"color: #038daa; font-size: 150%;\"><strong>Particionado<\/strong><\/span><\/p><p style=\"text-align: justify; text-indent: 20pt; line-height: 150%; font-family: 'Lato',serif; font-style: normal; font-weight: 300; justify-content: align;\">Se proporcionan dos esquemas de particionado: <b>TI<\/b> (<em>time independent<\/em>, independiente en el tiempo) y <b>TD<\/b> (<em>time dependent<\/em>, dependiente en el tiempo). Para cada esquema, se establece una distribuci\u00f3n de los registros en las proporciones 60\/30\/10 para entrenamiento\/test\/validaci\u00f3n.<\/p><p style=\"text-align: justify; text-indent: 20pt; line-height: 150%; font-family: 'Lato',serif; font-style: normal; font-weight: 300; justify-content: align;\">Para generar las particiones debe utilizarse el script <em>partitions.sh<\/em> suministrado en el directorio <em>\/bin<\/em>.<\/p><p style=\"text-align: justify; text-indent: 20pt; line-height: 150%; font-family: 'Lato',serif; font-style: normal; font-weight: 300; justify-content: align;\">La partici\u00f3n TI organiza los registros en archivos por d\u00eda, mientras que la partici\u00f3n TD se organiza en 7 bloques numerados de 1 a 7.<\/p><p style=\"text-align: justify; text-indent: 20pt; line-height: 150%; font-family: 'Lato',serif; font-style: normal; font-weight: 300; justify-content: align;\">Los detalles relativos a los esquemas de particionado y su finalidad y uso pueden consultarse en el art\u00edculo referenciado en la secci\u00f3n de documentos t\u00e9cnicos.<\/p><p><!--- FIN TABLA --><\/p>\t\t\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t<\/section>\n\t\t\t\t<section class=\"elementor-section elementor-top-section elementor-element elementor-element-0b1ac3c elementor-section-boxed elementor-section-height-default 
elementor-section-height-default\" data-id=\"0b1ac3c\" data-element_type=\"section\" data-e-type=\"section\">\n\t\t\t\t\t\t<div class=\"elementor-container elementor-column-gap-default\">\n\t\t\t\t\t<div class=\"elementor-column elementor-col-100 elementor-top-column elementor-element elementor-element-9f832f8\" data-id=\"9f832f8\" data-element_type=\"column\" data-e-type=\"column\">\n\t\t\t<div class=\"elementor-widget-wrap elementor-element-populated\">\n\t\t\t\t\t\t<div class=\"elementor-element elementor-element-f69229b elementor-widget elementor-widget-heading\" data-id=\"f69229b\" data-element_type=\"widget\" data-e-type=\"widget\" data-widget_type=\"heading.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t<h2 class=\"elementor-heading-title elementor-size-default\">Dataset - Resultado<\/h2>\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-16ce9e9 elementor-widget elementor-widget-text-editor\" data-id=\"16ce9e9\" data-element_type=\"widget\" data-e-type=\"widget\" data-widget_type=\"text-editor.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t\t\t<p style=\"text-align: justify; text-indent: 20pt; line-height: 150%; font-family: 'Lato',serif; font-style: normal; font-weight: 300; justify-content: align;\">Se proporcionan 198 archivos de cada una de las tipolog\u00edas (RAW, LABELS, SID, CLEAN y ATTACK) y un script para generar los esquemas de particionado. 
El contenido se resume en las siguientes tablas.<\/p>\n<span style=\"color: #038daa; font-size: 120%; margin: 20px;\"><strong>Registros en los archivos<\/strong><\/span>\n<table style=\"align: center; width: auto !important; font-size: 1.2rem; border-color=rgb(3,141,170);box-sizing: border-box; margin-bottom: 0px; border: 2px solid #038daa; margin-left: auto; margin-right: auto;\">\n<thead style=\"background-color: #038daa; color: #ffffff; border-width: 1px; border-spacing: 0px; padding: 0px; padding-right: 5px; padding-left: 5px;\">\n<tr style=\"font-size: 1.4rem;\">\n<th style=\"padding: 0px; padding-left: 5px; padding-right: 5px; text-align: center;\"><b>TIPO<\/b><\/th>\n<th style=\"padding: 0px; padding-left: 5px; padding-right: 5px; text-align: center;\"><b>N\u00fam. registros<\/b><\/th>\n<\/tr>\n<\/thead>\n<tbody style=\"padding: 0px;\">\n<tr>\n<td style=\"padding: 0px; padding-left: 5px; text-align: left; padding-right: 5px; border: 1px solid #038daa;\">RAW<\/td>\n<td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">47 402 907<\/td>\n<\/tr>\n<tr>\n<td style=\"padding: 0px; padding-left: 5px; text-align: left; padding-right: 5px; border: 1px solid #038daa;\">LABELS<\/td>\n<td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">370 859<\/td>\n<\/tr>\n<tr>\n<td style=\"padding: 0px; padding-left: 5px; text-align: left; padding-right: 5px; border: 1px solid #038daa;\">SID<\/td>\n<td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">344 942<\/td>\n<\/tr>\n<tr>\n<td style=\"padding: 0px; padding-left: 5px; text-align: left; padding-right: 5px; border: 1px solid #038daa;\">CLEAN<\/td>\n<td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">42 473 128<\/td>\n<\/tr>\n<tr>\n<td style=\"padding: 0px; padding-left: 5px; text-align: left; 
padding-right: 5px; border: 1px solid #038daa;\">Ataques<\/td>\n<td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">327 906<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<span style=\"color: #038daa; font-size: 120%; margin: 20px;\"><strong>Etiquetas asignadas<\/strong><\/span>\n<p style=\"text-align: justify; text-indent: 20pt; line-height: 150%; font-family: 'Lato',serif; font-style: normal; font-weight: 300; justify-content: align;\">El n\u00famero de etiquetas asignadas para todos los registros existentes (RAW) y los que se consideran para extraer el tr\u00e1fico limpio (CR&lt;300) es:<\/p>\n\n<table style=\"margin-left: auto; margin-right: auto; width: auto !important; font-size: 1.2rem; border-color=rgb(3,141,170);box-sizing: border-box; margin-bottom: 0px; border: 2px solid #038daa;\">\n<thead style=\"background-color: #038daa; color: #ffffff; border-width: 1px; border-spacing: 0px; padding: 0px; padding-right: 5px; padding-left: 5px;\">\n<tr style=\"font-size: 1.4rem;\">\n<th style=\"padding: 0px; padding-left: 5px; padding-right: 5px; text-align: left; background-color: #ffffff; font-size: 1.0rem;\"><\/th>\n<th style=\"padding: 0px; padding-left: 5px; padding-right: 5px; text-align: center;\"><b>Clase<\/b><\/th>\n<th style=\"padding: 0px; padding-left: 5px; padding-right: 5px; text-align: center;\"><b>RAW<\/b><\/th>\n<th style=\"padding: 0px; padding-left: 5px; padding-right: 5px; text-align: center;\"><b>CR&lt;300<\/b><\/th>\n<\/tr>\n<\/thead>\n<tbody style=\"padding: 0px;\">\n<tr>\n<td style=\"padding: 0px; padding-left: 5px; padding-right: 5px; text-align: left; background-color: #dddddd; border: 1px solid #038daa;\"><b>Ataques<b><\/b><\/b><\/td>\n<td style=\"padding: 0px; padding-left: 5px; text-align: left; padding-right: 5px; border: 1px solid #038daa;\">LVL1<\/td>\n<td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">327 906<\/td>\n<td 
style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">1 148<\/td>\n<\/tr>\n<tr>\n<th style=\"padding: 0px; padding-left: 5px; padding-right: 5px; text-align: left; background-color: #ffffff; font-size: 1.0rem;\"><\/th>\n<td style=\"padding: 0px; padding-left: 5px; text-align: left; padding-right: 5px; border: 1px solid #038daa;\">LVL2<\/td>\n<td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">10 634<\/td>\n<td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">617<\/td>\n<\/tr>\n<tr>\n<th style=\"padding: 0px; padding-left: 5px; padding-right: 5px; text-align: left; background-color: #ffffff; font-size: 1.0rem;\"><\/th>\n<td style=\"padding: 0px; padding-left: 5px; text-align: left; padding-right: 5px; border: 1px solid #038daa;\">LVL3<\/td>\n<td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">5 515<\/td>\n<td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">3 442<\/td>\n<\/tr>\n<tr>\n<th style=\"padding: 0px; padding-left: 5px; padding-right: 5px; text-align: left; background-color: #ffffff; font-size: 1.0rem;\"><\/th>\n<td style=\"padding: 0px; padding-left: 5px; text-align: left; padding-right: 5px; border: 1px solid #038daa;\">LVL4<\/td>\n<td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">4 310<\/td>\n<td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">0<\/td>\n<\/tr>\n<tr style=\"background-color: #ffff80;\">\n<th style=\"padding: 0px; padding-left: 5px; padding-right: 5px; text-align: left; background-color: #ffffff; font-size: 1.0rem;\"><\/th>\n<td style=\"padding: 0px; padding-left: 5px; text-align: left; padding-right: 5px; border: 1px solid 
#038daa;\"><b>TOTAL<\/b><\/td>\n<td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">348 365<\/td>\n<td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">5 207<\/td>\n<\/tr>\n<tr>\n<td style=\"padding: 0px; padding-left: 5px; padding-right: 5px; text-align: left; background-color: #dddddd; border: 1px solid #038daa;\"><b>FP<b><\/b><\/b><\/td>\n<td style=\"padding: 0px; padding-left: 5px; text-align: left; padding-right: 5px; border: 1px solid #038daa;\">FP<\/td>\n<td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">9 222<\/td>\n<td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">8 184<\/td>\n<\/tr>\n<tr>\n<td style=\"padding: 0px; padding-left: 5px; padding-right: 5px; text-align: left; background-color: #dddddd; border: 1px solid #038daa;\"><b>OOS<b><\/b><\/b><\/td>\n<td style=\"padding: 0px; padding-left: 5px; text-align: left; padding-right: 5px; border: 1px solid #038daa;\">OOS1<\/td>\n<td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">169<\/td>\n<td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">98<\/td>\n<\/tr>\n<tr>\n<td style=\"padding: 0px; padding-left: 5px; padding-right: 5px; text-align: left; background-color: #ffffff; border: 1px solid #038daa;\"><\/td>\n<td style=\"padding: 0px; padding-left: 5px; text-align: left; padding-right: 5px; border: 1px solid #038daa;\">OOS2<\/td>\n<td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">2 021<\/td>\n<td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">1 735<\/td>\n<\/tr>\n<tr>\n<td style=\"padding: 0px; padding-left: 5px; 
padding-right: 5px; text-align: left; background-color: #ffffff; border: 1px solid #038daa;\"><\/td>\n<td style=\"padding: 0px; padding-left: 5px; text-align: left; padding-right: 5px; border: 1px solid #038daa;\">OOS3<\/td>\n<td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">6 567<\/td>\n<td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">6 178<\/td>\n<\/tr>\n<tr>\n<td style=\"padding: 0px; padding-left: 5px; padding-right: 5px; text-align: left; background-color: #ffffff; border: 1px solid #038daa;\"><\/td>\n<td style=\"padding: 0px; padding-left: 5px; text-align: left; padding-right: 5px; border: 1px solid #038daa;\">OOS4<\/td>\n<td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">1 595<\/td>\n<td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">1 106<\/td>\n<\/tr>\n<tr style=\"background-color: #ffff80;\">\n<th style=\"padding: 0px; padding-left: 5px; padding-right: 5px; text-align: left; background-color: #ffffff; font-size: 1.0rem;\"><\/th>\n<td style=\"padding: 0px; padding-left: 5px; text-align: left; padding-right: 5px; border: 1px solid #038daa;\"><b>TOTAL<\/b><\/td>\n<td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">10 352<\/td>\n<td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">9 117<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<span style=\"color: #038daa; font-size: 120%; margin: 20px;\"><strong>Particiones<\/strong><\/span>\n<table style=\"margin-left: auto; margin-right: auto; width: auto !important; font-size: 1.2rem; border-color=rgb(3,141,170);box-sizing: border-box; margin-bottom: 0px; border: 2px solid #038daa;\">\n<thead style=\"background-color: #038daa; color: #ffffff; border-width: 1px; 
border-spacing: 0px; padding: 0px; padding-right: 5px; padding-left: 5px;\">\n<tr style=\"font-size: 1.4rem;\">\n<th style=\"padding: 0px; padding-left: 5px; padding-right: 5px; text-align: left; background-color: #ffffff; font-size: 1.0rem;\"><\/th>\n<th style=\"padding: 0px; padding-left: 5px; padding-right: 5px; text-align: center;\"><b>Part<\/b><\/th>\n<th style=\"padding: 0px; padding-left: 5px; padding-right: 5px; text-align: center;\"><b>Arch<\/b><\/th>\n<th style=\"padding: 0px; padding-left: 5px; padding-right: 5px; text-align: center;\"><b>Registros<\/b><\/th>\n<\/tr>\n<\/thead>\n<tbody style=\"padding: 0px;\">\n<tr>\n<td style=\"padding: 0px; padding-left: 5px; padding-right: 5px; text-align: left; background-color: #dddddd; border: 1px solid #038daa;\">TI<\/td>\n<td style=\"padding: 0px; padding-left: 5px; text-align: left; padding-right: 5px; border: 1px solid #038daa;\">TR<\/td>\n<td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">198<\/td>\n<td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">25 483 092<\/td>\n<\/tr>\n<tr>\n<th style=\"padding: 0px; padding-left: 5px; padding-right: 5px; text-align: left; background-color: #ffffff; font-size: 1.0rem;\"><\/th>\n<td style=\"padding: 0px; padding-left: 5px; text-align: left; padding-right: 5px; border: 1px solid #038daa;\">TEST<\/td>\n<td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">198<\/td>\n<td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">12 741 546<\/td>\n<\/tr>\n<tr>\n<th style=\"padding: 0px; padding-left: 5px; padding-right: 5px; text-align: left; background-color: #ffffff; font-size: 1.0rem;\"><\/th>\n<td style=\"padding: 0px; padding-left: 5px; text-align: left; padding-right: 5px; border: 1px solid #038daa;\">VAL<\/td>\n<td style=\"padding: 0px; 
padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">198<\/td>\n<td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">4 248 490<\/td>\n<\/tr>\n<tr>\n<th style=\"padding: 0px; padding-left: 5px; padding-right: 5px; text-align: left; background-color: #ffffff; font-size: 1.0rem;\"><\/th>\n<td style=\"padding: 0px; padding-left: 5px; text-align: left; padding-right: 5px; border: 1px solid #038daa;\">CALIB<\/td>\n<td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">93<\/td>\n<td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">6 095 083<\/td>\n<\/tr>\n<tr style=\"background-color: #ffff80;\">\n<th style=\"padding: 0px; padding-left: 5px; padding-right: 5px; text-align: left; background-color: #ffffff; font-size: 1.0rem;\"><\/th>\n<td style=\"padding: 0px; padding-left: 5px; text-align: left; padding-right: 5px; border: 1px solid #038daa;\"><\/td>\n<td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\"><\/td>\n<td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\"><\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n<table style=\"margin-left: auto; margin-right: auto; width: auto !important; font-size: 1.2rem; border-color=rgb(3,141,170);box-sizing: border-box; margin-bottom: 0px; border: 2px solid #038daa;\">\n<thead style=\"background-color: #038daa; color: #ffffff; border-width: 1px; border-spacing: 0px; padding: 0px; padding-right: 5px; padding-left: 5px;\">\n<tr style=\"font-size: 1.4rem;\">\n<th style=\"padding: 0px; padding-left: 5px; padding-right: 5px; text-align: left; background-color: #ffffff; font-size: 1.0rem;\"><\/th>\n<th style=\"padding: 0px; padding-left: 5px; padding-right: 5px; text-align: center;\"><b>Part<\/b><\/th>\n<th style=\"padding: 
0px; padding-left: 5px; padding-right: 5px; text-align: center;\"><b>TR<\/b><\/th>\n<th style=\"padding: 0px; padding-left: 5px; padding-right: 5px; text-align: center;\"><b>TEST<\/b><\/th>\n<th style=\"padding: 0px; padding-left: 5px; padding-right: 5px; text-align: center;\"><b>VAL<\/b><\/th>\n<\/tr>\n<\/thead>\n<tbody style=\"padding: 0px;\">\n<tr>\n<td style=\"padding: 0px; padding-left: 5px; padding-right: 5px; text-align: left; background-color: #dddddd; border: 1px solid #038daa;\">TD<\/td>\n<td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">1<\/td>\n<td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">12 437 152<\/td>\n<td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">8 270 214<\/td>\n<td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">2 535 297<\/td>\n<\/tr>\n<tr>\n<td style=\"padding: 0px; padding-left: 5px; padding-right: 5px; text-align: left; border: 1px solid #038daa;\"><\/td>\n<td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">2<\/td>\n<td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">13 638 751<\/td>\n<td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">7 000 209<\/td>\n<td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">3 588 483<\/td>\n<\/tr>\n<tr>\n<td style=\"padding: 0px; padding-left: 5px; padding-right: 5px; text-align: left; border: 1px solid #038daa;\"><\/td>\n<td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">3<\/td>\n<td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 
5px; border: 1px solid #038daa;\">14 612 283<\/td>\n<td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">6 123 780<\/td>\n<td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">2 811 780<\/td>\n<\/tr>\n<tr>\n<td style=\"padding: 0px; padding-left: 5px; padding-right: 5px; text-align: left; border: 1px solid #038daa;\"><\/td>\n<td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">4<\/td>\n<td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">13 849 043<\/td>\n<td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">6 400 263<\/td>\n<td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">4 762 313<\/td>\n<\/tr>\n<tr>\n<td style=\"padding: 0px; padding-left: 5px; padding-right: 5px; text-align: left; border: 1px solid #038daa;\"><\/td>\n<td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">5<\/td>\n<td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">14 393 994<\/td>\n<td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">7 574 093<\/td>\n<td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">3 454 446<\/td>\n<\/tr>\n<tr>\n<td style=\"padding: 0px; padding-left: 5px; padding-right: 5px; text-align: left; border: 1px solid #038daa;\"><\/td>\n<td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">6<\/td>\n<td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">13 400 472<\/td>\n<td 
style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">8 216 759<\/td>\n<td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">2 668 271<\/td>\n<\/tr>\n<tr>\n<td style=\"padding: 0px; padding-left: 5px; padding-right: 5px; text-align: left; border: 1px solid #038daa;\"><\/td>\n<td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">7<\/td>\n<td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">13 697 873<\/td>\n<td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">6 122 717<\/td>\n<td style=\"padding: 0px; padding-left: 5px; text-align: right; padding-right: 5px; border: 1px solid #038daa;\">1 945 172<\/td>\n<\/tr>\n<\/tbody>\n<\/table>\t\t\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t<\/section>\n\t\t\t\t<section class=\"elementor-section elementor-top-section elementor-element elementor-element-86ba020 elementor-section-boxed elementor-section-height-default elementor-section-height-default\" data-id=\"86ba020\" data-element_type=\"section\" data-e-type=\"section\">\n\t\t\t\t\t\t<div class=\"elementor-container elementor-column-gap-default\">\n\t\t\t\t\t<div class=\"elementor-column elementor-col-100 elementor-top-column elementor-element elementor-element-bd35699\" data-id=\"bd35699\" data-element_type=\"column\" data-e-type=\"column\">\n\t\t\t<div class=\"elementor-widget-wrap elementor-element-populated\">\n\t\t\t\t\t\t<div class=\"elementor-element elementor-element-3caceaf elementor-widget elementor-widget-heading\" data-id=\"3caceaf\" data-element_type=\"widget\" data-e-type=\"widget\" data-widget_type=\"heading.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t<h2 class=\"elementor-heading-title 
elementor-size-default\">Documentos t\u00e9cnicos \/ recursos<\/h2>\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<section class=\"elementor-section elementor-inner-section elementor-element elementor-element-170257f elementor-section-boxed elementor-section-height-default elementor-section-height-default\" data-id=\"170257f\" data-element_type=\"section\" data-e-type=\"section\">\n\t\t\t\t\t\t<div class=\"elementor-container elementor-column-gap-default\">\n\t\t\t\t\t<div class=\"elementor-column elementor-col-100 elementor-inner-column elementor-element elementor-element-943b2a0\" data-id=\"943b2a0\" data-element_type=\"column\" data-e-type=\"column\" data-settings=\"{&quot;background_background&quot;:&quot;classic&quot;}\">\n\t\t\t<div class=\"elementor-widget-wrap elementor-element-populated\">\n\t\t\t\t\t\t<div class=\"elementor-element elementor-element-799ef9b elementor-widget elementor-widget-text-editor\" data-id=\"799ef9b\" data-element_type=\"widget\" data-e-type=\"widget\" data-widget_type=\"text-editor.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t\t\t<ul><li><a href=\"https:\/\/dtstc.ugr.es\/neus-cslab\/wp-content\/uploads\/2023\/08\/README.txt\">\u00a0<\/a>El dataset est\u00e1 disponible (bajo petici\u00f3n) en:<\/li><\/ul><pre><a href=\"https:\/\/idus.us.es\/handle\/11441\/148254\">https:\/\/idus.us.es\/handle\/11441\/148254<\/a><\/pre><ul><li>Archivo README<\/li><\/ul><div><pre><a href=\"https:\/\/dtstc.ugr.es\/neus-cslab\/wp-content\/uploads\/2023\/08\/README.txt\"><code><i class=\"fas fa-file\"><\/i> README.txt<\/code><\/a><\/pre><\/div><p><span style=\"color: #038daa; font-size: 120%; margin: 20px;\"><strong>Publicaciones<\/strong><\/span><\/p><p><div class=\"teachpress_pub_list\"><form name=\"tppublistform\" method=\"get\"><a name=\"tppubs\" id=\"tppubs\"><\/a><\/form><div class=\"teachpress_publication_list\"><div class=\"tp_publication tp_publication_inproceedings\"><div class=\"tp_pub_info\"><p 
class=\"tp_pub_author\"> D\u00edaz-Verdejo, Jes\u00fas E.;  Estepa, Rafael;  Estepa, Antonio;  Mu\u00f1oz-Calle, Fco. Javier;  Madinabeitia, Germ\u00e1n<\/p><p class=\"tp_pub_title\">Una revisi\u00f3n de: Building a large, realistic and labeled URI dataset for website modelling in anomaly-based intrusion detection systems: Biblio-US17 <span class=\"tp_pub_type tp_  inproceedings\">Proceedings Article<\/span> <\/p><p class=\"tp_pub_additional\"><span class=\"tp_pub_additional_in\">En: <\/span><span class=\"tp_pub_additional_booktitle\">Actas de las XI Jornadas Nacionales de Investigaci\u00f3n en Ciberseguridad, <\/span><span class=\"tp_pub_additional_pages\">pp. 464-465, <\/span><span class=\"tp_pub_additional_year\">2026<\/span>, <span class=\"tp_pub_additional_isbn\">ISBN: 979-13-88098-43-7<\/span>.<\/p><p class=\"tp_pub_menu\"><span class=\"tp_abstract_link\"><a id=\"tp_abstract_sh_504\" class=\"tp_show\" onclick=\"teachpress_pub_showhide('504','tp_abstract')\" title=\"Mostrar resumen\" style=\"cursor:pointer;\">Resumen<\/a><\/span> | <span class=\"tp_bibtex_link\"><a id=\"tp_bibtex_sh_504\" class=\"tp_show\" onclick=\"teachpress_pub_showhide('504','tp_bibtex')\" title=\"Mostrar entrada BibTeX \" style=\"cursor:pointer;\">BibTeX<\/a><\/span><\/p><div class=\"tp_bibtex\" id=\"tp_bibtex_504\" style=\"display:none;\"><div class=\"tp_bibtex_entry\"><pre>@inproceedings{jnic26-biblio,<br \/>\r\ntitle = {Una revisi\u00f3n de: Building a large, realistic and labeled URI dataset for website modelling in anomaly-based intrusion detection systems: Biblio-US17},<br \/>\r\nauthor = {Jes\u00fas E. {D\u00edaz-Verdejo} and Rafael Estepa and Antonio Estepa and Fco. 
Javier {Mu\u00f1oz-Calle} and Germ\u00e1n Madinabeitia},<br \/>\r\nisbn = {979-13-88098-43-7},<br \/>\r\nyear  = {2026},<br \/>\r\ndate = {2026-05-06},<br \/>\r\nurldate = {2026-05-06},<br \/>\r\nbooktitle = {Actas de las XI Jornadas Nacionales de Investigaci\u00f3n en Ciberseguridad},<br \/>\r\njournal = {Actas de las XI Jornadas Nacionales de Investigaci\u00f3n en Ciberseguridad},<br \/>\r\npages = {464-465},<br \/>\r\nabstract = {Este trabajo presenta la creaci\u00f3n, depuraci\u00f3n y validaci\u00f3n de Biblio US17, un conjunto de datos exhaustivo y etiquetado con 47 millones de peticiones HTTP, obtenido durante seis meses de operaci\u00f3n del servidor web de la Biblioteca de la Universidad de Sevilla. Esta contribuci\u00f3n se enmarca en la creciente necesidad de datasets realistas, recientes y etiquetados que permitan evaluar con rigor los sistemas de detecci\u00f3n de intrusiones basados en anomal\u00edas (AIDS) en el contexto de ataques web que se manifiestan en los Uniform Resource Identifiers (URI).<br \/>\r\nCada registro ha sido etiquetado como normal o de ataque tras un exhaustivo proceso semiautomatizado que incluy\u00f3 la detecci\u00f3n basada en firmas y la detecci\u00f3n de anomal\u00edas en el vocabulario observado en las URI. Se describen los procesos de recopilaci\u00f3n y etiquetado, la estructura del conjunto de datos y las propiedades m\u00e1s relevantes. 
El dataset resultante est\u00e1 disponible p\u00fablicamente.},<br \/>\r\nkeywords = {},<br \/>\r\npubstate = {published},<br \/>\r\ntppubtype = {inproceedings}<br \/>\r\n}<br \/>\r\n<\/pre><\/div><p class=\"tp_close_menu\"><a class=\"tp_close\" onclick=\"teachpress_pub_showhide('504','tp_bibtex')\">Cerrar<\/a><\/p><\/div><div class=\"tp_abstract\" id=\"tp_abstract_504\" style=\"display:none;\"><div class=\"tp_abstract_entry\">Este trabajo presenta la creaci\u00f3n, depuraci\u00f3n y validaci\u00f3n de Biblio US17, un conjunto de datos exhaustivo y etiquetado con 47 millones de peticiones HTTP, obtenido durante seis meses de operaci\u00f3n del servidor web de la Biblioteca de la Universidad de Sevilla. Esta contribuci\u00f3n se enmarca en la creciente necesidad de datasets realistas, recientes y etiquetados que permitan evaluar con rigor los sistemas de detecci\u00f3n de intrusiones basados en anomal\u00edas (AIDS) en el contexto de ataques web que se manifiestan en los Uniform Resource Identifiers (URI).<br \/>\r\nCada registro ha sido etiquetado como normal o de ataque tras un exhaustivo proceso semiautomatizado que incluy\u00f3 la detecci\u00f3n basada en firmas y la detecci\u00f3n de anomal\u00edas en el vocabulario observado en las URI. Se describen los procesos de recopilaci\u00f3n y etiquetado, la estructura del conjunto de datos y las propiedades m\u00e1s relevantes. El dataset resultante est\u00e1 disponible p\u00fablicamente.<\/div><p class=\"tp_close_menu\"><a class=\"tp_close\" onclick=\"teachpress_pub_showhide('504','tp_abstract')\">Cerrar<\/a><\/p><\/div><\/div><\/div><div class=\"tp_publication tp_publication_article\"><div class=\"tp_pub_info\"><p class=\"tp_pub_author\"> D\u00edaz-Verdejo, Jes\u00fas E.;  Estepa Alonso, Rafael;  Estepa Alonso, Antonio;  Mu\u00f1oz-Calle, F. 
J.;  Madinabeitia, German<\/p><p class=\"tp_pub_title\"><a class=\"tp_title_link\" onclick=\"teachpress_pub_showhide('496','tp_links')\" style=\"cursor:pointer;\">Building a large, realistic and labeled HTTP URI dataset for anomaly-based intrusion detection systems: Biblio-US17 <\/a> <span class=\"tp_pub_type tp_  article\">Art\u00edculo de revista<\/span> <\/p><p class=\"tp_pub_additional\"><span class=\"tp_pub_additional_in\">En: <\/span><span class=\"tp_pub_additional_journal\">Cybersecurity, <\/span><span class=\"tp_pub_additional_volume\">vol. 8, <\/span><span class=\"tp_pub_additional_number\">no 35, <\/span><span class=\"tp_pub_additional_year\">2025<\/span>, <span class=\"tp_pub_additional_issn\">ISSN: 2523-3246<\/span>.<\/p><p class=\"tp_pub_menu\"><span class=\"tp_abstract_link\"><a id=\"tp_abstract_sh_496\" class=\"tp_show\" onclick=\"teachpress_pub_showhide('496','tp_abstract')\" title=\"Mostrar resumen\" style=\"cursor:pointer;\">Resumen<\/a><\/span> | <span class=\"tp_resource_link\"><a id=\"tp_links_sh_496\" class=\"tp_show\" onclick=\"teachpress_pub_showhide('496','tp_links')\" title=\"Mostrar enlaces y recursos\" style=\"cursor:pointer;\">Enlaces<\/a><\/span> | <span class=\"tp_bibtex_link\"><a id=\"tp_bibtex_sh_496\" class=\"tp_show\" onclick=\"teachpress_pub_showhide('496','tp_bibtex')\" title=\"Mostrar entrada BibTeX \" style=\"cursor:pointer;\">BibTeX<\/a><\/span><\/p><div class=\"tp_bibtex\" id=\"tp_bibtex_496\" style=\"display:none;\"><div class=\"tp_bibtex_entry\"><pre>@article{Biblio24,<br \/>\r\ntitle = {Building a large, realistic and labeled HTTP URI dataset for anomaly-based intrusion detection systems: Biblio-US17 },<br \/>\r\nauthor = {Jes\u00fas E. {D\u00edaz-Verdejo} and {Estepa Alonso}, Rafael and {Estepa Alonso}, Antonio and F. J. 
{Mu\u00f1oz-Calle} and German {Madinabeitia}},<br \/>\r\ndoi = {10.1186\/s42400-024-00336-3},<br \/>\r\nissn = {2523-3246},<br \/>\r\nyear  = {2025},<br \/>\r\ndate = {2025-06-05},<br \/>\r\nurldate = {2025-06-05},<br \/>\r\njournal = {Cybersecurity},<br \/>\r\nvolume = {8},<br \/>\r\nnumber = {35},<br \/>\r\nabstract = {This paper introduces Biblio-US17, a labeled dataset collected over 6 months from the log files of a popular public website at the University of Seville. It contains 47 million records, each including the method, uniform resource identifier (URI) and associated response code and size of every request received by the web server. Records have been classified as either normal or attack using a comprehensive semi-automated process, which involved signature-based detection, assisted inspection of URIs vocabulary, and substantial expert manual supervision. Unlike comparable datasets, this one offers a genuine real-world perspective on the normal operation of an active website, along with an unbiased proportion of actual attacks (i.e., non-synthetic). This makes it ideal for evaluating and comparing anomaly-based approaches in a realistic environment. Its extensive size and duration also make it valuable for addressing challenges like data shift and insufficient training. This paper describes the collection and labeling processes, dataset structure, and most relevant properties. We also include an example of an application for assessing the performance of a simple anomaly detector. 
Biblio-US17, now available to the scientific community, can also be used to model the URIs used by current web servers.},<br \/>\r\nkeywords = {},<br \/>\r\npubstate = {published},<br \/>\r\ntppubtype = {article}<br \/>\r\n}<br \/>\r\n<\/pre><\/div><p class=\"tp_close_menu\"><a class=\"tp_close\" onclick=\"teachpress_pub_showhide('496','tp_bibtex')\">Cerrar<\/a><\/p><\/div><div class=\"tp_abstract\" id=\"tp_abstract_496\" style=\"display:none;\"><div class=\"tp_abstract_entry\">This paper introduces Biblio-US17, a labeled dataset collected over 6 months from the log files of a popular public website at the University of Seville. It contains 47 million records, each including the method, uniform resource identifier (URI) and associated response code and size of every request received by the web server. Records have been classified as either normal or attack using a comprehensive semi-automated process, which involved signature-based detection, assisted inspection of URIs vocabulary, and substantial expert manual supervision. Unlike comparable datasets, this one offers a genuine real-world perspective on the normal operation of an active website, along with an unbiased proportion of actual attacks (i.e., non-synthetic). This makes it ideal for evaluating and comparing anomaly-based approaches in a realistic environment. Its extensive size and duration also make it valuable for addressing challenges like data shift and insufficient training. This paper describes the collection and labeling processes, dataset structure, and most relevant properties. We also include an example of an application for assessing the performance of a simple anomaly detector. 
Biblio-US17, now available to the scientific community, can also be used to model the URIs used by current web servers.<\/div><p class=\"tp_close_menu\"><a class=\"tp_close\" onclick=\"teachpress_pub_showhide('496','tp_abstract')\">Cerrar<\/a><\/p><\/div><div class=\"tp_links\" id=\"tp_links_496\" style=\"display:none;\"><div class=\"tp_links_entry\"><ul class=\"tp_pub_list\"><li><i class=\"ai ai-doi\"><\/i><a class=\"tp_pub_list\" href=\"https:\/\/dx.doi.org\/10.1186\/s42400-024-00336-3\" title=\"DOI de seguimiento:10.1186\/s42400-024-00336-3\" target=\"_blank\">doi:10.1186\/s42400-024-00336-3<\/a><\/li><\/ul><\/div><p class=\"tp_close_menu\"><a class=\"tp_close\" onclick=\"teachpress_pub_showhide('496','tp_links')\">Cerrar<\/a><\/p><\/div><\/div><\/div><div class=\"tp_publication tp_publication_inproceedings\"><div class=\"tp_pub_info\"><p class=\"tp_pub_author\"> D\u00edaz-Verdejo, Jes\u00fas E.;  Estepa Alonso, Rafael;  Estepa Alonso, Antonio;  Mu\u00f1oz-Calle, Javier;  Madinabeitia, Germ\u00e1n<\/p><p class=\"tp_pub_title\"><a class=\"tp_title_link\" onclick=\"teachpress_pub_showhide('492','tp_links')\" style=\"cursor:pointer;\">Biblio-US17: A labeled real URL dataset for anomaly-based intrusion detection systems development<\/a> <span class=\"tp_pub_type tp_  inproceedings\">Proceedings Article<\/span> <\/p><p class=\"tp_pub_additional\"><span class=\"tp_pub_additional_in\">En: <\/span><span class=\"tp_pub_additional_booktitle\">European Interdisciplinary Cybersecurity Conference (EICC 2024), <\/span><span class=\"tp_pub_additional_pages\">pp. 
217\u2013218, <\/span><span class=\"tp_pub_additional_year\">2024<\/span>, <span class=\"tp_pub_additional_isbn\">ISBN: 9798400716515<\/span>.<\/p><p class=\"tp_pub_menu\"><span class=\"tp_abstract_link\"><a id=\"tp_abstract_sh_492\" class=\"tp_show\" onclick=\"teachpress_pub_showhide('492','tp_abstract')\" title=\"Mostrar resumen\" style=\"cursor:pointer;\">Resumen<\/a><\/span> | <span class=\"tp_resource_link\"><a id=\"tp_links_sh_492\" class=\"tp_show\" onclick=\"teachpress_pub_showhide('492','tp_links')\" title=\"Mostrar enlaces y recursos\" style=\"cursor:pointer;\">Enlaces<\/a><\/span> | <span class=\"tp_bibtex_link\"><a id=\"tp_bibtex_sh_492\" class=\"tp_show\" onclick=\"teachpress_pub_showhide('492','tp_bibtex')\" title=\"Mostrar entrada BibTeX \" style=\"cursor:pointer;\">BibTeX<\/a><\/span><\/p><div class=\"tp_bibtex\" id=\"tp_bibtex_492\" style=\"display:none;\"><div class=\"tp_bibtex_entry\"><pre>@inproceedings{Diaz-Verdejo2024b,<br \/>\r\ntitle = {Biblio-US17: A labeled real URL dataset for anomaly-based intrusion detection systems development},<br \/>\r\nauthor = {Jes\u00fas E. D\u00edaz-Verdejo and {Estepa Alonso}, Rafael and {Estepa Alonso}, Antonio and Javier Mu\u00f1oz-Calle and Germ\u00e1n Madinabeitia},<br \/>\r\ndoi = {10.1145\/3655693.3661319},<br \/>\r\nisbn = {9798400716515},<br \/>\r\nyear  = {2024},<br \/>\r\ndate = {2024-01-01},<br \/>\r\nurldate = {2024-01-01},<br \/>\r\nbooktitle = {European Interdisciplinary Cybersecurity Conference (EICC 2024)},<br \/>\r\npages = {217\u2013218},<br \/>\r\nabstract = {The development of anomaly-based intrusion detection systems is hindered by the scarcity of adequate datasets. An ideal dataset should contain real traffic, genuine attacks and cover a large time period that may demonstrate time shift. To be useful, the dataset must be labeled to provide accurate ground-truth. This paper presents a dataset of URLs that possesses these qualities. 
It can therefore be used to effectively train, test, and validate URL-based anomaly detection systems. The dataset is publicly available and contains 47M registers, including 320k attacks, and spans for 6.5 months. It is partitioned according to two schemes to allow for time dependent and time independent experiments.},<br \/>\r\nkeywords = {},<br \/>\r\npubstate = {published},<br \/>\r\ntppubtype = {inproceedings}<br \/>\r\n}<br \/>\r\n<\/pre><\/div><p class=\"tp_close_menu\"><a class=\"tp_close\" onclick=\"teachpress_pub_showhide('492','tp_bibtex')\">Cerrar<\/a><\/p><\/div><div class=\"tp_abstract\" id=\"tp_abstract_492\" style=\"display:none;\"><div class=\"tp_abstract_entry\">The development of anomaly-based intrusion detection systems is hindered by the scarcity of adequate datasets. An ideal dataset should contain real traffic, genuine attacks and cover a large time period that may demonstrate time shift. To be useful, the dataset must be labeled to provide accurate ground-truth. This paper presents a dataset of URLs that possesses these qualities. It can therefore be used to effectively train, test, and validate URL-based anomaly detection systems. The dataset is publicly available and contains 47M registers, including 320k attacks, and spans for 6.5 months. 
It is partitioned according to two schemes to allow for time dependent and time independent experiments.<\/div><p class=\"tp_close_menu\"><a class=\"tp_close\" onclick=\"teachpress_pub_showhide('492','tp_abstract')\">Cerrar<\/a><\/p><\/div><div class=\"tp_links\" id=\"tp_links_492\" style=\"display:none;\"><div class=\"tp_links_entry\"><ul class=\"tp_pub_list\"><li><i class=\"ai ai-doi\"><\/i><a class=\"tp_pub_list\" href=\"https:\/\/dx.doi.org\/10.1145\/3655693.3661319\" title=\"DOI de seguimiento:10.1145\/3655693.3661319\" target=\"_blank\">doi:10.1145\/3655693.3661319<\/a><\/li><\/ul><\/div><p class=\"tp_close_menu\"><a class=\"tp_close\" onclick=\"teachpress_pub_showhide('492','tp_links')\">Cerrar<\/a><\/p><\/div><\/div><\/div><\/div><\/div><\/p>\t\t\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t<\/section>\n\t\t\t\t\t<\/div>\n\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t<\/section>\n\t\t\t\t<\/div>\n\t\t","protected":false},"excerpt":{"rendered":"<p>Progreso 100% BIBLIO-US17: Dataset HTTP Tipo contribuci\u00f3n\/resultado Dataset p\u00fablico Descripci\u00f3n Dataset de peticiones HTTP reales etiquetadas para entrenamiento y validaci\u00f3n de AIDS y WAFIncluye 47 millones de peticiones normales \/ ataques \/ err\u00f3neas doi: 10.12795\/11441\/148254 Este resultado ha sido parcialmente financiado por MCIN\/ AEI\/10.13039\/501100011033\/ Este resultado ha sido parcialmente financiado por FEDER\/ Junta de Andaluc\u00eda 
[&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":0,"parent":911,"menu_order":0,"comment_status":"closed","ping_status":"closed","template":"","meta":{"ocean_post_layout":"","ocean_both_sidebars_style":"","ocean_both_sidebars_content_width":0,"ocean_both_sidebars_sidebars_width":0,"ocean_sidebar":"","ocean_second_sidebar":"","ocean_disable_margins":"enable","ocean_add_body_class":"","ocean_shortcode_before_top_bar":"","ocean_shortcode_after_top_bar":"","ocean_shortcode_before_header":"","ocean_shortcode_after_header":"","ocean_has_shortcode":"","ocean_shortcode_after_title":"","ocean_shortcode_before_footer_widgets":"","ocean_shortcode_after_footer_widgets":"","ocean_shortcode_before_footer_bottom":"","ocean_shortcode_after_footer_bottom":"","ocean_display_top_bar":"default","ocean_display_header":"default","ocean_header_style":"","ocean_center_header_left_menu":"","ocean_custom_header_template":"","ocean_custom_logo":0,"ocean_custom_retina_logo":0,"ocean_custom_logo_max_width":0,"ocean_custom_logo_tablet_max_width":0,"ocean_custom_logo_mobile_max_width":0,"ocean_custom_logo_max_height":0,"ocean_custom_logo_tablet_max_height":0,"ocean_custom_logo_mobile_max_height":0,"ocean_header_custom_menu":"","ocean_menu_typo_font_family":"","ocean_menu_typo_font_subset":"","ocean_menu_typo_font_size":0,"ocean_menu_typo_font_size_tablet":0,"ocean_menu_typo_font_size_mobile":0,"ocean_menu_typo_font_size_unit":"px","ocean_menu_typo_font_weight":"","ocean_menu_typo_font_weight_tablet":"","ocean_menu_typo_font_weight_mobile":"","ocean_menu_typo_transform":"","ocean_menu_typo_transform_tablet":"","ocean_menu_typo_transform_mobile":"","ocean_menu_typo_line_height":0,"ocean_menu_typo_line_height_tablet":0,"ocean_menu_typo_line_height_mobile":0,"ocean_menu_typo_line_height_unit":"","ocean_menu_typo_spacing":0,"ocean_menu_typo_spacing_tablet":0,"ocean_menu_typo_spacing_mobile":0,"ocean_menu_typo_spacing_unit":"","ocean_menu_link_color":"","ocean_menu_link_color_hover":"",
"ocean_menu_link_color_active":"","ocean_menu_link_background":"","ocean_menu_link_hover_background":"","ocean_menu_link_active_background":"","ocean_menu_social_links_bg":"","ocean_menu_social_hover_links_bg":"","ocean_menu_social_links_color":"","ocean_menu_social_hover_links_color":"","ocean_disable_title":"default","ocean_disable_heading":"default","ocean_post_title":"","ocean_post_subheading":"","ocean_post_title_style":"","ocean_post_title_background_color":"","ocean_post_title_background":0,"ocean_post_title_bg_image_position":"","ocean_post_title_bg_image_attachment":"","ocean_post_title_bg_image_repeat":"","ocean_post_title_bg_image_size":"","ocean_post_title_height":0,"ocean_post_title_bg_overlay":0.5,"ocean_post_title_bg_overlay_color":"","ocean_disable_breadcrumbs":"default","ocean_breadcrumbs_color":"","ocean_breadcrumbs_separator_color":"","ocean_breadcrumbs_links_color":"","ocean_breadcrumbs_links_hover_color":"","ocean_display_footer_widgets":"default","ocean_display_footer_bottom":"default","ocean_custom_footer_template":"","footnotes":""},"class_list":["post-1337","page","type-page","status-publish","hentry","entry"],"_links":{"self":[{"href":"https:\/\/dtstc.ugr.es\/neus-cslab\/wp-json\/wp\/v2\/pages\/1337","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/dtstc.ugr.es\/neus-cslab\/wp-json\/wp\/v2\/pages"}],"about":[{"href":"https:\/\/dtstc.ugr.es\/neus-cslab\/wp-json\/wp\/v2\/types\/page"}],"author":[{"embeddable":true,"href":"https:\/\/dtstc.ugr.es\/neus-cslab\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/dtstc.ugr.es\/neus-cslab\/wp-json\/wp\/v2\/comments?post=1337"}],"version-history":[{"count":421,"href":"https:\/\/dtstc.ugr.es\/neus-cslab\/wp-json\/wp\/v2\/pages\/1337\/revisions"}],"predecessor-version":[{"id":3523,"href":"https:\/\/dtstc.ugr.es\/neus-cslab\/wp-json\/wp\/v2\/pages\/1337\/revisions\/3523"}],"up":[{"embeddable":true,"href":"https:\/\/dtstc.ugr.es\/neus-cslab\/wp-json\/wp\/v2\/pa
ges\/911"}],"wp:attachment":[{"href":"https:\/\/dtstc.ugr.es\/neus-cslab\/wp-json\/wp\/v2\/media?parent=1337"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}