% Conference paper in the CNSM 2020 proceedings.
% The hash-style citation key is kept unchanged so existing citations still resolve.
% The former "series" field was removed: it repeated "booktitle" verbatim, which
% makes styles that print the series show the conference name twice.
% NOTE(review): "address" holds a country, not the publisher's city; confirm and refine.
@inproceedings{93cef22a346b4272b8f3b9c08a1c78ae,
  author    = {Jain, Shubham and {De Buitleir}, Amy and Fallon, Enda},
  title     = {Unsupervised noise detection in unstructured data for automatic parsing},
  booktitle = {16th International Conference on Network and Service Management, {CNSM} 2020, 2nd International Workshop on Analytics for Service and Application Management, {AnServApp} 2020 and 1st International Workshop on the Future Evolution of Internet Protocols, {IPFuture} 2020},
  editor    = {Zincir-Heywood, Nur and Ulema, Mehmet and Sayit, Muge and Clayman, Stuart and Kim, Myung-Sup and Cetinkaya, Cihat},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  year      = {2020},
  month     = nov,
  day       = {2},
  doi       = {10.23919/CNSM50824.2020.9269096},
  isbn      = {9783903176317},
  language  = {English},
  keywords  = {Clustering, Information Extraction, Similarity, Unsupervised Data Mining},
  note      = {Publisher Copyright: {\textcopyright} 2020 IFIP.; 16th International Conference on Network and Service Management, CNSM 2020, 2nd International Workshop on Analytics for Service and Application Management, AnServApp 2020 and 1st International Workshop on the Future Evolution of Internet Protocols, IPFuture 2020 ; Conference date: 02-11-2020 Through 06-11-2020},
  abstract  = {The telecommunications industry makes extensive use of data extracted from logs, alarms, traces, diagnostics, and other monitoring devices. Analyzing the generated data requires that the data be parsed, re-structured, and re-formatted. Developing custom parsers for each input format is labor-intensive and requires domain knowledge. In this paper, we describe a novel unsupervised text processing pipeline to automatically detect and label relevant data and eliminate noise using Levenshtein similarity and Agglomerative clustering. We experiment with different similarity and clustering algorithms on a selection of common data formats to verify the accuracy of the proposed technique. The results suggest that the proposed methodology has higher accuracy.},
}