% Conference paper from IntelliSys 2018, published in a Springer book series.
% Acronyms in title/booktitle are brace-protected so sentence-casing styles
% keep their capitalisation; the publisher is double-braced so biblatex does
% not split the name at its literal " and "; the percent sign in the abstract
% is escaped so styles that typeset abstracts do not treat it as a comment.
% NOTE(review): address holds a country rather than the publisher's city, and
% the copyright year in note (2019) differs from year (2018) -- confirm both.
@inproceedings{24ad9c472b1b4a55858d6b824e87586f,
  author    = {Chen, Yu and Shi, Zhiqiang and Li, Hong and Zhao, Weiwei and Liu, Yiliang and Qiao, Yuansong},
  title     = {{HIMALIA}: Recovering compiler optimization levels from binaries by deep learning},
  booktitle = {Intelligent Systems and Applications - Proceedings of the 2018 Intelligent Systems Conference {IntelliSys} Volume 1},
  editor    = {Arai, Kohei and Kapoor, Supriya and Bhatia, Rahul},
  series    = {Advances in Intelligent Systems and Computing},
  publisher = {{Springer-Verlag GmbH and Co. KG}},
  address   = {Germany},
  year      = {2018},
  pages     = {35--47},
  doi       = {10.1007/978-3-030-01054-6_3},
  isbn      = {9783030010539},
  language  = {English},
  keywords  = {Binary analysis, Feature embedding, Model explicable, RNN, Reverse engineering},
  abstract  = {Compiler optimization levels are important for binary analysis, but they are not available in COTS binaries. In this paper, we present the first end-to-end system called HIMALIA which recovers compiler optimization levels from disassembled binary code without any knowledge of the target instruction set semantics. We achieve this by formulating the problem as a deep learning task and training a two layer recurrent neural network. Besides the recurrent neural network, HIMALIA is also powered by two other techniques: instruction embedding and a new function representation method. We implement HIMALIA and carry out comprehensive experiments on our dataset consisting of 378,695 different functions from 5828 binaries compiled by GCC. The results show that HIMALIA exhibits accuracy of around 89\%. Moreover, we find that HIMALIA{\textquoteright}s learnt model is explicable: it can auto-learn common compiler conventions and idioms that match our prior knowledge.},
  note      = {Publisher Copyright: {\textcopyright} Springer Nature Switzerland AG 2019.; Intelligent Systems Conference, IntelliSys 2018 ; Conference date: 06-09-2018 Through 07-09-2018},
}