% Conference paper by Khan, Lunnikivi, Huttunen and Boutellier (EUSIPCO 2019).
% The citation key is left unchanged so existing \cite commands keep working.
% NOTE(review): the exporter's free-text note (internal tags plus a placeholder
% "Conference date: 01-01-1900" that contradicts year 2019) is kept verbatim in
% the ignored field internal-note so it no longer prints in the bibliography.
% NOTE(review): address holds a country, not the publisher's city -- confirm
% the city against the publisher record and replace it.
@inproceedings{7fbd7fb1e6744883aea86f5ec456bbbb,
  author        = {Khan, Mir and Lunnikivi, Henri and Huttunen, Heikki and Boutellier, Jani},
  title         = {Comparing Optimization Methods of Neural Networks for Real-time Inference},
  booktitle     = {2019 27th European Signal Processing Conference ({EUSIPCO})},
  series        = {European Signal Processing Conference},
  publisher     = {IEEE},
  address       = {United States},
  year          = {2019},
  month         = sep,
  day           = {3},
  doi           = {10.23919/EUSIPCO.2019.8902760},
  isbn          = {978-1-5386-7300-3},
  language      = {English},
  abstract      = {This paper compares three different optimization approaches for accelerating the inference of convolutional neural networks (CNNs). We compare the techniques of separable convolution, weight pruning, and binarization. Each method is implemented and empirically compared in three aspects: preservation of accuracy, storage requirements, and achieved speed-up. Experiments are performed both on a desktop computer and on a mobile platform using a CNN model for vehicle type classification. Our experiments show that the largest speed-up is achieved by binarization, whereas pruning achieves the largest reduction in storage requirements. Both of these approaches largely preserve the accuracy of the original network.},
  internal-note = {INT=COMP, {"}Lunnikivi, Henri{"}; European Signal Processing Conference ; Conference date: 01-01-1900},
}