% Conference paper in the SIMPAR 2025 proceedings.
% Names use the "Last, First" form so the two-word surname "De Masi" parses
% unambiguously. The proceedings name is stored once, in booktitle.
@inproceedings{1425bec76dab429fa75d82f67c993559,
  author    = {Sudevan, Vidya and Zayer, Fakhreddine and Kausar, Rizwana and Javed, Sajid and Karki, Hamad and De Masi, Giulia and Dias, Jorge},
  title     = {Processing Continuous-Valued Signals for Multimodal Spike-Based Pose Regression},
  booktitle = {2025 {IEEE} International Conference on Simulation, Modeling, and Programming for Autonomous Robots, {SIMPAR} 2025},
  editor    = {Infantino, Ignazio and Seidita, Valeria},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  year      = {2025},
  doi       = {10.1109/SIMPAR62925.2025.10979088},
  language  = {British English},
  keywords  = {HoloOcean simulator, Multimodal Signals, Pose Regression, Spike-Coding-Decoding, Spiking Neural Networks, Underwater Simulators},
  note      = {Publisher Copyright: {\textcopyright} 2025 IEEE.; 2025 IEEE International Conference on Simulation, Modeling, and Programming for Autonomous Robots, SIMPAR 2025 ; Conference date: 14-04-2025 Through 18-04-2025},
  abstract  = {In underwater missions, energy-efficient operation is critical to maximize the operability of an autonomous robots and mission duration. At the same time, accurate pose estimation at reduced computational cost is essential for various underwater robotics and exploration applications. This paper presents an energy-efficient model for pose regression tasks based on direct Spike-Coding-Decoding (SCD) with adaptive threshold mechanism. The proposed approach enables efficient processing of multimodal continuous-valued images and IMU data. The effectiveness of the proposed module is tested using an open-sourced underwater simulator. In underwater robotics domain, real-world testing remains critical due to limited access to actual underwater environment, making simulators invaluable for rapid algorithm development and refinement. Accordingly, HoloOcean simulator is utilized to collect images and IMU sensor data from various underwater scenarios, which are then fed into a pose estimation framework. The SCD module uses 2D and 1D convolutional layers followed by LIF neurons with adaptive threshold to convert continuous-valued data into sparse spike-based signals. During spike decoding, the spike-based signals are converted back to continuous-values by utilizing the accumulated membrane potential of the output LIF neurons. The feasibility of SCD module is evaluated by its ability to introduce sparsity in spike-based representations with minimal information loss that enables efficient signal reconstruction. Custom multimodal data is collected using the HoloOcean simulator and is used to train the network. We compared the performance of the adaptive learnable threshold method with fixed threshold methods. The experimental results show that spike coding with an adaptive threshold mechanism is highly effective compared to a fixed threshold mechanism, as it can encode the continuous-valued input to its spike-based representation with $\approx 2\times$ higher sparsity and a lesser spike rate.},
}