Conference paper (WACV 2024 proceedings). The proceedings title is stored only
in booktitle; it is intentionally not repeated in a series field.
@inproceedings{1780e36904e5405fb09b80789af7b020,
  author    = {Peven, Michael and Hager, Gregory D.},
  title     = {Embedding Task Structure for Action Detection},
  booktitle = {Proceedings - 2024 {IEEE} Winter Conference on Applications of Computer Vision, {WACV} 2024},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  pages     = {6590--6599},
  year      = {2024},
  month     = jan,
  day       = {3},
  doi       = {10.1109/WACV57701.2024.00647},
  language  = {English (US)},
  keywords  = {Algorithms, formulations, Machine learning architectures, Video recognition and understanding},
  note      = {Publisher Copyright: {\textcopyright} 2024 IEEE.; 2024 IEEE Winter Conference on Applications of Computer Vision, WACV 2024 ; Conference date: 04-01-2024 Through 08-01-2024},
  abstract  = {We present a straightforward, flexible method to enhance the accuracy and quality of action detection by expressing temporal and structural relationships of actions in the loss function of a deep network. We describe ways to represent otherwise implicit structure in video data and demonstrate how these structures reflect natural biases that improve network training. Our experiments show that our approach improves both accuracy and edit-distance of action recognition and detection models over a baseline. Our framework leads to improvements over prior work and obtains state-of-the-art results on multiple benchmarks. The code is available here.},
}