@inproceedings{d979e8a95dd042b8bd040bc972257c64,
  author    = {Turaga, Pavan and Chellappa, Rama},
  title     = {Learning Action Dictionaries from Video},
  abstract  = {Summarizing the contents of a video containing human activities is an important problem in computer vision and has important applications in automated surveillance systems. Summarizing a video requires one to identify and learn a 'vocabulary' of action-phrases corresponding to specific events and actions occurring in the video. We propose a generative model for dynamic scenes containing human activities as a composition of independent action-phrases - each of which is derived from an underlying vocabulary. Given a long video sequence, we propose a completely unsupervised approach to learn the vocabulary. Once the vocabulary is learnt, a video segment can be decomposed into a collection of phrases for summarization. We then describe methods to learn the correlations between activities and sequentiality of events. We also propose a novel method for building invariances to spatial transforms in the summarization scheme.},
  keywords  = {Activity analysis, Video summarization},
  year      = {2008},
  doi       = {10.1109/ICIP.2008.4712102},
  language  = {English (US)},
  isbn      = {1424417643},
  series    = {Proceedings - International Conference on Image Processing, ICIP},
  pages     = {1704--1707},
  booktitle = {2008 {IEEE} International Conference on Image Processing, {ICIP} 2008 Proceedings},
  note      = {2008 IEEE International Conference on Image Processing, ICIP 2008 ; Conference date: 12-10-2008 Through 15-10-2008},
}