Brief
As we have seen before, cameras in Linux can be a complex story, and you have to watch every step you take to get it right.
libcamera does a great job of simplifying this in a platform-independent way and should be used whenever possible.
But not all cameras have a complex flow-chart. Some cameras (e.g. web cameras) are "self-contained" where the image data goes straight from the camera to the user application, without any detours through different IP blocks for image processing on its way.
The V4L2 framework is perfectly suited to those simple cameras.
When I searched around for a simple example application that explained the necessary steps to capture images from a camera, I simply could not find what I was looking for. This is my attempt to provide what I failed to find.
Buffer management
The V4L2 core maintains two buffer queues internally; one queue (referred to as IN) for incoming (camera->driver) image data and one (referred to as OUT) for outgoing (driver->user) image data.
Buffers are put into the IN queue via the VIDIOC_QBUF command. Once a buffer is filled, it is dequeued from IN and put into the OUT queue, where the data is made available to the user.
Whenever the user wants to dequeue a buffer with VIDIOC_DQBUF, and a buffer is available, it is taken from the OUT queue and pushed to the user application. If no buffer is available, the dequeue operation will wait until a buffer is filled and available, unless the file descriptor is opened with O_NONBLOCK.
Video data can be pushed to userspace in a few different ways:
- Read I/O - simply perform a read() operation and do not mess with buffers
- User pointer - The user application allocates buffers and provides them to the driver
- DMA buf - Mostly used for mem2mem devices
- mmap - Let driver allocate buffers and mmap(2) these to userspace.
This post will *only* focus on mmap:ed buffers!
Typical workflow
We will follow these steps in order to acquire frames from the camera:
Query capabilities
VIDIOC_QUERYCAP is used to query the supported capabilities. What is most interesting is to verify that it supports the mode (V4L2_CAP_STREAMING) we want to work with. It is also good manners to verify that it actually is a capture device (V4L2_CAP_VIDEO_CAPTURE) we have opened and nothing else.
The V4L2 API uses a struct v4l2_capability that is passed to the IOCTL. This structure is defined as follows:
/**
* struct v4l2_capability - Describes V4L2 device caps returned by VIDIOC_QUERYCAP
*
* @driver: name of the driver module (e.g. "bttv")
* @card: name of the card (e.g. "Hauppauge WinTV")
* @bus_info: name of the bus (e.g. "PCI:" + pci_name(pci_dev) )
* @version: KERNEL_VERSION
* @capabilities: capabilities of the physical device as a whole
* @device_caps: capabilities accessed via this particular device (node)
* @reserved: reserved fields for future extensions
*/
struct v4l2_capability {
__u8 driver[16];
__u8 card[32];
__u8 bus_info[32];
__u32 version;
__u32 capabilities;
__u32 device_caps;
__u32 reserved[3];
};
The v4l2_capability.capabilities field is decoded as follows:
/* Values for 'capabilities' field */
#define V4L2_CAP_VIDEO_CAPTURE 0x00000001 /* Is a video capture device */
#define V4L2_CAP_VIDEO_OUTPUT 0x00000002 /* Is a video output device */
#define V4L2_CAP_VIDEO_OVERLAY 0x00000004 /* Can do video overlay */
#define V4L2_CAP_VBI_CAPTURE 0x00000010 /* Is a raw VBI capture device */
#define V4L2_CAP_VBI_OUTPUT 0x00000020 /* Is a raw VBI output device */
#define V4L2_CAP_SLICED_VBI_CAPTURE 0x00000040 /* Is a sliced VBI capture device */
#define V4L2_CAP_SLICED_VBI_OUTPUT 0x00000080 /* Is a sliced VBI output device */
#define V4L2_CAP_RDS_CAPTURE 0x00000100 /* RDS data capture */
#define V4L2_CAP_VIDEO_OUTPUT_OVERLAY 0x00000200 /* Can do video output overlay */
#define V4L2_CAP_HW_FREQ_SEEK 0x00000400 /* Can do hardware frequency seek */
#define V4L2_CAP_RDS_OUTPUT 0x00000800 /* Is an RDS encoder */
/* Is a video capture device that supports multiplanar formats */
#define V4L2_CAP_VIDEO_CAPTURE_MPLANE 0x00001000
/* Is a video output device that supports multiplanar formats */
#define V4L2_CAP_VIDEO_OUTPUT_MPLANE 0x00002000
/* Is a video mem-to-mem device that supports multiplanar formats */
#define V4L2_CAP_VIDEO_M2M_MPLANE 0x00004000
/* Is a video mem-to-mem device */
#define V4L2_CAP_VIDEO_M2M 0x00008000
#define V4L2_CAP_TUNER 0x00010000 /* has a tuner */
#define V4L2_CAP_AUDIO 0x00020000 /* has audio support */
#define V4L2_CAP_RADIO 0x00040000 /* is a radio device */
#define V4L2_CAP_MODULATOR 0x00080000 /* has a modulator */
#define V4L2_CAP_SDR_CAPTURE 0x00100000 /* Is a SDR capture device */
#define V4L2_CAP_EXT_PIX_FORMAT 0x00200000 /* Supports the extended pixel format */
#define V4L2_CAP_SDR_OUTPUT 0x00400000 /* Is a SDR output device */
#define V4L2_CAP_META_CAPTURE 0x00800000 /* Is a metadata capture device */
#define V4L2_CAP_READWRITE 0x01000000 /* read/write systemcalls */
#define V4L2_CAP_STREAMING 0x04000000 /* streaming I/O ioctls */
#define V4L2_CAP_META_OUTPUT 0x08000000 /* Is a metadata output device */
#define V4L2_CAP_TOUCH 0x10000000 /* Is a touch device */
#define V4L2_CAP_IO_MC 0x20000000 /* Is input/output controlled by the media controller */
#define V4L2_CAP_DEVICE_CAPS 0x80000000 /* sets device capabilities field */
Example code on how to use VIDIOC_QUERYCAP:
/*
 * Query the device capabilities with VIDIOC_QUERYCAP and verify that the
 * opened node is actually a video capture device. Exits on fatal errors;
 * missing read and streaming I/O capabilities are only reported, since
 * not every application needs both.
 */
void query_capabilites(int fd)
{
    struct v4l2_capability cap;

    if (-1 == ioctl(fd, VIDIOC_QUERYCAP, &cap)) {
        perror("Query capabilites");
        exit(EXIT_FAILURE);
    }

    if (!(cap.capabilities & V4L2_CAP_VIDEO_CAPTURE)) {
        /* Bug fix: "\\n" printed a literal backslash-n, not a newline */
        fprintf(stderr, "Device is no video capture device\n");
        exit(EXIT_FAILURE);
    }

    if (!(cap.capabilities & V4L2_CAP_READWRITE)) {
        fprintf(stderr, "Device does not support read i/o\n");
    }

    if (!(cap.capabilities & V4L2_CAP_STREAMING)) {
        fprintf(stderr, "Device does not support streaming i/o\n");
    }
}
Capabilities could also be read out with v4l2-ctl:
marcus@goliat:~$ v4l2-ctl -d /dev/video4 --info
Driver Info:
Driver name : uvcvideo
Card type : USB 2.0 Camera: USB Camera
Bus info : usb-0000:00:14.0-8.3.1.1
Driver version : 6.0.8
Capabilities : 0x84a00001
Video Capture
Metadata Capture
Streaming
Extended Pix Format
Device Capabilities
Device Caps : 0x04200001
Video Capture
Streaming
Extended Pix Format
Request buffers
The next step, once we are done with the format preparations, is to allocate buffers so we have somewhere to store the images.
This is exactly what VIDIOC_REQBUFS ioctl does for you. The command does take a struct v4l2_requestbuffers as argument:
/* Argument to the VIDIOC_REQBUFS ioctl: negotiates how many streaming
 * buffers of which memory method the driver should allocate. */
struct v4l2_requestbuffers {
__u32 count; /* in: number of buffers requested; out: number the driver actually granted */
__u32 type; /* enum v4l2_buf_type */
__u32 memory; /* enum v4l2_memory */
__u32 capabilities; /* NOTE(review): presumably driver-reported buffer capability flags — confirm against videodev2.h */
__u8 flags;
__u8 reserved[3];
};
Some of these fields must be populated before we can use it:
- v4l2_requestbuffers.count - Should be set to the number of memory buffers that should be allocated. It is important to set a number high enough that frames won't be dropped due to lack of queued buffers. The driver is the one who decides what the minimum number is. The application should always check the returned value of this field, as the driver could grant a different number of buffers than the application actually requested.
- v4l2_requestbuffers.type - As we are going to use a camera device, set this to V4L2_BUF_TYPE_VIDEO_CAPTURE.
- v4l2_requestbuffers.memory - Set the streaming method. Available values are V4L2_MEMORY_MMAP, V4L2_MEMORY_USERPTR and V4L2_MEMORY_DMABUF.
Example code on how to use VIDIOC_REQBUFS:
/*
 * Ask the driver to allocate @count mmap:able capture buffers.
 * Returns the number of buffers actually granted, which may differ
 * from the number requested — callers must check the return value.
 * Exits on ioctl failure.
 */
int request_buffer(int fd, int count)
{
    struct v4l2_requestbuffers req = {0};

    req.count = count;
    req.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
    req.memory = V4L2_MEMORY_MMAP;

    if (-1 == ioctl(fd, VIDIOC_REQBUFS, &req)) {
        perror("Requesting Buffer");
        /* consistency fix: use EXIT_FAILURE like the other examples */
        exit(EXIT_FAILURE);
    }

    return req.count;
}
Query buffer
After the buffers are allocated by the kernel, we have to query the memory offset (a "cookie" in device memory, not a user pointer) of each allocated buffer in order to mmap() them.
The VIDIOC_QUERYBUF ioctl works with the struct v4l2_buffer:
/**
* struct v4l2_buffer - video buffer info
* @index: id number of the buffer
* @type: enum v4l2_buf_type; buffer type (type == *_MPLANE for
* multiplanar buffers);
* @bytesused: number of bytes occupied by data in the buffer (payload);
* unused (set to 0) for multiplanar buffers
* @flags: buffer informational flags
* @field: enum v4l2_field; field order of the image in the buffer
* @timestamp: frame timestamp
* @timecode: frame timecode
* @sequence: sequence count of this frame
* @memory: enum v4l2_memory; the method, in which the actual video data is
* passed
* @offset: for non-multiplanar buffers with memory == V4L2_MEMORY_MMAP;
* offset from the start of the device memory for this plane,
* (or a "cookie" that should be passed to mmap() as offset)
* @userptr: for non-multiplanar buffers with memory == V4L2_MEMORY_USERPTR;
* a userspace pointer pointing to this buffer
* @fd: for non-multiplanar buffers with memory == V4L2_MEMORY_DMABUF;
* a userspace file descriptor associated with this buffer
* @planes: for multiplanar buffers; userspace pointer to the array of plane
* info structs for this buffer
* @m: union of @offset, @userptr, @planes and @fd
* @length: size in bytes of the buffer (NOT its payload) for single-plane
* buffers (when type != *_MPLANE); number of elements in the
* planes array for multi-plane buffers
* @reserved2: drivers and applications must zero this field
* @request_fd: fd of the request that this buffer should use
* @reserved: for backwards compatibility with applications that do not know
* about @request_fd
*
* Contains data exchanged by application and driver using one of the Streaming
* I/O methods.
*/
struct v4l2_buffer {
__u32 index;
__u32 type;
__u32 bytesused;
__u32 flags;
__u32 field;
struct timeval timestamp;
struct v4l2_timecode timecode;
__u32 sequence;
/* memory location */
__u32 memory;
union {
__u32 offset;
unsigned long userptr;
struct v4l2_plane *planes;
__s32 fd;
} m;
__u32 length;
__u32 reserved2;
union {
__s32 request_fd;
__u32 reserved;
};
};
The structure contains a lot of fields, but in our mmap() example, we only need to fill out a few:
- v4l2_buffer.type - Buffer type, we use V4L2_BUF_TYPE_VIDEO_CAPTURE.
- v4l2_buffer.memory - Memory method, still go for V4L2_MEMORY_MMAP.
- v4l2_buffer.index - As we probably have requested multiple buffers and want to mmap each of them, we have to distinguish the buffers somehow. The index field is the buffer id, ranging from 0 to v4l2_requestbuffers.count - 1.
Example code on how to use VIDIOC_QUERYBUF:
/*
 * Query the mmap offset ("cookie") of buffer @index with VIDIOC_QUERYBUF
 * and map the buffer into the application's address space.
 *
 * On success, *buffer points at the mapping and the buffer length in
 * bytes is returned. Returns 2 if the query fails and -1 if mmap fails.
 */
int query_buffer(int fd, int index, unsigned char **buffer)
{
    struct v4l2_buffer buf = {0};

    buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
    buf.memory = V4L2_MEMORY_MMAP;
    buf.index = index;

    if (-1 == ioctl(fd, VIDIOC_QUERYBUF, &buf)) {
        perror("Could not query buffer");
        return 2;
    }

    /* buf.m.offset is not an address, just a token to hand to mmap(2) */
    *buffer = mmap(NULL, buf.length, PROT_READ | PROT_WRITE, MAP_SHARED,
                   fd, buf.m.offset);
    if (*buffer == MAP_FAILED) {
        /* bug fix: the original never checked for MAP_FAILED */
        perror("Could not mmap buffer");
        return -1;
    }

    return buf.length;
}
Queue buffers
Before the buffers can be filled with data, they have to be enqueued. Enqueued buffers will lock the memory pages used so that they cannot be swapped out during usage.
The buffers remain locked until they are dequeued, the device is closed, or streaming is turned off.
VIDIOC_QBUF takes the same argument as VIDIOC_QUERYBUF and has to be populated the same way.
Example code on how to use VIDIOC_QBUF:
/*
 * Enqueue buffer @index on the driver's incoming (IN) queue so that it
 * can be filled with video data. Returns the number of bytes used in
 * the buffer on success, or -1 on failure (the original returned 1,
 * which is indistinguishable from a valid byte count).
 */
int queue_buffer(int fd, int index)
{
    struct v4l2_buffer bufd = {0};

    bufd.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
    bufd.memory = V4L2_MEMORY_MMAP;
    bufd.index = index;

    if (-1 == ioctl(fd, VIDIOC_QBUF, &bufd)) {
        perror("Queue Buffer");
        return -1;
    }

    return bufd.bytesused;
}
Start stream
Finally, all preparations are done and we are ready to start the stream! VIDIOC_STREAMON basically informs the v4l layer that it can start acquiring video frames and use the queued buffers to store them.
Example code on how to use VIDIOC_STREAMON:
/*
 * Tell the V4L2 layer to start capturing frames into the queued
 * buffers. Returns 0 on success; exits on failure.
 */
int start_streaming(int fd)
{
    unsigned int type = V4L2_BUF_TYPE_VIDEO_CAPTURE;

    if (ioctl(fd, VIDIOC_STREAMON, &type) == -1) {
        perror("VIDIOC_STREAMON");
        exit(EXIT_FAILURE);
    }

    /* bug fix: the original fell off the end of a non-void function */
    return 0;
}
Dequeue buffer
Once buffers are filled with video data, those are ready to be dequeued and consumed by the application.
This ioctl will be blocking (unless O_NONBLOCK is used) until a buffer is available.
As soon as the buffer is dequeued and processed, the application has to queue the buffer back immediately so that the driver layer can fill it with new frames.
This is usually part of the application main-loop.
VIDIOC_DQBUF works similar to VIDIOC_QBUF but it populates the v4l2_buffer.index field with the index number of the buffer that has been dequeued.
Example code on how to use VIDIOC_DQBUF:
/*
 * Dequeue one filled buffer from the driver's outgoing (OUT) queue.
 * Blocks until a buffer is available unless the fd was opened with
 * O_NONBLOCK. Returns the index of the dequeued buffer, or -1 on
 * failure (the original returned 1, which is also a valid index).
 */
int dequeue_buffer(int fd)
{
    struct v4l2_buffer bufd = {0};

    bufd.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
    bufd.memory = V4L2_MEMORY_MMAP;

    if (-1 == ioctl(fd, VIDIOC_DQBUF, &bufd)) {
        perror("DeQueue Buffer");
        return -1;
    }

    return bufd.index;
}
Stop stream
Once we are done with the video capturing, we can stop the streaming.
This will unlock all enqueued buffers and stop capturing frames.
Example code on how to use VIDIOC_STREAMOFF:
/*
 * Stop the video stream and unlock all enqueued buffers.
 * Returns 0 on success; exits on failure.
 */
int stop_streaming(int fd)
{
    unsigned int type = V4L2_BUF_TYPE_VIDEO_CAPTURE;

    if (ioctl(fd, VIDIOC_STREAMOFF, &type) == -1) {
        /* bug fix: the error message said VIDIOC_STREAMON */
        perror("VIDIOC_STREAMOFF");
        exit(EXIT_FAILURE);
    }

    /* bug fix: the original fell off the end of a non-void function */
    return 0;
}
Full example
It is not the most beautiful example, but it is at least something to work with.
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>
#include <errno.h>
#include <sys/mman.h>
#include <sys/ioctl.h>
#include <linux/videodev2.h>
#define NBUF 3
/*
 * Query the device capabilities with VIDIOC_QUERYCAP and verify that
 * the opened node is a video capture device that supports streaming
 * I/O. Exits on fatal errors; a missing read i/o capability is only
 * reported since this example does not use read(2).
 */
void query_capabilites(int fd)
{
    struct v4l2_capability cap;

    if (-1 == ioctl(fd, VIDIOC_QUERYCAP, &cap)) {
        perror("Query capabilites");
        exit(EXIT_FAILURE);
    }
    if (!(cap.capabilities & V4L2_CAP_VIDEO_CAPTURE)) {
        /* Bug fix: "\\n" printed a literal backslash-n, not a newline */
        fprintf(stderr, "Device is no video capture device\n");
        exit(EXIT_FAILURE);
    }
    if (!(cap.capabilities & V4L2_CAP_READWRITE)) {
        fprintf(stderr, "Device does not support read i/o\n");
    }
    if (!(cap.capabilities & V4L2_CAP_STREAMING)) {
        fprintf(stderr, "Device does not support streaming i/o\n");
        exit(EXIT_FAILURE);
    }
}
/*
 * Enqueue buffer @index on the driver's incoming (IN) queue so that it
 * can be filled with video data. Returns the number of bytes used in
 * the buffer on success, or -1 on failure (the original returned 1,
 * which is indistinguishable from a valid byte count).
 */
int queue_buffer(int fd, int index)
{
    struct v4l2_buffer bufd = {0};

    bufd.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
    bufd.memory = V4L2_MEMORY_MMAP;
    bufd.index = index;

    if (-1 == ioctl(fd, VIDIOC_QBUF, &bufd)) {
        perror("Queue Buffer");
        return -1;
    }

    return bufd.bytesused;
}
/*
 * Dequeue one filled buffer from the driver's outgoing (OUT) queue.
 * Blocks until a buffer is available unless the fd was opened with
 * O_NONBLOCK. Returns the index of the dequeued buffer, or -1 on
 * failure (the original returned 1, which is also a valid index).
 */
int dequeue_buffer(int fd)
{
    struct v4l2_buffer bufd = {0};

    bufd.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
    bufd.memory = V4L2_MEMORY_MMAP;

    if (-1 == ioctl(fd, VIDIOC_DQBUF, &bufd)) {
        perror("DeQueue Buffer");
        return -1;
    }

    return bufd.index;
}
/*
 * Tell the V4L2 layer to start capturing frames into the queued
 * buffers. Returns 0 on success; exits on failure.
 */
int start_streaming(int fd)
{
    unsigned int type = V4L2_BUF_TYPE_VIDEO_CAPTURE;

    if (ioctl(fd, VIDIOC_STREAMON, &type) == -1) {
        perror("VIDIOC_STREAMON");
        exit(EXIT_FAILURE);
    }

    /* bug fix: the original fell off the end of a non-void function */
    return 0;
}
/*
 * Stop the video stream and unlock all enqueued buffers.
 * Returns 0 on success; exits on failure.
 */
int stop_streaming(int fd)
{
    unsigned int type = V4L2_BUF_TYPE_VIDEO_CAPTURE;

    if (ioctl(fd, VIDIOC_STREAMOFF, &type) == -1) {
        /* bug fix: the error message said VIDIOC_STREAMON */
        perror("VIDIOC_STREAMOFF");
        exit(EXIT_FAILURE);
    }

    /* bug fix: the original fell off the end of a non-void function */
    return 0;
}
/*
 * Query the mmap offset ("cookie") of buffer @index with VIDIOC_QUERYBUF
 * and map the buffer into the application's address space.
 *
 * On success, *buffer points at the mapping and the buffer length in
 * bytes is returned. Returns 2 if the query fails and -1 if mmap fails.
 */
int query_buffer(int fd, int index, unsigned char **buffer)
{
    struct v4l2_buffer buf = {0};

    buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
    buf.memory = V4L2_MEMORY_MMAP;
    buf.index = index;

    if (-1 == ioctl(fd, VIDIOC_QUERYBUF, &buf)) {
        perror("Could not query buffer");
        return 2;
    }

    /* buf.m.offset is not an address, just a token to hand to mmap(2) */
    *buffer = mmap(NULL, buf.length, PROT_READ | PROT_WRITE, MAP_SHARED,
                   fd, buf.m.offset);
    if (*buffer == MAP_FAILED) {
        /* bug fix: the original never checked for MAP_FAILED */
        perror("Could not mmap buffer");
        return -1;
    }

    return buf.length;
}
/*
 * Ask the driver to allocate @count mmap:able capture buffers via
 * VIDIOC_REQBUFS. Returns the number of buffers actually granted,
 * which may differ from the request. Exits on ioctl failure.
 */
int request_buffer(int fd, int count)
{
    struct v4l2_requestbuffers req = {
        .count  = count,
        .type   = V4L2_BUF_TYPE_VIDEO_CAPTURE,
        .memory = V4L2_MEMORY_MMAP,
    };

    if (ioctl(fd, VIDIOC_REQBUFS, &req) == -1) {
        perror("Requesting Buffer");
        exit(EXIT_FAILURE);
    }

    return req.count;
}
/*
 * Negotiate the capture format with VIDIOC_S_FMT:
 * 320x240, packed YUYV, progressive (no interlacing).
 * Exits on failure; returns the (zero) ioctl result on success.
 */
int set_format(int fd)
{
    struct v4l2_format fmt = {
        .type = V4L2_BUF_TYPE_VIDEO_CAPTURE,
        .fmt.pix.width = 320,
        .fmt.pix.height = 240,
        .fmt.pix.pixelformat = V4L2_PIX_FMT_YUYV,
        .fmt.pix.field = V4L2_FIELD_NONE,
    };

    int ret = ioctl(fd, VIDIOC_S_FMT, &fmt);
    if (ret == -1) {
        perror("Could not set format");
        exit(EXIT_FAILURE);
    }

    return ret;
}
/*
 * Capture a single frame from /dev/video4 using mmap:ed streaming I/O
 * and write the raw YUYV data to output.raw.
 */
int main(void)
{
    unsigned char *buffer[NBUF];
    int size = 0;
    int index;
    int nbufs;

    int fd = open("/dev/video4", O_RDWR);
    if (fd < 0) {
        /* bug fix: open() failure was not checked */
        perror("open /dev/video4");
        exit(EXIT_FAILURE);
    }

    query_capabilites(fd);
    set_format(fd);

    nbufs = request_buffer(fd, NBUF);
    if (nbufs > NBUF) {
        fprintf(stderr, "Increase NBUF to at least %i\n", nbufs);
        exit(EXIT_FAILURE);
    }

    /*
     * Map and enqueue each granted buffer at its own index. Bug fix:
     * the original always called query_buffer(fd, 0, &buffer[0]),
     * leaving buffer[1..] uninitialized — a later write() with a
     * dequeued index != 0 would read through a wild pointer.
     * All buffers share the same format, so one size is enough.
     */
    for (int i = 0; i < nbufs; i++) {
        size = query_buffer(fd, i, &buffer[i]);
        queue_buffer(fd, i);
    }

    start_streaming(fd);

    fd_set fds;
    FD_ZERO(&fds);
    FD_SET(fd, &fds);
    struct timeval tv = { .tv_sec = 2 };

    int r = select(fd + 1, &fds, NULL, NULL, &tv);
    if (r == -1) {
        perror("Waiting for Frame");
        exit(EXIT_FAILURE);
    }
    if (r == 0) {
        /* bug fix: a 2 s timeout was silently treated as success */
        fprintf(stderr, "Timeout waiting for frame\n");
        exit(EXIT_FAILURE);
    }

    index = dequeue_buffer(fd);
    if (index < 0)
        exit(EXIT_FAILURE);

    int file = open("output.raw", O_RDWR | O_CREAT, 0666);
    fprintf(stderr, "file == %i\n", file);
    if (file >= 0 && write(file, buffer[index], size) != size)
        perror("write");

    stop_streaming(fd);
    close(file);
    close(fd);
    return 0;
}