# Two-Stream Convolutional Networks for Action Recognition in Videos双流网络论文精读

## Two-Stream Convolutional Networks for Action Recognition in Videos双流网络论文精读

OpenCV 提取光流的官方教程代码在这里：[OpenCV: Optical Flow](https://docs.opencv.org/4.x/d4/dee/tutorial_optical_flow.html)

optical flow stacking的计算公式如下，

$$
\begin{aligned}
I_{\tau}(u, v, 2k-1) &= d_{\tau+k-1}^{x}(u, v), \\
I_{\tau}(u, v, 2k) &= d_{\tau+k-1}^{y}(u, v), \qquad u=[1; w],\; v=[1; h],\; k=[1; L].
\end{aligned}
$$

其中 $d_{\tau}^{x}$、$d_{\tau}^{y}$ 分别是第 $\tau$ 帧光流位移场的水平与竖直分量，$w \times h$ 是帧的尺寸，$L$ 是堆叠的连续帧数（因此输入共 $2L$ 个通道）。
trajectory stacking的计算公式如下:

$$
\begin{aligned}
I_{\tau}(u, v, 2k-1) &= d_{\tau+k-1}^{x}\left(\mathbf{p}_{k}\right), \\
I_{\tau}(u, v, 2k) &= d_{\tau+k-1}^{y}\left(\mathbf{p}_{k}\right), \qquad u=[1; w],\; v=[1; h],\; k=[1; L].
\end{aligned}
$$

其中 $\mathbf{p}_{k}$ 是轨迹上的第 $k$ 个点，轨迹从第 $\tau$ 帧的位置 $(u, v)$ 开始，由以下递归关系定义：

$$
\mathbf{p}_{1}=(u, v); \qquad \mathbf{p}_{k}=\mathbf{p}_{k-1}+\mathbf{d}_{\tau+k-2}\left(\mathbf{p}_{k-1}\right), \quad k>1.
$$

下面是 PyTorch 的网络实现细节：

class SpatialNet(nn.Module):
    """Spatial stream of the two-stream action-recognition network.

    Takes a single RGB frame and predicts an action class. Layout:
    five conv layers (LRN after the first two pooled conv stages),
    followed by a three-layer fully connected classifier.

    Fixes vs. the pasted original: restored the indentation (the
    flush-left body was an IndentationError), removed the dead
    commented-out batchnorm lines, and generalized the hard-coded
    5-way head into ``num_classes`` (default 5, so behavior and the
    zero-arg constructor are unchanged).

    Args:
        num_classes: number of output action classes.
    """

    def __init__(self, num_classes: int = 5):
        super(SpatialNet, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 96, kernel_size=7, stride=2),
            nn.ReLU(),
            nn.MaxPool2d(3, stride=2),
            nn.LocalResponseNorm(2),
            nn.Conv2d(96, 256, kernel_size=5, stride=2),
            nn.ReLU(),
            nn.MaxPool2d(3, stride=2),
            nn.LocalResponseNorm(2),
            nn.Conv2d(256, 512, kernel_size=3),
            nn.ReLU(),
            nn.Conv2d(512, 512, kernel_size=3),
            nn.ReLU(),
            nn.Conv2d(512, 512, kernel_size=3),
            nn.ReLU(),
            nn.MaxPool2d(3, stride=2),
        )
        # 2048 = 512 channels * 2 * 2 spatial — this assumes a 224x224
        # input; any other input size will fail at the first Linear.
        self.classifier = nn.Sequential(
            nn.Linear(2048, 4096),
            nn.Dropout(),
            nn.Linear(4096, 2048),
            nn.Dropout(),
            nn.Linear(2048, num_classes),
        )

    def forward(self, x):
        """Map a batch (N, 3, 224, 224) to class logits (N, num_classes)."""
        x = self.features(x)
        x = x.view(x.size(0), -1)  # flatten all feature maps per sample
        x = self.classifier(x)
        return x

class TemporalNet(nn.Module):
    """Temporal stream of the two-stream action-recognition network.

    Consumes a stack of optical-flow displacement fields and predicts an
    action class. The conv/classifier layout mirrors SpatialNet.

    Fixes vs. the pasted original: restored indentation, removed the dead
    commented-out batchnorm lines, and — crucially — added the missing
    ``forward`` method (the original defined no forward, so the module
    could not be called at all). The first conv's input-channel count is
    generalized into ``in_channels`` because the temporal stream takes
    2L flow channels, not RGB; the default (3) preserves the original
    constructor's behavior. For flow stacking pass ``in_channels=2 * L``
    (e.g. 20 for L = 10).

    Args:
        in_channels: number of input channels (2L for stacked flow).
        num_classes: number of output action classes.
    """

    def __init__(self, in_channels: int = 3, num_classes: int = 5):
        super(TemporalNet, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(in_channels, 96, kernel_size=7, stride=2),
            nn.ReLU(),
            nn.MaxPool2d(3, stride=2),
            nn.LocalResponseNorm(2),
            nn.Conv2d(96, 256, kernel_size=5, stride=2),
            nn.ReLU(),
            nn.MaxPool2d(3, stride=2),
            nn.LocalResponseNorm(2),
            nn.Conv2d(256, 512, kernel_size=3),
            nn.ReLU(),
            nn.Conv2d(512, 512, kernel_size=3),
            nn.ReLU(),
            nn.Conv2d(512, 512, kernel_size=3),
            nn.ReLU(),
            nn.MaxPool2d(3, stride=2),
        )
        # 2048 = 512 * 2 * 2 assumes a 224x224 input; see SpatialNet.
        self.classifier = nn.Sequential(
            nn.Linear(2048, 4096),
            nn.Dropout(),
            nn.Linear(4096, 2048),
            nn.Dropout(),
            nn.Linear(2048, num_classes),
        )

    def forward(self, x):
        """Map a batch (N, in_channels, 224, 224) to logits (N, num_classes)."""
        x = self.features(x)
        x = x.view(x.size(0), -1)  # flatten all feature maps per sample
        x = self.classifier(x)
        return x


THE END
