# 【pytorch】使用pytorch自己实现LayerNorm

PyTorch中使用LayerNorm有两种方式：一种是nn.LayerNorm（模块），另一种是nn.functional.layer_norm（函数）。

# 1. 计算方式

``````
x=
[
[0.1,0.2,0.3],
[0.4,0.5,0.6]
]
# shape （2，3）
``````

``````
# 计算的维度是最后一维
mean=
[
(0.1+0.2+0.3)/3=0.2,
(0.4+0.5+0.6)/3=0.5
]
``````

``````
# eps = 1e-5 = 0.00001（与下面代码中的eps一致）
var=[  mean((0.1-0.2)^2=0.01, (0.2-0.2)^2=0, (0.3-0.2)^2=0.01)+0.00001,
mean((0.4-0.5)^2=0.01, (0.5-0.5)^2=0, (0.6-0.5)^2=0.01)+0.00001
]
= [ 0.0067+0.00001,
0.0067+0.00001
]

sqrt(var) = [ 0.0817,
0.0817
]
``````

``````
(x-mean)/sqrt(var) = [ [(0.1-0.2)/0.0817,   (0.2-0.2)/0.0817,  (0.3-0.2)/0.0817],
[(0.4-0.5)/0.0817, (0.5-0.5)/0.0817, (0.6-0.5)/0.0817]
]
= [  [-1.2238,  0.0000,  1.2238],
[-1.2238,  0.0000,  1.2238]
]
``````

# 2. 实现代码

``````python
import numpy as np
import torch
import torch.nn.functional as F

# Input of shape (2, 3). torch.tensor is the documented factory for building a
# tensor from Python data (torch.Tensor is the legacy float-tensor constructor).
x = torch.tensor([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]])

# normalized_shape lists the SIZES of the trailing dimensions to normalize
# over (here: the last dimension, whose size is 3) -- not their indices.
normalized_shape = [3]
# Constant added to the variance before the square root; shared by all three
# computations below so that their results are directly comparable.
eps = 1e-5

# Module form of layer normalization.
nn_layer_norm = torch.nn.LayerNorm(normalized_shape=normalized_shape, eps=eps, elementwise_affine=True)
print("LayerNorm=", nn_layer_norm(x))

# Functional form, without any affine weight/bias.
layer_norm = F.layer_norm(x, normalized_shape=normalized_shape, weight=None, bias=None, eps=eps)
print("F.layer_norm=", layer_norm)

# Manual form. Unlike normalized_shape, `dim` takes dimension INDICES: the
# last len(normalized_shape) axes of x.
dims = tuple(range(-len(normalized_shape), 0))
mean = torch.mean(x, dim=dims, keepdim=True)
# Biased variance: the squared deviations are averaged (torch.mean, i.e.
# divided by N), not merely summed. torch.var applies Bessel's correction
# (divides by N - 1) by default, so it only matches when called with
# unbiased=False.
var = torch.mean((x - mean) ** 2, dim=dims, keepdim=True)
normalized = (x - mean) / torch.sqrt(var + eps)
print("my LayerNorm=", var + eps, normalized)
``````

## 多维实现

``````python
import numpy as np
import torch
import torch.nn.functional as F

# Input reshaped to (2, 1, 3). torch.tensor is the documented factory for
# building a tensor from Python data (torch.Tensor is the legacy constructor).
x = torch.tensor([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]).view(2, 1, 3)

# normalized_shape must equal the sizes of a contiguous run of TRAILING
# dimensions of the input; here [1, 3] are the last two sizes of (2, 1, 3).
normalized_shape = [1, 3]
# Constant added to the variance before the square root; shared by all three
# computations below so that their results are directly comparable.
eps = 1e-5

# Module form of layer normalization.
nn_layer_norm = torch.nn.LayerNorm(normalized_shape=normalized_shape, eps=eps, elementwise_affine=True)
print("LayerNorm=", nn_layer_norm(x))

# Functional form, without any affine weight/bias.
layer_norm = F.layer_norm(x, normalized_shape=normalized_shape, weight=None, bias=None, eps=eps)
print("F.layer_norm=", layer_norm)

# Manual form. `dim` takes the INDICES of the normalized axes, i.e. the last
# len(normalized_shape) dimensions (equivalent to [1, 2] for a 3-D input).
dims = tuple(range(-len(normalized_shape), 0))
mean = torch.mean(x, dim=dims, keepdim=True)
# Biased variance (divide by N), which is what the manual formula needs to
# agree with the two library calls above.
var = torch.mean((x - mean) ** 2, dim=dims, keepdim=True)
normalized = (x - mean) / torch.sqrt(var + eps)
print("my LayerNorm=", normalized)
``````

如果normalized_shape与输入张量最后几维的大小不一致（例如对shape为(2,1,3)的输入传入normalized_shape=[2,3]），则会报错：

`RuntimeError: Given normalized_shape=[2, 3], expected input with shape [*, 2, 3], but got input of size[2, 1, 3]`

THE END