1. Introduction to the Spatial Attention Mechanism
A schematic of the spatial attention mechanism is shown below:
The elongated bar is the channel attention mechanism, while the flat plane is the spatial attention mechanism. From the figure you can see that:
- Channel attention weighs each feature map (channel) as a whole.
- Spatial attention weighs each local position within the feature maps.
Note: spatial attention is the part on the right of the figure, the Spatial Attention Module.
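For contrast, here is a minimal sketch of a channel attention block. This class, its reduction ratio, and its layer choices are illustrative assumptions and are not part of the original code; it only shows how a per-channel weight differs from the per-position weight computed below.

import torch
import torch.nn as nn

class ChannelAttention(nn.Module):
    """Sketch of channel attention: pool away H and W, then weight each channel."""
    def __init__(self, in_channels, reduction=16):  # reduction ratio is an assumption
        super(ChannelAttention, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)  # (N, C, H, W) -> (N, C, 1, 1)
        self.max_pool = nn.AdaptiveMaxPool2d(1)
        self.fc = nn.Sequential(
            nn.Conv2d(in_channels, in_channels // reduction, 1, bias=False),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels // reduction, in_channels, 1, bias=False),
        )
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        # one weight per channel, shared by every spatial position of that channel
        out = self.fc(self.avg_pool(x)) + self.fc(self.max_pool(x))
        return self.sigmoid(out)  # (N, C, 1, 1)

Multiplying x by this (N, C, 1, 1) map scales a whole channel with a single weight, whereas the spatial attention map below is (N, 1, H, W) and scales every channel at a given position by the same weight.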
2. Spatial Attention and Its PyTorch Code
import torch
import torch.nn as nn

class SpatialAttention(nn.Module):
    def __init__(self, kernel_size=7):
        super(SpatialAttention, self).__init__()
        assert kernel_size in (3, 7), 'kernel size must be 3 or 7'
        padding = 3 if kernel_size == 7 else 1
        # 2 input channels (avg map + max map) -> 1 output channel (the attention map)
        self.conv1 = nn.Conv2d(2, 1, kernel_size, padding=padding, bias=False)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):                                 # x: (N, C, H, W), e.g. (30, 40, 50, 30)
        avg_out = torch.mean(x, dim=1, keepdim=True)      # average over channels -> (30, 1, 50, 30)
        max_out, _ = torch.max(x, dim=1, keepdim=True)    # max over channels     -> (30, 1, 50, 30)
        x = torch.cat([avg_out, max_out], dim=1)          # (30, 2, 50, 30)
        x = self.conv1(x)                                 # (30, 1, 50, 30)
        return self.sigmoid(x)                            # attention map in (0, 1)
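A quick shape check, using the same (30, 40, 50, 30) input size as in the comments above:

x = torch.randn(30, 40, 50, 30)          # (N, C, H, W)
sa = SpatialAttention(kernel_size=7)
attn = sa(x)
print(attn.size())                       # torch.Size([30, 1, 50, 30])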
A simple way to use it is as follows:
import torch
import torch.nn as nn
import torch.utils.data as Data


class SpatialAttention(nn.Module):
    def __init__(self, kernel_size=7):
        super(SpatialAttention, self).__init__()
        assert kernel_size in (3, 7), 'kernel size must be 3 or 7'
        padding = 3 if kernel_size == 7 else 1
        self.conv1 = nn.Conv2d(2, 1, kernel_size, padding=padding, bias=False)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):                                 # x.size(): (30, 40, 50, 30)
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)    # (30, 1, 50, 30)
        x = torch.cat([avg_out, max_out], dim=1)
        x = self.conv1(x)                                 # (30, 1, 50, 30)
        return self.sigmoid(x)                            # (30, 1, 50, 30)


def get_total_train_data(H, W, C, class_count):
    """Build the full training set; replace this with your own data."""
    import numpy as np
    x_train = torch.Tensor(
        np.random.random((1000, H, W, C)))               # shape: [num_samples, H, W, C]
    y_train = torch.Tensor(
        np.random.randint(0, class_count, size=(1000, 1))).long()  # [num_samples, 1] class labels in [0, class_count)
    return x_train, y_train


if __name__ == '__main__':
    # ================ training parameters =================
    epochs = 100
    batch_size = 30
    output_class = 14
    H = 40
    W = 50
    C = 30
    # ================ prepare the data =================
    x_train, y_train = get_total_train_data(H, W, C, class_count=output_class)
    train_loader = Data.DataLoader(
        dataset=Data.TensorDataset(x_train, y_train),  # tensors wrapped in Data.TensorDataset() may have any shape
        batch_size=batch_size,  # samples per batch
        shuffle=True,           # whether to shuffle the data (recommended)
        num_workers=6,          # number of worker processes used to load data
        drop_last=True,
    )
    # ================ initialize the model =================
    model = SpatialAttention()
    # ================ run the model =================
    for epoch in range(epochs):
        for seq, labels in train_loader:
            # Conv2d treats dim 1 of seq (here of size H = 40) as the channel dimension
            attention_out = model(seq)
            seq_attention_out = attention_out.squeeze()
            for i in range(seq_attention_out.size()[0]):
                print(seq_attention_out[i])
3. Usage Example
import torch
import torch.nn as nn
import torch.utils.data as Data


class SpatialAttention(nn.Module):
    def __init__(self, kernel_size=7):
        super(SpatialAttention, self).__init__()
        assert kernel_size in (3, 7), 'kernel size must be 3 or 7'
        padding = 3 if kernel_size == 7 else 1
        self.conv1 = nn.Conv2d(2, 1, kernel_size, padding=padding, bias=False)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):                                 # x.size(): (30, 40, 50, 30)
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)    # (30, 1, 50, 30)
        x = torch.cat([avg_out, max_out], dim=1)
        x = self.conv1(x)                                 # (30, 1, 50, 30)
        return self.sigmoid(x)                            # (30, 1, 50, 30)


class UseAttentionModel(nn.Module):
    def __init__(self):
        super(UseAttentionModel, self).__init__()
        self.spatial_attention = SpatialAttention()

    def forward(self, x):                                 # forward pass
        attention_value = self.spatial_attention(x)       # (N, 1, H, W) attention map
        out = x.mul(attention_value)                      # re-weight the input; broadcasts over dim 1
        return out


def get_total_train_data(H, W, C, class_count):
    """Build the full training set; replace this with your own data."""
    import numpy as np
    x_train = torch.Tensor(
        np.random.random((1000, H, W, C)))               # shape: [num_samples, H, W, C]
    y_train = torch.Tensor(
        np.random.randint(0, class_count, size=(1000, 1))).long()  # [num_samples, 1] class labels in [0, class_count)
    return x_train, y_train


if __name__ == '__main__':
    # ================ training parameters =================
    epochs = 100
    batch_size = 30
    output_class = 14
    H = 40
    W = 50
    C = 30
    # ================ prepare the data =================
    x_train, y_train = get_total_train_data(H, W, C, class_count=output_class)
    train_loader = Data.DataLoader(
        dataset=Data.TensorDataset(x_train, y_train),  # tensors wrapped in Data.TensorDataset() may have any shape
        batch_size=batch_size,  # samples per batch
        shuffle=True,           # whether to shuffle the data (recommended)
        num_workers=6,          # number of worker processes used to load data
        drop_last=True,
    )
    # ================ initialize the model =================
    model = UseAttentionModel()
    cross_loss = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)  # optimizer
    model.train()
    # ================ start training =================
    # this demo only runs the attention forward pass and prints the result;
    # cross_loss and optimizer are placeholders for a full classification head
    for epoch in range(epochs):
        for seq, labels in train_loader:
            attention_out = model(seq)
            print(attention_out.size())
            print(attention_out)
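Note that nn.Conv2d expects its input in (N, C, H, W) order, while get_total_train_data produces tensors shaped (N, H, W, C); the scripts above still run because Conv2d simply treats whatever sits in dim 1 (here H = 40) as the channel dimension. If your data really is channels-last, permuting it before the attention module keeps the semantics straight; a minimal sketch with random illustration data:

seq = torch.randn(30, 40, 50, 30)                # channels-last batch: (N, H, W, C)
seq_nchw = seq.permute(0, 3, 1, 2).contiguous()  # -> (N, C, H, W) = (30, 30, 40, 50)
model = UseAttentionModel()
out = model(seq_nchw)
print(out.size())                                # torch.Size([30, 30, 40, 50])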