221 lines
7.6 KiB
Python
221 lines
7.6 KiB
Python
![]() |
import os
|
|||
|
import torch
|
|||
|
import torch.nn as nn
|
|||
|
import torch.nn.functional as F
|
|||
|
from torch.utils.data import Dataset, DataLoader
|
|||
|
from torch_geometric.nn import GCNConv
|
|||
|
import numpy as np
|
|||
|
|
|||
|
|
|||
|
# 图神经网络模块
|
|||
|
class GNNModule(nn.Module):
|
|||
|
def __init__(self, input_dim, hidden_dim, output_dim):
|
|||
|
super(GNNModule, self).__init__()
|
|||
|
self.conv1 = GCNConv(input_dim, hidden_dim)
|
|||
|
self.conv2 = GCNConv(hidden_dim, output_dim)
|
|||
|
|
|||
|
def forward(self, x, edge_index):
|
|||
|
x = F.relu(self.conv1(x, edge_index))
|
|||
|
x = F.relu(self.conv2(x, edge_index))
|
|||
|
return x
|
|||
|
|
|||
|
|
|||
|
# GRU时序网络模块
|
|||
|
class GRUNetwork(nn.Module):
|
|||
|
def __init__(self, input_dim, hidden_dim, num_layers):
|
|||
|
super(GRUNetwork, self).__init__()
|
|||
|
self.gru = nn.GRU(input_dim, hidden_dim, num_layers, batch_first=True)
|
|||
|
self.fc = nn.Linear(hidden_dim, 3) # 3个行为类别
|
|||
|
|
|||
|
def forward(self, x):
|
|||
|
out, _ = self.gru(x)
|
|||
|
out = self.fc(out[:, -1, :]) # 使用最后时刻的输出进行分类
|
|||
|
return out
|
|||
|
|
|||
|
|
|||
|
# 主网络(结合GNN和GRU)
|
|||
|
class GNN_GRU_Network(nn.Module):
|
|||
|
def __init__(self, input_dim, hidden_dim_gnn, hidden_dim_gru, num_layers_gru, num_keypoints=17):
|
|||
|
super(GNN_GRU_Network, self).__init__()
|
|||
|
self.gnn = GNNModule(input_dim=2, hidden_dim=hidden_dim_gnn, output_dim=hidden_dim_gnn) # 17个关键点的(x, y)
|
|||
|
self.gru = GRUNetwork(input_dim=num_keypoints * hidden_dim_gnn, hidden_dim=hidden_dim_gru,
|
|||
|
num_layers=num_layers_gru)
|
|||
|
|
|||
|
def forward(self, keypoints, edge_index, seq_length):
|
|||
|
batch_size = keypoints.shape[0]
|
|||
|
num_keypoints = keypoints.shape[1]
|
|||
|
|
|||
|
# 对每个时刻的关键点使用GNN提取空间特征
|
|||
|
spatial_features = []
|
|||
|
for t in range(seq_length):
|
|||
|
x = keypoints[:, t, :].view(batch_size * num_keypoints, 2)
|
|||
|
spatial_feature = self.gnn(x, edge_index) # (batch_size * num_keypoints, hidden_dim_gnn)
|
|||
|
spatial_features.append(spatial_feature)
|
|||
|
|
|||
|
spatial_features = torch.stack(spatial_features, dim=1) # (batch_size * num_keypoints, seq_len, hidden_dim_gnn)
|
|||
|
|
|||
|
gru_input = spatial_features.view(batch_size, seq_length, num_keypoints * hidden_dim_gnn)
|
|||
|
|
|||
|
# 传递给GRU进行时序建模
|
|||
|
output = self.gru(gru_input)
|
|||
|
return output
|
|||
|
|
|||
|
|
|||
|
# 数据集类
|
|||
|
class KeypointDataset(Dataset):
|
|||
|
def __init__(self, root_dir, behavior_labels, seq_length=5):
|
|||
|
"""
|
|||
|
:param root_dir: 包含三个行为文件夹的根目录
|
|||
|
:param behavior_labels: 每个文件夹对应的行为标签
|
|||
|
:param seq_length: 每个视频的时长(秒),每秒20帧
|
|||
|
"""
|
|||
|
self.root_dir = root_dir
|
|||
|
self.behavior_labels = behavior_labels
|
|||
|
self.seq_length = seq_length # 每个视频5秒,假设每秒20帧
|
|||
|
|
|||
|
self.videos = []
|
|||
|
self.labels = []
|
|||
|
|
|||
|
# 遍历根目录,获取每个视频路径和标签
|
|||
|
for idx, behavior in enumerate(behavior_labels):
|
|||
|
behavior_dir = os.path.join(root_dir, behavior)
|
|||
|
for video_name in os.listdir(behavior_dir):
|
|||
|
video_path = os.path.join(behavior_dir, video_name)
|
|||
|
if os.path.isdir(video_path):
|
|||
|
video_files = sorted(os.listdir(video_path)) # 按帧数排序文件
|
|||
|
self.videos.append(video_files)
|
|||
|
self.labels.append(idx) # 标签为行为类别索引
|
|||
|
|
|||
|
def __len__(self):
|
|||
|
return len(self.videos)
|
|||
|
|
|||
|
def __getitem__(self, idx):
|
|||
|
video_files = self.videos[idx]
|
|||
|
label = self.labels[idx]
|
|||
|
|
|||
|
# 每个视频加载时,读取关键点数据
|
|||
|
keypoints = []
|
|||
|
for frame_file in video_files:
|
|||
|
frame_path = os.path.join(self.root_dir, frame_file)
|
|||
|
keypoints.append(self.load_keypoints(frame_path))
|
|||
|
|
|||
|
keypoints = np.array(keypoints) # shape: (num_frames, num_persons, 34) -> (num_frames, num_persons, 17*2)
|
|||
|
return torch.tensor(keypoints, dtype=torch.float32), torch.tensor(label, dtype=torch.long)
|
|||
|
|
|||
|
def load_keypoints(self, frame_path):
|
|||
|
"""
|
|||
|
读取每一帧的关键点数据,每帧数据包含每个人的17个关键点
|
|||
|
:param frame_path: 每帧的txt文件路径
|
|||
|
:return: 该帧的所有人物的17个关键点坐标
|
|||
|
"""
|
|||
|
keypoints = []
|
|||
|
with open(frame_path, 'r') as f:
|
|||
|
for line in f.readlines():
|
|||
|
data = list(map(float, line.strip().split()))
|
|||
|
person_id = int(data[0])
|
|||
|
frame_id = int(data[1])
|
|||
|
coordinates = data[2:] # 后面的34个数值是17个关键点的(x, y)坐标
|
|||
|
keypoints.append(coordinates)
|
|||
|
|
|||
|
return np.array(keypoints) # shape: (num_persons, 17*2)
|
|||
|
|
|||
|
|
|||
|
# 训练过程
|
|||
|
def train(model, train_loader, criterion, optimizer, device):
|
|||
|
model.train()
|
|||
|
total_loss = 0
|
|||
|
for data, label in train_loader:
|
|||
|
data = data.to(device)
|
|||
|
label = label.to(device)
|
|||
|
|
|||
|
optimizer.zero_grad()
|
|||
|
output = model(data) # 前向传播
|
|||
|
loss = criterion(output, label) # 计算损失
|
|||
|
loss.backward() # 反向传播
|
|||
|
optimizer.step() # 更新参数
|
|||
|
|
|||
|
total_loss += loss.item()
|
|||
|
|
|||
|
return total_loss / len(train_loader)
|
|||
|
|
|||
|
|
|||
|
# 验证过程
|
|||
|
def evaluate(model, val_loader, criterion, device):
|
|||
|
model.eval()
|
|||
|
total_loss = 0
|
|||
|
correct = 0
|
|||
|
total = 0
|
|||
|
with torch.no_grad():
|
|||
|
for data, label in val_loader:
|
|||
|
data = data.to(device)
|
|||
|
label = label.to(device)
|
|||
|
|
|||
|
output = model(data)
|
|||
|
loss = criterion(output, label)
|
|||
|
total_loss += loss.item()
|
|||
|
|
|||
|
_, predicted = torch.max(output, 1)
|
|||
|
correct += (predicted == label).sum().item()
|
|||
|
total += label.size(0)
|
|||
|
|
|||
|
accuracy = correct / total
|
|||
|
return total_loss / len(val_loader), accuracy
|
|||
|
|
|||
|
|
|||
|
# 主函数
|
|||
|
def main():
|
|||
|
root_dir = "/path/to/your/data" # 数据集根目录
|
|||
|
behavior_labels = ["behavior1", "behavior2", "behavior3"]
|
|||
|
|
|||
|
# 创建数据集与加载器
|
|||
|
train_dataset = KeypointDataset(root_dir, behavior_labels)
|
|||
|
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
|
|||
|
|
|||
|
# 模型设置
|
|||
|
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
|||
|
model = GNN_GRU_Network(input_dim=2, hidden_dim_gnn=64, hidden_dim_gru=128, num_layers_gru=2)
|
|||
|
model.to(device)
|
|||
|
|
|||
|
# 损失函数与优化器
|
|||
|
criterion = nn.CrossEntropyLoss()
|
|||
|
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
|
|||
|
|
|||
|
# 训练与验证
|
|||
|
for epoch in range(1, 101):
|
|||
|
train_loss = train(model, train_loader, criterion, optimizer, device)
|
|||
|
print(f"Epoch {epoch}, Train Loss: {train_loss:.4f}")
|
|||
|
|
|||
|
# 可选:每个epoch结束时进行验证
|
|||
|
# val_loss, val_accuracy = evaluate(model, val_loader, criterion, device)
|
|||
|
# print(f"Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.4f}")
|
|||
|
|
|||
|
|
|||
|
if __name__ == '__main__':
|
|||
|
main()
|
|||
|
'''
|
|||
|
代码说明:
|
|||
|
|
|||
|
数据集处理(KeypointDataset):
|
|||
|
假设你的数据集文件夹结构如下:
|
|||
|
data /
|
|||
|
├── behavior1 /
|
|||
|
│ ├── video1 /
|
|||
|
│ │ ├── frame1.txt
|
|||
|
│ │ ├── frame2.txt
|
|||
|
│ │ └── ...
|
|||
|
│ ├── video2 /
|
|||
|
│ └── ...
|
|||
|
├── behavior2 /
|
|||
|
└── behavior3 /
|
|||
|
每个视频文件夹包含若干个.txt
|
|||
|
文件,每个文件代表一帧,记录了每个人的关键点位置(17
|
|||
|
个关键点,每个关键点的(x, y)
|
|||
|
坐标)。
|
|||
|
模型(GNN_GRU_Network):
|
|||
|
使用
|
|||
|
GNN
|
|||
|
对每一帧的关键点进行空间特征提取。
|
|||
|
然后将提取的空间特征传入
|
|||
|
GRU
|
|||
|
网络进行时序建模,最后通过
|
|||
|
'''
|