"""Extract a single clip-level feature vector from a video with a pretrained I3D-R50."""

import cv2
import numpy as np
import torch
import torch.nn.functional as F
import torchvision.transforms as transforms
from PIL import Image
from pytorchvideo.models.hub import i3d_r50

# Pretrained network, switched to inference mode once at import time.
model = i3d_r50(pretrained=True).eval()
# Every block except the last one, so the network yields feature maps.
# NOTE(review): the dropped block is presumably the classification head - confirm.
feature_extractor = torch.nn.Sequential(*model.blocks[:-1])


def preprocess_video(video_path: str, num_frames: int = 32, size: int = 224) -> torch.Tensor:
    """Sample frames uniformly from a video and pack them into one tensor.

    Frames are picked at ``num_frames`` evenly spaced positions between the
    first and last frame, converted from BGR to RGB, resized to
    ``size`` x ``size``, scaled by ``ToTensor`` and normalized per channel.

    Args:
        video_path: Path of the video file handed to ``cv2.VideoCapture``.
        num_frames: Number of frames in the returned clip.
        size: Height and width every frame is resized to.

    Returns:
        A tensor of shape ``(1, 3, num_frames, size, size)``.

    Raises:
        RuntimeError: If the video cannot be opened or no frame can be decoded.
    """
    transform = transforms.Compose([
        transforms.Resize((size, size)),
        transforms.ToTensor(),
        # NOTE(review): presumably the statistics the pretrained weights
        # were trained with - confirm against the model documentation.
        transforms.Normalize(mean=[0.45, 0.45, 0.45],
                             std=[0.225, 0.225, 0.225]),
    ])

    frames = []
    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            raise RuntimeError(f"Could not open video: {video_path!r}")

        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        # The reported frame count can be 0 or negative; clamp so that no
        # negative seek position is ever generated.
        last_index = max(total_frames - 1, 0)
        frame_indices = np.linspace(0, last_index, num_frames).astype(int)

        for index in frame_indices:
            cap.set(cv2.CAP_PROP_POS_FRAMES, int(index))
            ok, frame = cap.read()
            if not ok:
                # A failed read must not shorten the clip: reuse the most
                # recent decoded frame so the temporal length stays fixed.
                if frames:
                    frames.append(frames[-1])
                continue
            rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frames.append(transform(Image.fromarray(rgb)))
    finally:
        # Release the capture handle even when decoding raises.
        cap.release()

    if not frames:
        raise RuntimeError(f"No frames could be decoded from video: {video_path!r}")

    # Reads that failed before the first good frame leave the clip short;
    # pad at the end with the last frame to reach num_frames.
    frames.extend([frames[-1]] * (num_frames - len(frames)))

    # (T, C, H, W) -> (C, T, H, W) -> (1, C, T, H, W)
    video_tensor = torch.stack(frames).permute(1, 0, 2, 3).unsqueeze(0)
    return video_tensor


def extract_features(video_tensor: torch.Tensor) -> np.ndarray:
    """Run the feature extractor on a clip and pool the result to a vector.

    Args:
        video_tensor: Clip tensor as produced by ``preprocess_video``.

    Returns:
        A 1-D NumPy array: the extractor output averaged down to one value
        per channel and then flattened (the batch dimension is flattened
        too).
    """
    with torch.no_grad():
        features = feature_extractor(video_tensor)
        # Average over all remaining time/space positions.
        features = F.adaptive_avg_pool3d(features, 1)
    # .cpu() is a no-op for CPU tensors and keeps .numpy() valid otherwise.
    return features.flatten().cpu().numpy()


def video_features(video_path: str) -> np.ndarray:
    """Return the pooled feature vector for the video at ``video_path``.

    Chains ``preprocess_video`` (with its default arguments) and
    ``extract_features``.
    """
    video_tensor = preprocess_video(video_path)
    features = extract_features(video_tensor)
    return features


# NOTE(review): these statements run on import as well as on direct
# execution; consider an ``if __name__ == "__main__":`` guard if this file
# is ever imported as a module.
video_path = r'D:\DESKTOP\2025\44\a1\dataset\org\0.mp4'
video_f = video_features(video_path)
print(f"视频features: {video_f.shape}")