import json
import os
import pickle
import random

import numpy as np
import paddle
import tqdm


def set_seed_paddle(seed=1024):
    """Fix the Python, NumPy and Paddle random seeds for reproducibility."""
    seed = int(seed)
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    paddle.seed(seed)
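
# Typical use (illustrative): call once at the top of a training or evaluation script.
# set_seed_paddle(1024)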


def iou_with_anchors(anchors_min, anchors_max, box_min, box_max):
    """Compute the Jaccard (IoU) score between a box and an array of anchors."""
    len_anchors = anchors_max - anchors_min
    int_xmin = np.maximum(anchors_min, box_min)
    int_xmax = np.minimum(anchors_max, box_max)
    inter_len = np.maximum(int_xmax - int_xmin, 0.)
    union_len = len_anchors - inter_len + box_max - box_min
    jaccard = np.divide(inter_len, union_len)
    return jaccard
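
# Example (illustrative values): IoU of two anchors [0, 2] and [1, 3] with the box [1, 2.5].
# iou_with_anchors(np.array([0., 1.]), np.array([2., 3.]), 1.0, 2.5)
# -> array([0.4 , 0.75])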


def boundary_choose(score_list):
    """Choose candidate start/end boundary positions from a 1-D score curve.

    A position is kept if its score exceeds half of the maximum score, or if it
    is a local peak (strictly greater than both neighbours, with zero padding).
    `score_list` is expected to be a 1-D NumPy array.
    """
    max_score = max(score_list)
    mask_high = (score_list > max_score * 0.5)
    score_list = list(score_list)
    score_middle = np.array([0.0] + score_list + [0.0])
    score_front = np.array([0.0, 0.0] + score_list)
    score_back = np.array(score_list + [0.0, 0.0])
    mask_peak = ((score_middle > score_front) & (score_middle > score_back))
    mask_peak = mask_peak[1:-1]
    mask = (mask_high | mask_peak).astype('float32')
    return mask
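
# Example (illustrative values): positions 1 and 4 are local peaks and also exceed
# half of the maximum score, so only they are selected.
# boundary_choose(np.array([0.1, 0.9, 0.3, 0.2, 0.8]))
# -> array([0., 1., 0., 0., 1.], dtype=float32)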


def soft_nms(proposal_dict, alpha=0.4, t1=0.55, t2=0.9, dscale=4):
    '''
    proposal_dict: list of proposals generated by the network;
    alpha: alpha value of the Gaussian decaying function;
    t1, t2: thresholds for soft NMS;
    dscale: duration (in seconds) of one sliding-window clip.
    '''
    sorted_proposal = sorted(proposal_dict, key=lambda x: x["score"], reverse=True)
    tstart = []
    tend = []
    tscore = []
    for pp in sorted_proposal:
        tstart.append(pp["segment"][0])
        tend.append(pp["segment"][1])
        tscore.append(pp["score"])

    rstart = []
    rend = []
    rscore = []

    while len(tscore) > 1 and len(rscore) < 101:
        max_index = tscore.index(max(tscore))
        tmp_iou_list = iou_with_anchors(np.array(tstart), np.array(tend),
                                        tstart[max_index], tend[max_index])
        for idx in range(0, len(tscore)):
            if idx != max_index:
                tmp_iou = tmp_iou_list[idx]
                # Changed w.r.t. PaddleVideo: the decay threshold grows with the
                # relative width of the kept proposal, which improved AUC.
                tmp_width = (tend[max_index] - tstart[max_index]) / dscale
                if tmp_iou > t1 + (t2 - t1) * tmp_width:
                    tscore[idx] = tscore[idx] * np.exp(
                        -np.square(tmp_iou) / alpha)

        rstart.append(tstart[max_index])
        rend.append(tend[max_index])
        rscore.append(tscore[max_index])
        tstart.pop(max_index)
        tend.pop(max_index)
        tscore.pop(max_index)

    new_proposal = []
    for i in range(len(rscore)):
        pp = {}
        pp['score'] = round(rscore[i], 2)
        pp["segment"] = [round(rstart[i], 2), round(rend[i], 2)]
        new_proposal.append(pp)

    return new_proposal
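
# Example (a minimal sketch with made-up proposals): the second proposal overlaps
# the first one heavily, so its score is decayed; note that the loop stops once a
# single candidate remains, so the lowest-scored proposal is not emitted.
# soft_nms([{"score": 0.9, "segment": [0.0, 2.0]},
#           {"score": 0.8, "segment": [0.1, 2.1]},
#           {"score": 0.6, "segment": [3.0, 4.0]}])
# -> [{'score': 0.9, 'segment': [0.0, 2.0]}, {'score': 0.6, 'segment': [3.0, 4.0]}]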


def win_splitting_per_video_for_testing(root, feat_output_path, json_path, test_file_name=None, T=100, fps=25):
    # Split every test video into sliding windows and build the video-info JSON file.
    if not os.path.exists(feat_output_path):
        os.makedirs(feat_output_path)
    clip_ds = T / fps
    if test_file_name is None:
        test_file_name = os.listdir(root)
    else:
        test_file_name = [na[:-3] + 'pkl' for na in test_file_name]

    print(f"Splitting every video with window size {T} and stride {T//2} "
          f"(a saved test clip does not have to contain a proposal)...")

    test_dict = {}
    for file_name in tqdm.tqdm(test_file_name):
        file_path = os.path.join(root, file_name)
        video_feat = pickle.load(open(file_path, "rb"))['image_feature']
        frames_len = len(video_feat)
        video_ds = frames_len / fps
        clip_count = 1
        for start_f in range(0, frames_len, T // 2):
            end_f = start_f + T
            if end_f < frames_len:
                win_feat = video_feat[start_f:end_f]
                start_second = start_f / fps
                end_second = end_f / fps
                annotations = {"clip_s": start_second, "clip_e": end_second}
            else:
                # The tail window is aligned to the last T frames of the video.
                win_feat = video_feat[frames_len - T:]
                start_second = (frames_len - T) / fps
                end_second = video_ds
                annotations = {"clip_s": start_second, "clip_e": end_second}

            clip_name = file_name[:-4] + "_clip" + str(clip_count)
            test_dict[clip_name + ".mp4"] = {"subset": "validation", 'duration_second': clip_ds, 'annotations': annotations}

            with open(os.path.join(feat_output_path, clip_name + ".pkl"), "wb") as f:
                pickle.dump({'image_feature': win_feat}, f)
            clip_count += 1

    print(f"{feat_output_path} contains {len(test_dict)} clips in total!")
    with open(json_path, 'w', encoding='utf-8') as f:
        json.dump(test_dict, f, ensure_ascii=False, indent=4)
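
# Usage sketch (hypothetical paths; assumes each per-video pickle stores an
# 'image_feature' array sampled at 25 fps):
# win_splitting_per_video_for_testing(root="features/test",
#                                     feat_output_path="features/test_clips",
#                                     json_path="features/test_clips.json")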


def win_splitting_per_video_for_training(video_info,
                                         root,
                                         feat_output_path,
                                         train_json_path,
                                         val_gt_json_path,
                                         orig_val_gt_json_path,
                                         val_ratio=0.025,
                                         T=100,
                                         fps=25):
    # Split the videos into windows, producing the training/validation clip sets
    # and their info JSON files.
    num_videos = len(video_info)
    train_num = int(num_videos * (1 - val_ratio))
    val_num = num_videos - train_num
    print(f"{num_videos} videos in total: {train_num} of them are split for training, "
          f"the remaining {val_num} are used for validation!")
    print(f"Splitting every video with window size {T} and stride {T//2} "
          f"(a saved training clip must contain at least one complete proposal)...")
    if not os.path.exists(feat_output_path):
        os.makedirs(feat_output_path)
    train_dict = {}
    val_dict = {}
    val_orig_dict = {}
    clip_ds = T / fps
    for i, items in enumerate(video_info):
        file_name = items['url'][:-3] + "pkl"
        file_path = os.path.join(root, file_name)
        if not os.path.exists(file_path):
            continue

        end_ids = [action["end_id"] for action in items["actions"]]
        if len(end_ids) == 0:
            print("Discarded invalid sample:", items)
            continue

        video_annotations = [{"segment": [action["start_id"], action["end_id"]], "label": action["label_names"][0]} for action in items["actions"]]
        video_feat = pickle.load(open(file_path, "rb"))['image_feature']
        frames_len = len(video_feat)
        video_ds = frames_len / fps

        clip_count = 1
        for start_f in range(0, frames_len, T // 2):
            end_f = start_f + T
            if end_f < frames_len:
                win_feat = video_feat[start_f:end_f]
                start_second = start_f / fps
                end_second = end_f / fps
            else:
                # The tail window is aligned to the last T frames of the video.
                win_feat = video_feat[frames_len - T:]
                start_second = (frames_len - T) / fps
                end_second = video_ds

            # Keep only the proposals fully contained in the current window,
            # shifted to window-relative seconds.
            annotations = []
            for annt in video_annotations:
                if annt["segment"][0] >= start_second and annt["segment"][1] <= end_second:
                    annotations.append({"segment": [annt["segment"][0] - start_second, annt["segment"][1] - start_second], "label": annt["label"]})

            if len(annotations) == 0:
                clip_count += 1
                continue
            clip_name = items['url'][:-4] + "_clip" + str(clip_count)
            if i < train_num:
                train_dict[clip_name + ".mp4"] = {"subset": "train", 'duration_second': clip_ds, 'annotations': annotations}
            else:
                train_dict[clip_name + ".mp4"] = {"subset": "validation", 'duration_second': clip_ds, 'annotations': annotations}
                val_dict[clip_name + ".mp4"] = {"subset": "validation", 'duration_second': clip_ds, 'annotations': annotations}
            assert len(win_feat) == T
            with open(os.path.join(feat_output_path, clip_name + ".pkl"), "wb") as f:
                pickle.dump({'image_feature': win_feat}, f)
            clip_count += 1
        if i < train_num:
            # The original (un-split) training feature file is no longer needed.
            os.remove(file_path)
        else:
            val_orig_dict[items['url']] = {"subset": "validation", 'num_frames': frames_len, 'annotations': video_annotations}

    print(f"{feat_output_path} contains {len(train_dict)} clips in total!")
    # JSON file used for training.
    with open(train_json_path, 'w', encoding='utf-8') as f:
        json.dump(train_dict, f, ensure_ascii=False, indent=4)
    # JSON file with the proposal labels of every saved validation clip.
    with open(val_gt_json_path, 'w', encoding='utf-8') as f:
        new_val_dict = {'taxonomy': None, 'database': val_dict, 'version': None}
        json.dump(new_val_dict, f, ensure_ascii=False, indent=4)
    # JSON file with the proposal labels of the original (un-split) validation videos.
    with open(orig_val_gt_json_path, 'w', encoding='utf-8') as f:
        val_orig_dict = {'taxonomy': None, 'database': val_orig_dict, 'version': None, 'fps': fps}
        json.dump(val_orig_dict, f, ensure_ascii=False, indent=4)
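
# Usage sketch (hypothetical paths; `video_info` is the parsed label list, where each
# item holds 'url' and an 'actions' list with 'start_id'/'end_id'/'label_names'):
# win_splitting_per_video_for_training(video_info,
#                                      root="features/train",
#                                      feat_output_path="features/train_clips",
#                                      train_json_path="features/train_clips.json",
#                                      val_gt_json_path="features/val_clips_gt.json",
#                                      orig_val_gt_json_path="features/val_orig_gt.json")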


def merging_output_per_video(test_root,               # directory with the original (un-split) test video features
                             proposal_filename,       # predicted per-clip proposal file to be stitched back onto each original video
                             merging_output_filename, # merged per-video proposals, used for submission
                             win_T=100,
                             fps=25,
                             snms_alpha=0.4,
                             snms_t1=0.55,
                             snms_t2=0.9):
    # Merge the per-clip proposals back into one submission file per original video.
    test_file_name = os.listdir(test_root)
    with open(proposal_filename, 'r', encoding='utf-8') as f:
        pred_dict = json.load(f)

    new_pred_dict = {"version": pred_dict["version"], "external_data": {}}
    results_dict = pred_dict["results"]
    new_results_dict = {}
    for file_name in tqdm.tqdm(test_file_name):
        file_path = os.path.join(test_root, file_name)
        video_feat = pickle.load(open(file_path, "rb"))['image_feature']
        frames_len = len(video_feat)
        clip_count = 1
        proposal = []
        for start_f in range(0, frames_len, win_T // 2):
            end_f = start_f + win_T
            if end_f < frames_len:
                start_second = start_f / fps
            else:
                start_second = (frames_len - win_T) / fps

            clip_name = file_name[:-4] + "_clip" + str(clip_count)
            # clip_name[2:]: the keys in the result file drop the first two
            # characters of the clip name (see PaddleVideo bmn_metric.py, line 279).
            if (clip_name[2:] + ".mp4") not in results_dict.keys():
                print("Clip without proposals:", clip_name + ".mp4")
                clip_count += 1
                continue
            if len(results_dict[clip_name[2:] + ".mp4"]) == 0:
                print("Clip without proposals:", clip_name + ".mp4")
                clip_count += 1
                continue
            for pp in results_dict[clip_name[2:] + ".mp4"]:
                # Shift the clip-relative segment back to video-relative seconds.
                pp["segment"][0] = pp["segment"][0] + start_second
                pp["segment"][1] = pp["segment"][1] + start_second
                proposal.append(pp)
            clip_count += 1

        proposal = soft_nms(proposal, alpha=snms_alpha, t1=snms_t1, t2=snms_t2, dscale=win_T // fps)
        proposal = sorted(proposal, key=lambda x: x["segment"][0], reverse=False)
        new_results_dict[file_name[:-4]] = proposal

    new_pred_dict["results"] = new_results_dict
    with open(merging_output_filename, 'w') as f:
        json.dump(new_pred_dict, f)
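
# Usage sketch (hypothetical paths; win_T and fps must match the values used in
# win_splitting_per_video_for_testing):
# merging_output_per_video(test_root="features/test",
#                          proposal_filename="output/clip_proposals.json",
#                          merging_output_filename="output/submission.json")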


def checking_submission(json_path1="", json_path2=""):
    # Check whether two submission files contain exactly the same proposal segments.
    with open(json_path1, 'r', encoding='utf-8') as f:
        results1 = json.load(f)["results"]
    with open(json_path2, 'r', encoding='utf-8') as f:
        results2 = json.load(f)["results"]
    for video_name in results1.keys():
        proposals1 = results1[video_name]
        proposals2 = results2[video_name]
        for pp1, pp2 in zip(proposals1, proposals2):
            if pp1["segment"][0] != pp2["segment"][0] or pp1["segment"][1] != pp2["segment"][1]:
                print("The two submission files differ!")
                return
    print("The two submission files are identical!")