import json
import os
import pickle
import random

import numpy as np
import paddle
import tqdm


def set_seed_paddle(seed=1024):
    """Fix the Python, NumPy and Paddle random seeds for reproducibility."""
    seed = int(seed)
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    paddle.seed(seed)
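
# Typical use (illustrative): call once at the top of a training or evaluation script.
# set_seed_paddle(1024)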


def iou_with_anchors(anchors_min, anchors_max, box_min, box_max):
    """Compute the Jaccard (IoU) score between a box and an array of anchors."""
    len_anchors = anchors_max - anchors_min
    int_xmin = np.maximum(anchors_min, box_min)
    int_xmax = np.minimum(anchors_max, box_max)
    inter_len = np.maximum(int_xmax - int_xmin, 0.)
    union_len = len_anchors - inter_len + box_max - box_min
    jaccard = np.divide(inter_len, union_len)
    return jaccard
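
# Example (illustrative values): IoU of two anchors [0, 2] and [1, 3] with the box [1, 2.5].
# iou_with_anchors(np.array([0., 1.]), np.array([2., 3.]), 1.0, 2.5)
# -> array([0.4 , 0.75])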


def boundary_choose(score_list):
    """Choose candidate start/end boundary positions from a 1-D score curve.

    A position is kept if its score exceeds half of the maximum score, or if it
    is a local peak (strictly greater than both neighbours, with zero padding).
    `score_list` is expected to be a 1-D NumPy array.
    """
    max_score = max(score_list)
    mask_high = (score_list > max_score * 0.5)
    score_list = list(score_list)
    score_middle = np.array([0.0] + score_list + [0.0])
    score_front = np.array([0.0, 0.0] + score_list)
    score_back = np.array(score_list + [0.0, 0.0])
    mask_peak = ((score_middle > score_front) & (score_middle > score_back))
    mask_peak = mask_peak[1:-1]
    mask = (mask_high | mask_peak).astype('float32')
    return mask
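
# Example (illustrative values): positions 1 and 4 are local peaks and also exceed
# half of the maximum score, so only they are selected.
# boundary_choose(np.array([0.1, 0.9, 0.3, 0.2, 0.8]))
# -> array([0., 1., 0., 0., 1.], dtype=float32)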


def soft_nms(proposal_dict, alpha=0.4, t1=0.55, t2=0.9, dscale=4):
    '''
    proposal_dict: list of proposals generated by the network;
    alpha: alpha value of the Gaussian decaying function;
    t1, t2: thresholds for soft NMS;
    dscale: duration (in seconds) of one sliding-window clip.
    '''
    sorted_proposal = sorted(proposal_dict, key=lambda x: x["score"], reverse=True)
    tstart = []
    tend = []
    tscore = []
    for pp in sorted_proposal:
        tstart.append(pp["segment"][0])
        tend.append(pp["segment"][1])
        tscore.append(pp["score"])

    rstart = []
    rend = []
    rscore = []

    while len(tscore) > 1 and len(rscore) < 101:
        max_index = tscore.index(max(tscore))
        tmp_iou_list = iou_with_anchors(np.array(tstart), np.array(tend),
                                        tstart[max_index], tend[max_index])
        for idx in range(0, len(tscore)):
            if idx != max_index:
                tmp_iou = tmp_iou_list[idx]
                # Changed w.r.t. PaddleVideo: the decay threshold grows with the
                # relative width of the kept proposal, which improved AUC.
                tmp_width = (tend[max_index] - tstart[max_index]) / dscale
                if tmp_iou > t1 + (t2 - t1) * tmp_width:
                    tscore[idx] = tscore[idx] * np.exp(
                        -np.square(tmp_iou) / alpha)

        rstart.append(tstart[max_index])
        rend.append(tend[max_index])
        rscore.append(tscore[max_index])
        tstart.pop(max_index)
        tend.pop(max_index)
        tscore.pop(max_index)

    new_proposal = []
    for i in range(len(rscore)):
        pp = {}
        pp['score'] = round(rscore[i], 2)
        pp["segment"] = [round(rstart[i], 2), round(rend[i], 2)]
        new_proposal.append(pp)

    return new_proposal
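
# Example (a minimal sketch with made-up proposals): the second proposal overlaps
# the first one heavily, so its score is decayed; note that the loop stops once a
# single candidate remains, so the lowest-scored proposal is not emitted.
# soft_nms([{"score": 0.9, "segment": [0.0, 2.0]},
#           {"score": 0.8, "segment": [0.1, 2.1]},
#           {"score": 0.6, "segment": [3.0, 4.0]}])
# -> [{'score': 0.9, 'segment': [0.0, 2.0]}, {'score': 0.6, 'segment': [3.0, 4.0]}]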


def win_splitting_per_video_for_testing(root, feat_output_path, json_path, test_file_name=None, T=100, fps=25):
    # Split every test video into sliding windows and build the video-info JSON file.
    if not os.path.exists(feat_output_path):
        os.makedirs(feat_output_path)
    clip_ds = T / fps
    if test_file_name is None:
        test_file_name = os.listdir(root)
    else:
        test_file_name = [na[:-3] + 'pkl' for na in test_file_name]

    print(f"Splitting every video with window size {T} and stride {T//2} "
          f"(a saved test clip does not have to contain a proposal)...")

    test_dict = {}
    for file_name in tqdm.tqdm(test_file_name):
        file_path = os.path.join(root, file_name)
        video_feat = pickle.load(open(file_path, "rb"))['image_feature']
        frames_len = len(video_feat)
        video_ds = frames_len / fps
        clip_count = 1
        for start_f in range(0, frames_len, T // 2):
            end_f = start_f + T
            if end_f < frames_len:
                win_feat = video_feat[start_f:end_f]
                start_second = start_f / fps
                end_second = end_f / fps
                annotations = {"clip_s": start_second, "clip_e": end_second}
            else:
                # The tail window is aligned to the last T frames of the video.
                win_feat = video_feat[frames_len - T:]
                start_second = (frames_len - T) / fps
                end_second = video_ds
                annotations = {"clip_s": start_second, "clip_e": end_second}

            clip_name = file_name[:-4] + "_clip" + str(clip_count)
            test_dict[clip_name + ".mp4"] = {"subset": "validation", 'duration_second': clip_ds, 'annotations': annotations}

            with open(os.path.join(feat_output_path, clip_name + ".pkl"), "wb") as f:
                pickle.dump({'image_feature': win_feat}, f)
            clip_count += 1

    print(f"{feat_output_path} contains {len(test_dict)} clips in total!")
    with open(json_path, 'w', encoding='utf-8') as f:
        json.dump(test_dict, f, ensure_ascii=False, indent=4)
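
# Usage sketch (hypothetical paths; assumes each per-video pickle stores an
# 'image_feature' array sampled at 25 fps):
# win_splitting_per_video_for_testing(root="features/test",
#                                     feat_output_path="features/test_clips",
#                                     json_path="features/test_clips.json")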


def win_splitting_per_video_for_training(video_info,
                                         root,
                                         feat_output_path,
                                         train_json_path,
                                         val_gt_json_path,
                                         orig_val_gt_json_path,
                                         val_ratio=0.025,
                                         T=100,
                                         fps=25):
    # Split the videos into windows, producing the training/validation clip sets
    # and their info JSON files.
    num_videos = len(video_info)
    train_num = int(num_videos * (1 - val_ratio))
    val_num = num_videos - train_num
    print(f"{num_videos} videos in total: {train_num} of them are split for training, "
          f"the remaining {val_num} are used for validation!")
    print(f"Splitting every video with window size {T} and stride {T//2} "
          f"(a saved training clip must contain at least one complete proposal)...")
    if not os.path.exists(feat_output_path):
        os.makedirs(feat_output_path)
    train_dict = {}
    val_dict = {}
    val_orig_dict = {}
    clip_ds = T / fps
    for i, items in enumerate(video_info):
        file_name = items['url'][:-3] + "pkl"
        file_path = os.path.join(root, file_name)
        if not os.path.exists(file_path):
            continue

        end_ids = [action["end_id"] for action in items["actions"]]
        if len(end_ids) == 0:
            print("Discarded invalid sample:", items)
            continue

        video_annotations = [{"segment": [action["start_id"], action["end_id"]], "label": action["label_names"][0]} for action in items["actions"]]
        video_feat = pickle.load(open(file_path, "rb"))['image_feature']
        frames_len = len(video_feat)
        video_ds = frames_len / fps

        clip_count = 1
        for start_f in range(0, frames_len, T // 2):
            end_f = start_f + T
            if end_f < frames_len:
                win_feat = video_feat[start_f:end_f]
                start_second = start_f / fps
                end_second = end_f / fps
            else:
                # The tail window is aligned to the last T frames of the video.
                win_feat = video_feat[frames_len - T:]
                start_second = (frames_len - T) / fps
                end_second = video_ds

            # Keep only the proposals fully contained in the current window,
            # shifted to window-relative seconds.
            annotations = []
            for annt in video_annotations:
                if annt["segment"][0] >= start_second and annt["segment"][1] <= end_second:
                    annotations.append({"segment": [annt["segment"][0] - start_second, annt["segment"][1] - start_second], "label": annt["label"]})

            if len(annotations) == 0:
                clip_count += 1
                continue
            clip_name = items['url'][:-4] + "_clip" + str(clip_count)
            if i < train_num:
                train_dict[clip_name + ".mp4"] = {"subset": "train", 'duration_second': clip_ds, 'annotations': annotations}
            else:
                train_dict[clip_name + ".mp4"] = {"subset": "validation", 'duration_second': clip_ds, 'annotations': annotations}
                val_dict[clip_name + ".mp4"] = {"subset": "validation", 'duration_second': clip_ds, 'annotations': annotations}
            assert len(win_feat) == T
            with open(os.path.join(feat_output_path, clip_name + ".pkl"), "wb") as f:
                pickle.dump({'image_feature': win_feat}, f)
            clip_count += 1
        if i < train_num:
            # The original (un-split) training feature file is no longer needed.
            os.remove(file_path)
        else:
            val_orig_dict[items['url']] = {"subset": "validation", 'num_frames': frames_len, 'annotations': video_annotations}

    print(f"{feat_output_path} contains {len(train_dict)} clips in total!")
    # JSON file used for training.
    with open(train_json_path, 'w', encoding='utf-8') as f:
        json.dump(train_dict, f, ensure_ascii=False, indent=4)
    # JSON file with the proposal labels of every saved validation clip.
    with open(val_gt_json_path, 'w', encoding='utf-8') as f:
        new_val_dict = {'taxonomy': None, 'database': val_dict, 'version': None}
        json.dump(new_val_dict, f, ensure_ascii=False, indent=4)
    # JSON file with the proposal labels of the original (un-split) validation videos.
    with open(orig_val_gt_json_path, 'w', encoding='utf-8') as f:
        val_orig_dict = {'taxonomy': None, 'database': val_orig_dict, 'version': None, 'fps': fps}
        json.dump(val_orig_dict, f, ensure_ascii=False, indent=4)
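
# Usage sketch (hypothetical paths; `video_info` is the parsed label list, where each
# item holds 'url' and an 'actions' list with 'start_id'/'end_id'/'label_names'):
# win_splitting_per_video_for_training(video_info,
#                                      root="features/train",
#                                      feat_output_path="features/train_clips",
#                                      train_json_path="features/train_clips.json",
#                                      val_gt_json_path="features/val_clips_gt.json",
#                                      orig_val_gt_json_path="features/val_orig_gt.json")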


def merging_output_per_video(test_root,               # directory with the original (un-split) test video features
                             proposal_filename,       # predicted per-clip proposal file to be stitched back onto each original video
                             merging_output_filename, # merged per-video proposals, used for submission
                             win_T=100,
                             fps=25,
                             snms_alpha=0.4,
                             snms_t1=0.55,
                             snms_t2=0.9):
    # Merge the per-clip proposals back into one submission file per original video.
    test_file_name = os.listdir(test_root)
    with open(proposal_filename, 'r', encoding='utf-8') as f:
        pred_dict = json.load(f)

    new_pred_dict = {"version": pred_dict["version"], "external_data": {}}
    results_dict = pred_dict["results"]
    new_results_dict = {}
    for file_name in tqdm.tqdm(test_file_name):
        file_path = os.path.join(test_root, file_name)
        video_feat = pickle.load(open(file_path, "rb"))['image_feature']
        frames_len = len(video_feat)
        clip_count = 1
        proposal = []
        for start_f in range(0, frames_len, win_T // 2):
            end_f = start_f + win_T
            if end_f < frames_len:
                start_second = start_f / fps
            else:
                start_second = (frames_len - win_T) / fps

            clip_name = file_name[:-4] + "_clip" + str(clip_count)
            # clip_name[2:]: the keys in the result file drop the first two
            # characters of the clip name (see PaddleVideo bmn_metric.py, line 279).
            if (clip_name[2:] + ".mp4") not in results_dict.keys():
                print("Clip without proposals:", clip_name + ".mp4")
                clip_count += 1
                continue
            if len(results_dict[clip_name[2:] + ".mp4"]) == 0:
                print("Clip without proposals:", clip_name + ".mp4")
                clip_count += 1
                continue
            for pp in results_dict[clip_name[2:] + ".mp4"]:
                # Shift the clip-relative segment back to video-relative seconds.
                pp["segment"][0] = pp["segment"][0] + start_second
                pp["segment"][1] = pp["segment"][1] + start_second
                proposal.append(pp)
            clip_count += 1

        proposal = soft_nms(proposal, alpha=snms_alpha, t1=snms_t1, t2=snms_t2, dscale=win_T // fps)
        proposal = sorted(proposal, key=lambda x: x["segment"][0], reverse=False)
        new_results_dict[file_name[:-4]] = proposal

    new_pred_dict["results"] = new_results_dict
    with open(merging_output_filename, 'w') as f:
        json.dump(new_pred_dict, f)
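
# Usage sketch (hypothetical paths; win_T and fps must match the values used in
# win_splitting_per_video_for_testing):
# merging_output_per_video(test_root="features/test",
#                          proposal_filename="output/clip_proposals.json",
#                          merging_output_filename="output/submission.json")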


def checking_submission(json_path1="", json_path2=""):
    # Check whether two submission files contain exactly the same proposal segments.
    with open(json_path1, 'r', encoding='utf-8') as f:
        results1 = json.load(f)["results"]
    with open(json_path2, 'r', encoding='utf-8') as f:
        results2 = json.load(f)["results"]
    for video_name in results1.keys():
        proposals1 = results1[video_name]
        proposals2 = results2[video_name]
        for pp1, pp2 in zip(proposals1, proposals2):
            if pp1["segment"][0] != pp2["segment"][0] or pp1["segment"][1] != pp2["segment"][1]:
                print("The two submission files differ!")
                return
    print("The two submission files are identical!")