# 生成score list
score_list = tourist_data['action'].values.tolist()
In [222]: vid_dict = defaultdict(list)
In [223]: for key, value in [(v, i) for i, v in enumerate(vid_list)]:
     ...:     vid_dict[key].append(value)
In [224]: vid_dict
Out[224]: defaultdict(list, {'2c9f91345bf13cac015bfce28ef31002': [7], '2c9f91345c2007f8015c2deee14c18cb': [2, 4, 6], '2c9f91345c3ed855015c52649f962d4f': [1, 5, 9], '2c9f91345c3ed855015c5ee9cb904681': [0, 3, 8]})
In [227]: rank_list=[]
     ...: for i in vid_dict:
     ...:     score = 0
     ...:     for index in vid_dict[i]:
     ...:         score += int(score_list[index])
     ...:     rank_list.append(score)
In [231]: vid_list = []
In [232]: for i in vid_dict:
     ...:     vid_list.append(i)
In [233]: vid_list
Out[233]: ['2c9f91345bf13cac015bfce28ef31002', '2c9f91345c2007f8015c2deee14c18cb', '2c9f91345c3ed855015c5ee9cb904681', '2c9f91345c3ed855015c52649f962d4f']
In [234]: rank_list
Out[234]: [1, 4, 5, 3]
In [237]: vid_score = pd.DataFrame({'score':rank_list,'vid':vid_list})
生成score与vid的矩阵
针对生成的数据进行分析
使用altair对数据进行画图分析
1 2 3 4 5
# 导入csv文件
import pandas as pd
from altair import Chart, load_dataset
%matplotlib inline
vids_score = pd.read_csv('./res/vid_score.csv')