|
|
import pandas as pd |
|
|
from scipy.io import loadmat, savemat |
|
|
import numpy as np |
|
|
|
|
|
def _module_setting(name, explicit):
    """Return ``explicit``, or the module-level global called ``name`` if it is None."""
    if explicit is not None:
        return explicit
    try:
        return globals()[name]
    except KeyError:
        raise NameError(
            f"name '{name}' is not defined: pass {name}=... to split_features() "
            "or define it at module level"
        ) from None


def split_features(data_name, df, result_file, feature_path, layer_name,
                   metadata_path=None, network_name=None):
    """Split metadata and features into train/test parts using a stored test list.

    The test video identifiers are read from the ``'Test_videos_Median_model'``
    variable of ``result_file``. Rows of ``df`` whose first column matches one
    of those identifiers form the test split; all remaining rows form the
    train split. The feature matrix is split with the same row indices, and
    both splits are written to disk.

    Files read:
        * ``result_file``
        * ``{feature_path}{layer_name}/original_features/{network_name}_{data_name}_original_features.mat``
          (variable ``data_name``)
        * for ``data_name == 'youtube_ugc'`` only:
          ``{metadata_path}/greyscale_report/YOUTUBE_UGC_greyscale_metadata.csv``

    Files written:
        * ``{metadata_path}mos_files/{data_name}_MOS_train.csv`` / ``..._MOS_test.csv``
        * ``{feature_path}{layer_name}/relaxvqa_{data_name}_original_train_features.mat``
          / ``..._original_test_features.mat``
          (the ``relaxvqa_`` prefix is fixed; it does not follow ``network_name``)

    Args:
        data_name: Dataset name; selects file names and the special handling
            for ``'konvid_1k'`` and ``'youtube_ugc'``.
        df: Metadata frame. Column 0 holds the video identifier and the
            columns ``'framerate'`` and ``'mos'`` must exist. Feature rows are
            selected with ``df``'s index values, so the index is presumably a
            default 0..n-1 range aligned with the feature matrix -- confirm
            with the caller.
        result_file: Path of the ``.mat`` file holding the test video list.
        feature_path: Directory prefix of the feature files (concatenated
            as-is, so it needs its trailing separator).
        layer_name: Sub-directory of ``feature_path`` to use.
        metadata_path: Directory prefix of the metadata files. When omitted,
            the module-level global ``metadata_path`` is used.
        network_name: Prefix of the feature file name. When omitted, the
            module-level global ``network_name`` is used.

    Returns:
        ``(train_features, test_features, test_vids)``.

    Raises:
        NameError: ``metadata_path`` / ``network_name`` were neither passed
            nor defined at module level.
        KeyError: a required variable or column is missing from an input.
    """
    # Resolve the former hidden globals first so a misconfiguration fails
    # before any file is touched.
    metadata_path = _module_setting('metadata_path', metadata_path)
    network_name = _module_setting('network_name', network_name)
    is_youtube_ugc = data_name == 'youtube_ugc'

    recorded = loadmat(result_file)['Test_videos_Median_model']
    if data_name == 'konvid_1k':
        # Only the last row of the stored array is used (presumably a single
        # row of numeric ids -- confirm against the file that wrote it).
        test_vids = recorded[-1].tolist()
    else:
        # Every entry is stripped of surrounding whitespace before matching.
        test_vids = [vid.strip() for vid in recorded]

    if is_youtube_ugc:
        grey_df = pd.read_csv(f'{metadata_path}/greyscale_report/{data_name.upper()}_greyscale_metadata.csv')
        grey_indices = grey_df.iloc[:, 0].tolist()
        # Re-number the rows so the index lines up with the feature matrix
        # once the same rows are deleted from it below.
        df = df.drop(index=grey_indices).reset_index(drop=True)

    vid_column = df.iloc[:, 0]
    all_vids = vid_column.tolist()
    print(all_vids)
    print(test_vids)
    train_vids = list(set(all_vids) - set(test_vids))
    print(len(test_vids))
    print(len(train_vids))

    train_df = df[vid_column.isin(train_vids)]
    test_df = df[vid_column.isin(test_vids)]
    print(len(test_df))

    sorted_train_df = pd.DataFrame({
        'vid': train_df.iloc[:, 0],
        'framerate': train_df['framerate'],
        'MOS': train_df['mos'],
    })
    sorted_test_df = pd.DataFrame({
        'vid': test_df.iloc[:, 0],
        'framerate': test_df['framerate'],
        'MOS': test_df['mos'],
    })

    feature_data = loadmat(f'{feature_path}{layer_name}/original_features/{network_name}_{data_name}_original_features.mat')
    features = feature_data[f'{data_name}']
    if is_youtube_ugc:
        features = np.delete(features, grey_indices, axis=0)

    train_features = features[train_df.index]
    test_features = features[test_df.index]

    sorted_train_df.to_csv(f'{metadata_path}mos_files/{data_name}_MOS_train.csv', index=False)
    sorted_test_df.to_csv(f'{metadata_path}mos_files/{data_name}_MOS_test.csv', index=False)
    savemat(f'{feature_path}{layer_name}/relaxvqa_{data_name}_original_train_features.mat', {f'{data_name}_train_features': train_features})
    savemat(f'{feature_path}{layer_name}/relaxvqa_{data_name}_original_test_features.mat', {f'{data_name}_test_features': test_features})

    return train_features, test_features, test_vids
|
|
|
|
|
if __name__ == '__main__':
    # Experiment selection. ``model_name`` is not referenced again in the
    # visible part of this script.
    data_name = 'cvd_2014'
    network_name = 'relaxvqa'
    layer_name = 'pool'
    model_name = 'Mlp'
    select_criteria = 'byrmse'

    # Directory prefixes; each keeps its trailing slash because paths are
    # built by plain string concatenation. ``metadata_path`` and
    # ``network_name`` must remain module-level names: split_features()
    # looks them up as globals.
    metadata_path = '../../metadata/'
    feature_path = '../../features_merged_frag/'
    result_path = '../../log/result/'

    # Inputs: the stored result file naming the test videos, and the
    # dataset metadata table.
    result_file = f'{result_path}{data_name}_{network_name}_{select_criteria}.mat'
    df = pd.read_csv(f'{metadata_path}/{data_name.upper()}_metadata.csv')

    train_features, test_features, test_vids = split_features(
        data_name,
        df,
        result_file,
        feature_path,
        layer_name,
    )
|
|
|