|
- import pandas as pd
- import os
- import shutil
- import numpy as np
- from sklearn.covariance import LedoitWolf
- from scipy.spatial.distance import mahalanobis
-
# Root of the unlabeled pool; the script reads from its 'images/' and
# 'labels/' sub-directories.
source_path = '/home/shanwei-luo/teamdata/anomaly_detection_active_learning/data0422/unlabel_11_12/'
# Destination split; selected files are copied into its 'images/' and
# 'labels/' sub-directories.
dist_path_01 = '/home/shanwei-luo/teamdata/anomaly_detection_active_learning/data0422/smd12_11_12_hard_score_04/train/'

# CSV files holding one row per image. Both need a 'feature' column; the
# unlabeled one also needs an 'Image_Name' column.
UNLABEL_CSV = './test_unlabel_11_12.csv'
TRAIN_CSV = './test_baseline_06_10.csv'

# How many of the highest-distance unlabeled images are copied.
NUM_SELECT = 2750


def parse_feature(text):
    """Turn one stringified feature vector into a list of floats.

    The first and last characters of *text* are dropped and the remainder is
    split on commas. Presumably the CSV stores vectors as "[v1, v2, ...]",
    so those two characters are the brackets -- confirm against the exporter.

    Raises:
        ValueError: if any comma-separated piece is not a valid float.
    """
    return [float(piece) for piece in text[1:-1].split(",")]


def load_features(frame):
    """Return the 'feature' column of *frame* as a 2-D float array."""
    return np.array([parse_feature(text) for text in frame['feature']])


def mahalanobis_scores(frame, mean, cov_inv):
    """Map each 'Image_Name' in *frame* to its Mahalanobis distance.

    The distance is measured from *mean* using the inverse covariance
    *cov_inv*. When an image name occurs more than once, the last row wins.
    """
    return {
        name: mahalanobis(parse_feature(text), mean, cov_inv)
        for name, text in zip(frame['Image_Name'], frame['feature'])
    }


def select_top(scores, limit):
    """Return up to *limit* keys of *scores*, largest value first.

    The sort is stable, so keys with equal scores keep their insertion order.
    """
    ranked = sorted(scores.items(), key=lambda item: item[1], reverse=True)
    return [name for name, _ in ranked[:limit]]


def copy_samples(names, src_root, dst_root):
    """Copy the chosen images, and their label files when present.

    Each image is copied from ``src_root/images`` to ``dst_root/images``.
    A label file with the same stem and a ``.txt`` suffix is copied from
    ``src_root/labels`` to ``dst_root/labels`` only if it exists.

    Returns:
        A ``(images_copied, labels_copied)`` tuple.

    Raises:
        OSError: if an image listed in *names* cannot be copied.
    """
    image_dst_dir = os.path.join(dst_root, 'images')
    label_dst_dir = os.path.join(dst_root, 'labels')
    # Create the targets up front so a fresh destination split does not make
    # the first copy fail.
    os.makedirs(image_dst_dir, exist_ok=True)
    os.makedirs(label_dst_dir, exist_ok=True)

    count_img = 0
    count_label = 0
    for name in names:
        shutil.copy(os.path.join(src_root, 'images', name),
                    os.path.join(image_dst_dir, name))
        count_img += 1

        # Swap only the final extension; a plain str.replace would touch
        # every ".jpg" occurrence and do nothing for other suffixes.
        label_name = os.path.splitext(name)[0] + '.txt'
        label_src = os.path.join(src_root, 'labels', label_name)
        if os.path.exists(label_src):
            shutil.copy(label_src, os.path.join(label_dst_dir, label_name))
            count_label += 1
    return count_img, count_label


def main():
    """Copy the unlabeled images that lie farthest from the training features.

    Fits a mean and a Ledoit-Wolf covariance on the training features, scores
    every unlabeled image by Mahalanobis distance to that fit, and copies the
    NUM_SELECT highest-scoring images (plus labels) into ``dist_path_01``.
    """
    infer_data_unlabel = pd.read_csv(UNLABEL_CSV)
    print(infer_data_unlabel.shape)

    infer_data_train = pd.read_csv(TRAIN_CSV)
    print(infer_data_train.shape)

    train_feats = load_features(infer_data_train)
    print(train_feats.shape)
    train_mean = np.mean(train_feats, axis=0)
    train_cov = LedoitWolf().fit(train_feats).covariance_
    train_cov_inv = np.linalg.inv(train_cov)
    print(train_mean.shape)
    print(train_cov.shape)
    print(train_cov_inv.shape)

    feat_dist = mahalanobis_scores(infer_data_unlabel, train_mean, train_cov_inv)
    select_01 = select_top(feat_dist, NUM_SELECT)
    print(len(select_01))

    count_img, count_label = copy_samples(select_01, source_path, dist_path_01)
    print(count_img, count_label)


if __name__ == "__main__":
    main()
|