| @@ -30,6 +30,7 @@ def model_selection_for_precomputed_kernel(datafile, | |||||
| datafile_y=None, | datafile_y=None, | ||||
| extra_params=None, | extra_params=None, | ||||
| ds_name='ds-unknown', | ds_name='ds-unknown', | ||||
| output_dir='outputs/', | |||||
| n_jobs=1, | n_jobs=1, | ||||
| read_gm_from_file=False, | read_gm_from_file=False, | ||||
| verbose=True): | verbose=True): | ||||
| @@ -56,7 +57,7 @@ def model_selection_for_precomputed_kernel(datafile, | |||||
| model_type : string | model_type : string | ||||
| Type of the problem, can be 'regression' or 'classification'. | Type of the problem, can be 'regression' or 'classification'. | ||||
| NUM_TRIALS : integer | NUM_TRIALS : integer | ||||
| Number of random trials of outer cv loop. The default is 30. | |||||
| Number of random trials of the outer CV loop. The default is 30. | |||||
| datafile_y : string | datafile_y : string | ||||
| Path of file storing y data. This parameter is optional depending on | Path of file storing y data. This parameter is optional depending on | ||||
| the given dataset file. | the given dataset file. | ||||
| @@ -89,9 +90,9 @@ def model_selection_for_precomputed_kernel(datafile, | |||||
| """ | """ | ||||
| tqdm.monitor_interval = 0 | tqdm.monitor_interval = 0 | ||||
| results_dir = '../notebooks/results/' + estimator.__name__ | |||||
| if not os.path.exists(results_dir): | |||||
| os.makedirs(results_dir) | |||||
| output_dir += estimator.__name__ | |||||
| if not os.path.exists(output_dir): | |||||
| os.makedirs(output_dir) | |||||
| # a string to save all the results. | # a string to save all the results. | ||||
| str_fw = '###################### log time: ' + datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") + '. ######################\n\n' | str_fw = '###################### log time: ' + datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") + '. ######################\n\n' | ||||
| str_fw += '# This file contains results of ' + estimator.__name__ + ' on dataset ' + ds_name + ',\n# including gram matrices, serial numbers for gram matrix figures and performance.\n\n' | str_fw += '# This file contains results of ' + estimator.__name__ + ' on dataset ' + ds_name + ',\n# including gram matrices, serial numbers for gram matrix figures and performance.\n\n' | ||||
| @@ -209,7 +210,7 @@ def model_selection_for_precomputed_kernel(datafile, | |||||
| # threshold=np.inf, | # threshold=np.inf, | ||||
| # floatmode='unique') + '\n\n' | # floatmode='unique') + '\n\n' | ||||
| fig_file_name = results_dir + '/GM[ds]' + ds_name | |||||
| fig_file_name = output_dir + '/GM[ds]' + ds_name | |||||
| if params_out != {}: | if params_out != {}: | ||||
| fig_file_name += '[params]' + str(idx) | fig_file_name += '[params]' + str(idx) | ||||
| plt.imshow(Kmatrix) | plt.imshow(Kmatrix) | ||||
| @@ -244,7 +245,7 @@ def model_selection_for_precomputed_kernel(datafile, | |||||
| str_fw += '\nall gram matrices are ignored, no results obtained.\n\n' | str_fw += '\nall gram matrices are ignored, no results obtained.\n\n' | ||||
| else: | else: | ||||
| # save gram matrices to file. | # save gram matrices to file. | ||||
| # np.savez(results_dir + '/' + ds_name + '.gm', | |||||
| # np.savez(output_dir + '/' + ds_name + '.gm', | |||||
| # gms=gram_matrices, params=param_list_pre_revised, y=y, | # gms=gram_matrices, params=param_list_pre_revised, y=y, | ||||
| # gmtime=gram_matrix_time) | # gmtime=gram_matrix_time) | ||||
| if verbose: | if verbose: | ||||
| @@ -450,7 +451,7 @@ def model_selection_for_precomputed_kernel(datafile, | |||||
| print() | print() | ||||
| print('2. Reading gram matrices from file...') | print('2. Reading gram matrices from file...') | ||||
| str_fw += '\nII. Gram matrices.\n\nGram matrices are read from file, see last log for detail.\n' | str_fw += '\nII. Gram matrices.\n\nGram matrices are read from file, see last log for detail.\n' | ||||
| gmfile = np.load(results_dir + '/' + ds_name + '.gm.npz') | |||||
| gmfile = np.load(output_dir + '/' + ds_name + '.gm.npz') | |||||
| gram_matrices = gmfile['gms'] # a list to store gram matrices for all param_grid_precomputed | gram_matrices = gmfile['gms'] # a list to store gram matrices for all param_grid_precomputed | ||||
| gram_matrix_time = gmfile['gmtime'] # time used to compute the gram matrices | gram_matrix_time = gmfile['gmtime'] # time used to compute the gram matrices | ||||
| param_list_pre_revised = gmfile['params'] # list to store param grids precomputed ignoring the useless ones | param_list_pre_revised = gmfile['params'] # list to store param grids precomputed ignoring the useless ones | ||||
| @@ -603,8 +604,8 @@ def model_selection_for_precomputed_kernel(datafile, | |||||
| str_fw += 'training time with hyper-param choices who did not participate in calculation of gram matrices: {:.2f}s\n\n'.format(tt_poster) | str_fw += 'training time with hyper-param choices who did not participate in calculation of gram matrices: {:.2f}s\n\n'.format(tt_poster) | ||||
| # open file to save all results for this dataset. | # open file to save all results for this dataset. | ||||
| if not os.path.exists(results_dir): | |||||
| os.makedirs(results_dir) | |||||
| if not os.path.exists(output_dir): | |||||
| os.makedirs(output_dir) | |||||
| # print out results as table. | # print out results as table. | ||||
| str_fw += printResultsInTable(param_list, param_list_pre_revised, average_val_scores, | str_fw += printResultsInTable(param_list, param_list_pre_revised, average_val_scores, | ||||
| @@ -613,11 +614,11 @@ def model_selection_for_precomputed_kernel(datafile, | |||||
| model_type, verbose) | model_type, verbose) | ||||
| # open file to save all results for this dataset. | # open file to save all results for this dataset. | ||||
| if not os.path.exists(results_dir + '/' + ds_name + '.output.txt'): | |||||
| with open(results_dir + '/' + ds_name + '.output.txt', 'w') as f: | |||||
| if not os.path.exists(output_dir + '/' + ds_name + '.output.txt'): | |||||
| with open(output_dir + '/' + ds_name + '.output.txt', 'w') as f: | |||||
| f.write(str_fw) | f.write(str_fw) | ||||
| else: | else: | ||||
| with open(results_dir + '/' + ds_name + '.output.txt', 'r+') as f: | |||||
| with open(output_dir + '/' + ds_name + '.output.txt', 'r+') as f: | |||||
| content = f.read() | content = f.read() | ||||
| f.seek(0, 0) | f.seek(0, 0) | ||||
| f.write(str_fw + '\n\n\n' + content) | f.write(str_fw + '\n\n\n' + content) | ||||
| @@ -797,7 +798,7 @@ def parallel_trial_do(param_list_pre_revised, param_list, y, model_type, trial): | |||||
| def compute_gram_matrices(dataset, y, estimator, param_list_precomputed, | def compute_gram_matrices(dataset, y, estimator, param_list_precomputed, | ||||
| results_dir, ds_name, | |||||
| output_dir, ds_name, | |||||
| n_jobs=1, str_fw='', verbose=True): | n_jobs=1, str_fw='', verbose=True): | ||||
| gram_matrices = [ | gram_matrices = [ | ||||
| ] # a list to store gram matrices for all param_grid_precomputed | ] # a list to store gram matrices for all param_grid_precomputed | ||||
| @@ -867,7 +868,7 @@ def compute_gram_matrices(dataset, y, estimator, param_list_precomputed, | |||||
| # threshold=np.inf, | # threshold=np.inf, | ||||
| # floatmode='unique') + '\n\n' | # floatmode='unique') + '\n\n' | ||||
| fig_file_name = results_dir + '/GM[ds]' + ds_name | |||||
| fig_file_name = output_dir + '/GM[ds]' + ds_name | |||||
| if params_out != {}: | if params_out != {}: | ||||
| fig_file_name += '[params]' + str(idx) | fig_file_name += '[params]' + str(idx) | ||||
| plt.imshow(Kmatrix) | plt.imshow(Kmatrix) | ||||
| @@ -897,8 +898,8 @@ def compute_gram_matrices(dataset, y, estimator, param_list_precomputed, | |||||
| return gram_matrices, gram_matrix_time, param_list_pre_revised, y, str_fw | return gram_matrices, gram_matrix_time, param_list_pre_revised, y, str_fw | ||||
| def read_gram_matrices_from_file(results_dir, ds_name): | |||||
| gmfile = np.load(results_dir + '/' + ds_name + '.gm.npz') | |||||
| def read_gram_matrices_from_file(output_dir, ds_name): | |||||
| gmfile = np.load(output_dir + '/' + ds_name + '.gm.npz') | |||||
| gram_matrices = gmfile['gms'] # a list to store gram matrices for all param_grid_precomputed | gram_matrices = gmfile['gms'] # a list to store gram matrices for all param_grid_precomputed | ||||
| param_list_pre_revised = gmfile['params'] # list to store param grids precomputed ignoring the useless ones | param_list_pre_revised = gmfile['params'] # list to store param grids precomputed ignoring the useless ones | ||||
| y = gmfile['y'].tolist() | y = gmfile['y'].tolist() | ||||