Update model selection function: add an `output_dir` parameter that replaces the hard-coded results directory.

5 years ago · 7f66196251
--- a/gklearn/utils/model_selection_precomputed.py
+++ b/gklearn/utils/model_selection_precomputed.py
@@ -30,6 +30,7 @@ def model_selection_for_precomputed_kernel(datafile,
                                           datafile_y=None,
                                           extra_params=None,
                                           ds_name='ds-unknown',
 										   output_dir='outputs/',
                                           n_jobs=1,
                                           read_gm_from_file=False,
                                           verbose=True):
@@ -56,7 +57,7 @@ def model_selection_for_precomputed_kernel(datafile,
    model_type : string
        Type of the problem, can be 'regression' or 'classification'.
    NUM_TRIALS : integer
        Number of random trials of outer cv loop. The default is 30.
        Number of random trials of the outer CV loop. The default is 30.
    datafile_y : string
        Path of file storing y data. This parameter is optional depending on 
        the given dataset file.
@@ -89,9 +90,9 @@ def model_selection_for_precomputed_kernel(datafile,
    """
    tqdm.monitor_interval = 0

    results_dir = '../notebooks/results/' + estimator.__name__
    if not os.path.exists(results_dir):
        os.makedirs(results_dir)
    output_dir += estimator.__name__
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    # a string to save all the results.
    str_fw = '###################### log time: ' + datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") + '. ######################\n\n'
    str_fw += '# This file contains results of ' + estimator.__name__ + ' on dataset ' + ds_name + ',\n# including gram matrices, serial numbers for gram matrix figures and performance.\n\n'
@@ -209,7 +210,7 @@ def model_selection_for_precomputed_kernel(datafile,
 #                            threshold=np.inf,
 #                            floatmode='unique') + '\n\n'

                    fig_file_name = results_dir + '/GM[ds]' + ds_name
                    fig_file_name = output_dir + '/GM[ds]' + ds_name
                    if params_out != {}:
                        fig_file_name += '[params]' + str(idx)
                    plt.imshow(Kmatrix)
@@ -244,7 +245,7 @@ def model_selection_for_precomputed_kernel(datafile,
            str_fw += '\nall gram matrices are ignored, no results obtained.\n\n'
        else:
            # save gram matrices to file.
 #            np.savez(results_dir + '/' + ds_name + '.gm', 
 #            np.savez(output_dir + '/' + ds_name + '.gm', 
 #                     gms=gram_matrices, params=param_list_pre_revised, y=y, 
 #                     gmtime=gram_matrix_time)
            if verbose:
@@ -450,7 +451,7 @@ def model_selection_for_precomputed_kernel(datafile,
            print()
            print('2. Reading gram matrices from file...')
        str_fw += '\nII. Gram matrices.\n\nGram matrices are read from file, see last log for detail.\n'
        gmfile = np.load(results_dir + '/' + ds_name + '.gm.npz')
        gmfile = np.load(output_dir + '/' + ds_name + '.gm.npz')
        gram_matrices = gmfile['gms'] # a list to store gram matrices for all param_grid_precomputed
        gram_matrix_time = gmfile['gmtime'] # time used to compute the gram matrices
        param_list_pre_revised = gmfile['params'] # list to store param grids precomputed ignoring the useless ones
@@ -603,8 +604,8 @@ def model_selection_for_precomputed_kernel(datafile,
        str_fw += 'training time with hyper-param choices who did not participate in calculation of gram matrices: {:.2f}s\n\n'.format(tt_poster)

        # open file to save all results for this dataset.
        if not os.path.exists(results_dir):
            os.makedirs(results_dir)
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)
            
    # print out results as table.
    str_fw += printResultsInTable(param_list, param_list_pre_revised, average_val_scores,
@@ -613,11 +614,11 @@ def model_selection_for_precomputed_kernel(datafile,
              model_type, verbose)
            
    # open file to save all results for this dataset.
    if not os.path.exists(results_dir + '/' + ds_name + '.output.txt'):
        with open(results_dir + '/' + ds_name + '.output.txt', 'w') as f:
    if not os.path.exists(output_dir + '/' + ds_name + '.output.txt'):
        with open(output_dir + '/' + ds_name + '.output.txt', 'w') as f:
            f.write(str_fw)
    else:
        with open(results_dir + '/' + ds_name + '.output.txt', 'r+') as f:
        with open(output_dir + '/' + ds_name + '.output.txt', 'r+') as f:
            content = f.read()
            f.seek(0, 0)
            f.write(str_fw + '\n\n\n' + content)
@@ -797,7 +798,7 @@ def parallel_trial_do(param_list_pre_revised, param_list, y, model_type, trial):


 def compute_gram_matrices(dataset, y, estimator, param_list_precomputed, 
                          results_dir, ds_name,
                          output_dir, ds_name,
                          n_jobs=1, str_fw='', verbose=True):
    gram_matrices = [
        ]  # a list to store gram matrices for all param_grid_precomputed
@@ -867,7 +868,7 @@ def compute_gram_matrices(dataset, y, estimator, param_list_precomputed,
 #                            threshold=np.inf,
 #                            floatmode='unique') + '\n\n'

                fig_file_name = results_dir + '/GM[ds]' + ds_name
                fig_file_name = output_dir + '/GM[ds]' + ds_name
                if params_out != {}:
                    fig_file_name += '[params]' + str(idx)
                plt.imshow(Kmatrix)
@@ -897,8 +898,8 @@ def compute_gram_matrices(dataset, y, estimator, param_list_precomputed,
    return gram_matrices, gram_matrix_time, param_list_pre_revised, y, str_fw


 def read_gram_matrices_from_file(results_dir, ds_name):
    gmfile = np.load(results_dir + '/' + ds_name + '.gm.npz')
 def read_gram_matrices_from_file(output_dir, ds_name):
    gmfile = np.load(output_dir + '/' + ds_name + '.gm.npz')
    gram_matrices = gmfile['gms'] # a list to store gram matrices for all param_grid_precomputed
    param_list_pre_revised = gmfile['params'] # list to store param grids precomputed ignoring the useless ones
    y = gmfile['y'].tolist()