| @@ -0,0 +1,350 @@ | |||
| import libsvm.*; | |||
| import java.io.*; | |||
| import java.util.*; | |||
| import java.text.DecimalFormat; | |||
/**
 * Command-line tool that linearly rescales a data file written in the sparse
 * text format "target index:value index:value ..." and prints the scaled data
 * to standard output.
 *
 * Every feature is mapped from its [min, max] range onto [lower, upper]; with
 * the -y option the target column is rescaled the same way. A feature that is
 * absent from a line is treated as having the value 0. The ranges can be
 * written to a parameter file (-s) or read back from one (-r).
 *
 * Parameter file layout, as written by the save step:
 *   y                      (only when target scaling is enabled)
 *   y_lower y_upper
 *   y_min y_max
 *   x
 *   lower upper
 *   index min max          (one line per feature whose min differs from its max)
 */
class svm_scale
{
	/** Token separators of a data line: whitespace plus the ':' between index and value. */
	private static final String DATA_DELIMITERS = " \t\n\r\f:";

	/** Most recent line returned by {@link #readline}. */
	private String line = null;
	private double lower = -1.0;
	private double upper = 1.0;
	private double y_lower;
	private double y_upper;
	private boolean y_scaling = false;
	/** Per-feature extrema, indexed by feature index (slot 0 exists but indices normally start at 1). */
	private double[] feature_max;
	private double[] feature_min;
	private double y_max = -Double.MAX_VALUE;
	private double y_min = Double.MAX_VALUE;
	private int max_index;
	/** Number of index:value pairs read from the input. */
	private long num_nonzeros = 0;
	/** Number of index:value pairs written to the output. */
	private long new_num_nonzeros = 0;

	/** Prints the usage text to standard output and terminates the JVM with status 1. */
	private static void exit_with_help()
	{
		System.out.print(
		 "Usage: svm-scale [options] data_filename\n"
		+"options:\n"
		+"-l lower : x scaling lower limit (default -1)\n"
		+"-u upper : x scaling upper limit (default +1)\n"
		+"-y y_lower y_upper : y scaling limits (default: no y scaling)\n"
		+"-s save_filename : save scaling parameters to save_filename\n"
		+"-r restore_filename : restore scaling parameters from restore_filename\n"
		);
		System.exit(1);
	}

	/**
	 * Returns the command-line argument at {@code pos}, or prints the usage
	 * text and exits when an option is missing its value.
	 */
	private static String option_value(String[] argv, int pos)
	{
		if(pos >= argv.length)
			exit_with_help();
		return argv[pos];
	}

	/**
	 * Parses a numeric option value; a malformed number prints the usage text
	 * and exits instead of surfacing a NumberFormatException stack trace.
	 */
	private static double parse_number(String text)
	{
		try {
			return Double.parseDouble(text);
		} catch(NumberFormatException e) {
			System.err.println("invalid number: " + text);
			exit_with_help();
			return 0; // not reached: exit_with_help() terminates the JVM
		}
	}

	/**
	 * Opens {@code filename} for buffered reading; on failure reports
	 * "can't open file ..." on standard error and exits with status 1.
	 */
	private static BufferedReader open_or_exit(String filename)
	{
		try {
			return new BufferedReader(new FileReader(filename));
		} catch(Exception e) {
			System.err.println("can't open file " + filename);
			System.exit(1);
			return null; // not reached
		}
	}

	/** Closes {@code fp} and returns a fresh reader positioned at the start of {@code filename}. */
	private BufferedReader rewind(BufferedReader fp, String filename) throws IOException
	{
		fp.close();
		return new BufferedReader(new FileReader(filename));
	}

	/** Prints the (optionally rescaled) target followed by a space. */
	private void output_target(double value)
	{
		if(y_scaling)
		{
			// The extrema are mapped exactly so rounding cannot push them off the
			// limits; this also covers y_min == y_max without dividing by zero.
			if(value == y_min)
				value = y_lower;
			else if(value == y_max)
				value = y_upper;
			else
				value = y_lower + (y_upper-y_lower) *
					(value-y_min) / (y_max-y_min);
		}

		System.out.print(value + " ");
	}

	/**
	 * Prints "index:scaled " for one feature value. Features whose min equals
	 * their max are dropped, and so are values that scale to exactly 0.
	 */
	private void output(int index, double value)
	{
		/* skip single-valued attribute */
		if(feature_max[index] == feature_min[index])
			return;

		if(value == feature_min[index])
			value = lower;
		else if(value == feature_max[index])
			value = upper;
		else
			value = lower + (upper-lower) *
				(value-feature_min[index])/
				(feature_max[index]-feature_min[index]);

		if(value != 0)
		{
			System.out.print(index + ":" + value + " ");
			new_num_nonzeros++;
		}
	}

	/** Reads the next line into {@link #line} and returns it (null at end of file). */
	private String readline(BufferedReader fp) throws IOException
	{
		line = fp.readLine();
		return line;
	}

	/**
	 * Raises max_index to the largest feature index listed in the parameter
	 * file, skipping the optional "y" section and the "x" header lines.
	 */
	private void scan_restore_max_index(BufferedReader fp_restore) throws IOException
	{
		if(fp_restore.read() == 'y')
		{
			fp_restore.readLine();
			fp_restore.readLine();
			fp_restore.readLine();
		}
		// The first character of the "x" line was consumed by read() above when
		// there is no "y" section; either way two more lines precede the features.
		fp_restore.readLine();
		fp_restore.readLine();

		String restore_line;
		while((restore_line = fp_restore.readLine()) != null)
		{
			StringTokenizer st = new StringTokenizer(restore_line);
			int idx = Integer.parseInt(st.nextToken());
			max_index = Math.max(max_index, idx);
		}
	}

	/** Pass 1: raises max_index to the largest index in the data and counts index:value pairs. */
	private void scan_data_max_index(BufferedReader fp) throws IOException
	{
		while(readline(fp) != null)
		{
			StringTokenizer st = new StringTokenizer(line, DATA_DELIMITERS);
			st.nextToken(); // target
			while(st.hasMoreTokens())
			{
				int index = Integer.parseInt(st.nextToken());
				max_index = Math.max(max_index, index);
				st.nextToken(); // value
				num_nonzeros++;
			}
		}
	}

	/** Allocates the per-feature extrema for indices 0..max_index and resets them. */
	private void allocate_ranges()
	{
		try {
			feature_max = new double[(max_index+1)];
			feature_min = new double[(max_index+1)];
		} catch(OutOfMemoryError e) {
			System.err.println("can't allocate enough memory");
			System.exit(1);
		}
		Arrays.fill(feature_max, -Double.MAX_VALUE);
		Arrays.fill(feature_min, Double.MAX_VALUE);
	}

	/** Folds an implicit zero into the extrema of every feature index in [from, to). */
	private void note_zeros(int from, int to)
	{
		for(int i=from;i<to;i++)
		{
			feature_max[i] = Math.max(feature_max[i], 0);
			feature_min[i] = Math.min(feature_min[i], 0);
		}
	}

	/** Pass 2: records the min/max of the target and of every feature. */
	private void scan_data_ranges(BufferedReader fp) throws IOException
	{
		while(readline(fp) != null)
		{
			int next_index = 1;
			StringTokenizer st = new StringTokenizer(line, DATA_DELIMITERS);

			double target = Double.parseDouble(st.nextToken());
			y_max = Math.max(y_max, target);
			y_min = Math.min(y_min, target);

			while(st.hasMoreTokens())
			{
				int index = Integer.parseInt(st.nextToken());
				double value = Double.parseDouble(st.nextToken());

				// indices skipped since the previous pair are implicit zeros
				note_zeros(next_index, index);

				feature_max[index] = Math.max(feature_max[index], value);
				feature_min[index] = Math.min(feature_min[index], value);
				next_index = index + 1;
			}

			// trailing indices not present on this line are implicit zeros too
			note_zeros(next_index, max_index+1);
		}
	}

	/**
	 * Pass 2.5 (restore): overrides the scaling limits and the per-feature
	 * extrema with the values stored in the parameter file, then closes it.
	 * The reader must be positioned at the start of the file.
	 */
	private void restore_parameters(BufferedReader fp_restore) throws IOException
	{
		try
		{
			fp_restore.mark(2); // for reset
			if(fp_restore.read() == 'y')
			{
				fp_restore.readLine(); // pass the '\n' after 'y'
				StringTokenizer st = new StringTokenizer(fp_restore.readLine());
				y_lower = Double.parseDouble(st.nextToken());
				y_upper = Double.parseDouble(st.nextToken());
				st = new StringTokenizer(fp_restore.readLine());
				y_min = Double.parseDouble(st.nextToken());
				y_max = Double.parseDouble(st.nextToken());
				y_scaling = true;
			}
			else
				fp_restore.reset();

			if(fp_restore.read() == 'x')
			{
				fp_restore.readLine(); // pass the '\n' after 'x'
				StringTokenizer st = new StringTokenizer(fp_restore.readLine());
				lower = Double.parseDouble(st.nextToken());
				upper = Double.parseDouble(st.nextToken());

				String restore_line;
				while((restore_line = fp_restore.readLine()) != null)
				{
					StringTokenizer st2 = new StringTokenizer(restore_line);
					int idx = Integer.parseInt(st2.nextToken());
					double fmin = Double.parseDouble(st2.nextToken());
					double fmax = Double.parseDouble(st2.nextToken());
					if(idx <= max_index)
					{
						feature_min[idx] = fmin;
						feature_max[idx] = fmax;
					}
				}
			}
		}
		finally
		{
			fp_restore.close();
		}
	}

	/**
	 * Pass 2.5 (save): writes the scaling limits and the extrema of every
	 * non-constant feature to {@code save_filename}.
	 */
	private void save_parameters(String save_filename) throws IOException
	{
		BufferedWriter fp_save = null;
		try {
			fp_save = new BufferedWriter(new FileWriter(save_filename));
		} catch(IOException e) {
			System.err.println("can't open file " + save_filename);
			System.exit(1);
		}

		try
		{
			// Locale.ROOT keeps '.' as the decimal separator regardless of the
			// user's locale; the restore step reads the numbers back with
			// Double.parseDouble, which does not accept ','.
			Formatter formatter = new Formatter(new StringBuilder(), Locale.ROOT);

			if(y_scaling)
			{
				formatter.format("y\n");
				formatter.format("%.16g %.16g\n", y_lower, y_upper);
				formatter.format("%.16g %.16g\n", y_min, y_max);
			}
			formatter.format("x\n");
			formatter.format("%.16g %.16g\n", lower, upper);
			for(int i=1;i<=max_index;i++)
			{
				if(feature_min[i] != feature_max[i])
					formatter.format("%d %.16g %.16g\n", i, feature_min[i], feature_max[i]);
			}
			fp_save.write(formatter.toString());
		}
		finally
		{
			fp_save.close();
		}
	}

	/** Pass 3: prints every data line with its target and features rescaled. */
	private void scale_data(BufferedReader fp) throws IOException
	{
		while(readline(fp) != null)
		{
			int next_index = 1;
			StringTokenizer st = new StringTokenizer(line, DATA_DELIMITERS);

			output_target(Double.parseDouble(st.nextToken()));

			while(st.hasMoreTokens())
			{
				int index = Integer.parseInt(st.nextToken());
				double value = Double.parseDouble(st.nextToken());

				// implicit zeros may scale to a non-zero value and must be emitted
				for(int i=next_index;i<index;i++)
					output(i, 0);
				output(index, value);
				next_index = index + 1;
			}

			for(int i=next_index;i<=max_index;i++)
				output(i, 0);
			System.out.print("\n");
		}
	}

	/**
	 * Parses the command line and runs the scaling passes over the data file.
	 *
	 * @param argv options followed by exactly one data filename
	 * @throws IOException if reading the data or parameter file, or writing the
	 *         parameter file, fails after it has been opened
	 */
	private void run(String []argv) throws IOException
	{
		int i;
		String save_filename = null;
		String restore_filename = null;

		for(i=0;i<argv.length;i++)
		{
			String option = argv[i];
			if(option.length() == 0 || option.charAt(0) != '-') break;
			// a bare "-" has no flag letter and falls through to "unknown option"
			char flag = option.length() > 1 ? option.charAt(1) : '\0';
			switch(flag)
			{
				case 'l': lower = parse_number(option_value(argv, ++i)); break;
				case 'u': upper = parse_number(option_value(argv, ++i)); break;
				case 'y':
					y_lower = parse_number(option_value(argv, ++i));
					y_upper = parse_number(option_value(argv, ++i));
					y_scaling = true;
					break;
				case 's': save_filename = option_value(argv, ++i); break;
				case 'r': restore_filename = option_value(argv, ++i); break;
				default:
					System.err.println("unknown option");
					exit_with_help();
			}
		}

		if(!(upper > lower) || (y_scaling && !(y_upper > y_lower)))
		{
			System.err.println("inconsistent lower/upper specification");
			System.exit(1);
		}
		if(restore_filename != null && save_filename != null)
		{
			System.err.println("cannot use -r and -s simultaneously");
			System.exit(1);
		}

		// exactly one argument, the data filename, must remain
		if(argv.length != i+1)
			exit_with_help();

		String data_filename = argv[i];
		BufferedReader fp = open_or_exit(data_filename);
		try
		{
			/* assumption: min index of attributes is 1 */
			/* pass 1: find out max index of attributes */
			BufferedReader fp_restore = null;
			max_index = 0;
			if(restore_filename != null)
			{
				fp_restore = open_or_exit(restore_filename);
				scan_restore_max_index(fp_restore);
				fp_restore = rewind(fp_restore, restore_filename);
			}
			scan_data_max_index(fp);

			allocate_ranges();

			/* pass 2: find out min/max value */
			fp = rewind(fp, data_filename);
			scan_data_ranges(fp);
			fp = rewind(fp, data_filename);

			/* pass 2.5: save/restore feature_min/feature_max */
			if(fp_restore != null)
				restore_parameters(fp_restore);
			if(save_filename != null)
				save_parameters(save_filename);

			/* pass 3: scale */
			scale_data(fp);

			if(new_num_nonzeros > num_nonzeros)
				System.err.print(
				 "WARNING: original #nonzeros " + num_nonzeros+"\n"
				+"         new      #nonzeros " + new_num_nonzeros+"\n"
				+"Use -l 0 if many original feature values are zeros\n");
		}
		finally
		{
			fp.close();
		}
	}

	/** Program entry point; see {@link #exit_with_help()} for the accepted options. */
	public static void main(String argv[]) throws IOException
	{
		svm_scale s = new svm_scale();
		s.run(argv);
	}
}