#!/usr/bin/python
"""Remove duplicate entries from a list of dcache URLs.

Reads a text file holding one entry per line (the "base file"), strips
surrounding whitespace from every line, drops every repeated entry while
keeping the first occurrence in its original position, and writes the
remaining entries, one per line, to an output file.
"""

import getopt
import os
import sys

# Set to 1 by the --debug / -d command line switch.
_debug = 0


def usage():
    """Print the command line help text to standard output."""
    # Single-argument print(...) calls produce the same output under both
    # the Python 2 print statement and the Python 3 print function.
    print('')
    print('remove doubles')
    print('')
    print('takes file with dcache url\'s (base_file) and writes out into an output file (output_file)')
    print('all url\'s removing the double filenames')
    print('')
    print('required parameters:')
    print('--tier (-t) : hit, digi, dst to use the default file names')
    print('')
    print('optional parameters:')
    print('--help (-h) : help')
    print('--debug (-d) : debug statements')
    print('')
    print('--base_file_name (-b) : base_file with dcache url\'s, default: _to_be_staged_with_doubles.cpy')
    print('--output_file_name (-o) : output_file with dcache url\'s not in both files, default: _to_be_staged.cpy')
    print('')


def _unique_in_order(entries):
    """Return the distinct items of *entries*, keeping first-seen order."""
    seen = set()
    unique = []
    for entry in entries:
        if entry not in seen:
            seen.add(entry)
            unique.append(entry)
    return unique


def main(argv):
    """Run the duplicate-removal tool.

    argv is the list of command line arguments without the program name
    (i.e. ``sys.argv[1:]``).

    Recognised options:
      -h / --help              print the help text and exit
      -d / --debug             set the module-level ``_debug`` flag to 1
      -t / --tier              prefix used to build the default file names
      -b / --base_file_name    input file
      -o / --output_file_name  output file

    A file name that is not given explicitly defaults to
    ``<tier>_to_be_staged_with_doubles.cpy`` (input) or
    ``<tier>_to_be_staged.cpy`` (output). If a file name is missing and no
    tier was given, the help text is printed and the program exits.

    Raises SystemExit with status 2 on an unknown option, and with the
    default status after printing help, when a required file name cannot
    be determined, or when the input file cannot be opened.
    """
    global _debug

    # defaults
    tier = ''
    base_file_name = ''
    output_file_name = ''

    try:
        opts, args = getopt.getopt(
            argv,
            "hdt:b:o:",
            ["help", "debug", "tier=", "base_file_name=", "output_file_name="])
    except getopt.GetoptError:
        usage()
        sys.exit(2)

    # check command line parameters
    for opt, arg in opts:
        if opt in ("-h", "--help"):
            usage()
            sys.exit()
        elif opt in ("-d", "--debug"):
            _debug = 1
        elif opt in ("-t", "--tier"):
            tier = arg
        elif opt in ("-b", "--base_file_name"):
            base_file_name = arg
        elif opt in ("-o", "--output_file_name"):
            output_file_name = arg

    # Fall back to the tier-derived default names; without a tier there is
    # nothing to derive them from.
    if base_file_name == '':
        if tier == '':
            usage()
            sys.exit()
        base_file_name = tier + '_to_be_staged_with_doubles.cpy'

    if output_file_name == '':
        if tier == '':
            usage()
            sys.exit()
        output_file_name = tier + '_to_be_staged.cpy'

    try:
        base = open(base_file_name)
    except IOError:
        # Two spaces after the colon reproduce the original
        # "print 'text: ', name" output exactly.
        print('Could not open file:  %s' % base_file_name)
        sys.exit()

    # Read the complete input before the output file is opened, so the
    # input is not truncated if both names refer to the same file.
    with base:
        files_cleaned = _unique_in_order(line.strip() for line in base)

    with open(output_file_name, 'w') as output:
        output.writelines(entry + '\n' for entry in files_cleaned)

    print('Files in output after removing doubles:  %d' % len(files_cleaned))


if __name__ == '__main__':
    main(sys.argv[1:])