#!/usr/bin/env python
"""Concatenate batch GO Term Finder (GTF) results into one list table.

The inputs are the ``*.txt.tsv`` result files generated by the batch GTF
perl code (Chris Johnson, U of Tulsa).  The label (header) line of the
first file is written once; it is followed by the non-blank data lines of
every result file, taken in sorted path order.

Usage: python Concatenate_GTF_results.py /datasetPath /outputFilePath_and_Name
"""

import glob
import os
import string  # not used below; kept because the original script imported it
import sys


def list_all_files(Path):
    """Return the paths of all ``*.txt.tsv`` files directly inside *Path*.

    The list is in whatever order ``glob`` yields; callers sort it.
    """
    return glob.glob(Path + '/*.txt.tsv')


def concatenate_files(file_list, output):
    """Write the combined table for *file_list* to the open file *output*.

    file_list -- paths of GTF result files, processed in the given order.
    output    -- any object with a ``write(str)`` method.

    The first line of every file is treated as its label line.  Only the
    label line of the first file is written: each tab-separated label is
    followed by a tab (so the header row ends with a trailing tab, as the
    script has always produced) and the row is closed with a newline.
    Every remaining line is stripped of surrounding whitespace and written
    with a newline, skipping lines that are empty after stripping.

    Since the 2010-07-26 update no per-file "cluster ID" column is written.

    NOTE(review): the header-terminating newline is written once, together
    with the header.  Confirm that no blank separator line per input file
    was intended.
    """
    header_written = False
    for path in file_list:
        # 'with' guarantees each input file is closed, even on error.
        with open(path, 'r') as data:
            label = data.readline().strip().split('\t')
            if not header_written:
                output.write(''.join(element + '\t' for element in label))
                output.write('\n')
                header_written = True
            for line in data:
                GOTerm = line.strip()
                if GOTerm:
                    output.write(GOTerm + '\n')


def main(argv=None):
    """Command-line entry point.

    argv -- argument vector laid out like ``sys.argv`` (program name first);
            defaults to ``sys.argv``.

    Exits with status 1 when arguments are missing, when the dataset
    directory does not exist, or when the output file cannot be opened.
    """
    if argv is None:
        argv = sys.argv
    try:
        data_file_Path = argv[1]
        output_file_Path = argv[2]
    except IndexError:
        print('Usage: python Concatenate_GTF_results.py /datasetPath /outputFilePath_and_Name')
        sys.exit(1)

    if not os.path.isdir(data_file_Path):
        print('Data file not found, error in given directory')
        sys.exit(1)

    # Collect the inputs before touching the output file, and never treat
    # the output itself as an input if it lives in the dataset directory.
    output_abs = os.path.abspath(output_file_Path)
    File_list = sorted(
        path for path in list_all_files(data_file_Path)
        if os.path.abspath(path) != output_abs
    )

    try:
        output = open(output_file_Path, 'w')
    except (IOError, OSError):
        # open() raises IOError on Python 2 and OSError on Python 3.
        print('output file error')
        sys.exit(1)

    with output:
        concatenate_files(File_list, output)


if __name__ == '__main__':
    main()