diff --git a/src/cuffmerge b/src/cuffmerge index e12f232..452df65 100755 --- a/src/cuffmerge +++ b/src/cuffmerge @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/python3 # encoding: utf-8 """ cuffmerge.py @@ -6,6 +6,8 @@ cuffmerge.py Created by Cole Trapnell on 2011-03-17. Copyright (c) 2011 Cole Trapnell. All rights reserved. """ +from __future__ import print_function +from __future__ import absolute_import import sys import getopt @@ -17,6 +19,9 @@ import os import tempfile import warnings import types +from operator import itemgetter +from tempfile import mktemp + help_message = ''' cuffmerge takes two or more Cufflinks GTF files and merges them into a @@ -92,7 +97,7 @@ class TestParams: "num-threads=", "keep-tmp", "min-isoform-fraction="]) - except getopt.error, msg: + except getopt.error as msg: raise Usage(msg) self.system_params.parse_options(opts) @@ -104,7 +109,7 @@ class TestParams: # option processing for option, value in opts: if option in ("-v", "--version"): - print "merge_cuff_asms v%s" % (get_version()) + print("merge_cuff_asms v%s" % (get_version())) exit(0) if option in ("-h", "--help"): raise Usage(help_message) @@ -128,17 +133,17 @@ def right_now(): def prepare_output_dir(): - print >> sys.stderr, "[%s] Preparing output location %s" % (right_now(), output_dir) + print("[%s] Preparing output location %s" % (right_now(), output_dir), file=sys.stderr) if os.path.exists(output_dir): pass else: os.makedirs(output_dir) - #print >> sys.stderr, "Checking for %s", logging_dir + #print("Checking for %s", logging_dir, file=sys.stderr) if os.path.exists(logging_dir): pass else: - #print >> sys.stderr, "Creating %s", logging_dir + #print("Creating %s", logging_dir, file=sys.stderr) os.makedirs(logging_dir) if os.path.exists(tmp_dir): @@ -158,7 +163,7 @@ def tmp_name(prefix): pass else: os.mkdir(tmp_root) - return tmp_root + prefix + os.tmpnam().split('/')[-1] + return tmp_root + prefix + mktemp().split(os.sep)[-1] def cufflinks(out_dir, sam_file, @@ -168,9 +173,9 @@ def cufflinks(out_dir, lsf=False, curr_queue=None): if gtf_file != None: - print >> sys.stderr, "[%s] Quantitating transcripts" % (right_now()) + print("[%s] Quantitating transcripts" % (right_now()), file=sys.stderr) else: - print >> sys.stderr, "[%s] Assembling transcripts" % (right_now()) + print("[%s] Assembling transcripts" % (right_now()), file=sys.stderr) cmd = ["cufflinks"] @@ -191,20 +196,20 @@ def cufflinks(out_dir, cmd.append(sam_file) try: - print >> run_log, " ".join(cmd) + print(" ".join(cmd), file=run_log) ret = subprocess.call(cmd) if ret != 0: - print >> sys.stderr, fail_str, "Error: could not execute cufflinks" + print(fail_str, "Error: could not execute cufflinks", file=sys.stderr) exit(1) # cufflinks not found - except OSError, o: + except OSError as o: if o.errno == errno.ENOTDIR or o.errno == errno.ENOENT: - print >> sys.stderr, fail_str, "Error: cufflinks not found on this system. Did you forget to include it in your PATH?" + print(fail_str, "Error: cufflinks not found on this system. Did you forget to include it in your PATH?", file=sys.stderr) exit(1) def cuffcompare(prefix, ref_gtf, fasta, cuff_gtf): - print >> sys.stderr, "[%s] Comparing reference %s to assembly %s" % (right_now(), ref_gtf, cuff_gtf) + print("[%s] Comparing reference %s to assembly %s" % (right_now(), ref_gtf, cuff_gtf), file=sys.stderr) cmd = ["cuffcompare"] if prefix != None: @@ -213,22 +218,22 @@ def cuffcompare(prefix, ref_gtf, fasta, cuff_gtf): cmd.extend(["-r", ref_gtf]) if fasta != None: cmd.extend(["-s", fasta]) - if type(cuff_gtf) == types.ListType: + if isinstance(cuff_gtf, list): for g in cuff_gtf: cmd.extend([g]) else: cmd.extend([cuff_gtf]) try: - print >> run_log, " ".join(cmd) + print(" ".join(cmd), file=run_log) ret = subprocess.call(cmd) if ret != 0: - print >> sys.stderr, fail_str, "Error: could not execute cuffcompare" + print(fail_str, "Error: could not execute cuffcompare", file=sys.stderr) exit(1) # cuffcompare not found - except OSError, o: + except OSError as o: if o.errno == errno.ENOTDIR or o.errno == errno.ENOENT: - print >> sys.stderr, fail_str, "Error: cuffcompare not found on this system. Did you forget to include it in your PATH?" + print(fail_str, "Error: cuffcompare not found on this system. Did you forget to include it in your PATH?", file=sys.stderr) exit(1) def gtf_to_sam(gtf_filename): @@ -240,15 +245,15 @@ def gtf_to_sam(gtf_filename): cmd.append(gtf_filename) cmd.append(sam_out) try: - print >> run_log, " ".join(cmd) + print(" ".join(cmd), file=run_log) ret = subprocess.call(cmd) if ret != 0: - print >> sys.stderr, fail_str, "Error: could not execute gtf_to_sam" + print(fail_str, "Error: could not execute gtf_to_sam", file=sys.stderr) exit(1) # gtf_to_sam not found - except OSError, o: + except OSError as o: if o.errno == errno.ENOTDIR or o.errno == errno.ENOENT: - print >> sys.stderr, fail_str, "Error: gtf_to_sam not found on this system. Did you forget to include it in your PATH?" + print(fail_str, "Error: gtf_to_sam not found on this system. Did you forget to include it in your PATH?", file=sys.stderr) exit(1) return sam_out @@ -268,9 +273,9 @@ def test_input_files(filename_list): g = open(line,"r") input_files.append(line) - except OSError, o: + except OSError as o: if o.errno == errno.ENOTDIR or o.errno == errno.ENOENT: - print >> sys.stderr, fail_str, "Error: could not open %s" % line + print(fail_str, "Error: could not open %s" % line, file=sys.stderr) OK = False if not OK: sys.exit(1) @@ -279,16 +284,16 @@ def test_input_files(filename_list): def convert_gtf_to_sam(gtf_filename_list): """This function takes a list of GTF files, converts them all to temporary SAM files, and returns the list of temporary file names.""" - print >> sys.stderr, "[%s] Converting GTF files to SAM" % (right_now()) + print("[%s] Converting GTF files to SAM" % (right_now()), file=sys.stderr) OK = True sam_input_filenames = [] for line in gtf_filename_list: try: sam_out = gtf_to_sam(line) sam_input_filenames.append(sam_out) - except OSError, o: + except OSError as o: if o.errno == errno.ENOTDIR or o.errno == errno.ENOENT: - print >> sys.stderr, fail_str, "Error: could not open %s" % line + print(fail_str, "Error: could not open %s" % line, file=sys.stderr) OK = False if not OK: sys.exit(1) @@ -299,12 +304,12 @@ def merge_sam_inputs(sam_input_list, header): sorted_map = open(sorted_map_name, "w") - #print header + #print(header) # The header was built from a dict keyed by chrom, so # the records will be lexicographically ordered and # should match the BAM after the sort below. - print >> sorted_map, header, + print(header, end=' ', file=sorted_map) sorted_map.close() sort_cmd =["sort", @@ -315,48 +320,49 @@ def merge_sam_inputs(sam_input_list, header): "--temporary-directory="+tmp_dir] sort_cmd.extend(sam_input_list) - print >> run_log, " ".join(sort_cmd), ">", sorted_map_name + print(" ".join(sort_cmd), ">", sorted_map_name, file=run_log) subprocess.call(sort_cmd, stdout=open(sorted_map_name, "a")) return sorted_map_name def compare_to_reference(meta_asm_gtf, ref_gtf, fasta): - print >> sys.stderr, "[%s] Comparing against reference file %s" % (right_now(), ref_gtf) + global tmp_dir + print("[%s] Comparing against reference file %s" % (right_now(), ref_gtf), file=sys.stderr) ref_str = "" if ref_gtf != None: ref_str = " -r %s " % ref_gtf if fasta != None: - comp_cmd = '''cuffcompare -o tmp_meta_asm -C -G %s -s %s %s''' % (ref_str, fasta, meta_asm_gtf) + comp_cmd = '''cuffcompare -o %s -C -G %s -s %s %s''' % (tmp_dir, ref_str, fasta, meta_asm_gtf) else: - comp_cmd = '''cuffcompare -o tmp_meta_asm -C -G %s %s''' % (ref_str, meta_asm_gtf) + comp_cmd = '''cuffcompare -o %s -C -G %s %s''' % (tmp_dir, ref_str, meta_asm_gtf) #cmd = bsub_cmd(comp_cmd, "/gencode_cmp", True, job_mem=8) cmd = comp_cmd try: - print >> run_log, cmd + print(cmd, file=run_log) ret = subprocess.call(cmd,shell=True) if ret != 0: - print >> sys.stderr, fail_str, "Error: could not execute cuffcompare" + print(fail_str, "Error: could not execute cuffcompare", file=sys.stderr) exit(1) #tmap_out = meta_asm_gtf.split("/")[-1] + ".tmap" tfpath, tfname = os.path.split(meta_asm_gtf) if tfpath: tfpath+='/' - tmap_out = tfpath+'tmp_meta_asm.'+tfname+".tmap" + tmap_out = tfpath+'.'+tfname+".tmap" return tmap_out # cuffcompare not found - except OSError, o: + except OSError as o: if o.errno == errno.ENOTDIR or o.errno == errno.ENOENT: - print >> sys.stderr, fail_str, "Error: cuffcompare not found on this system. Did you forget to include it in your PATH?" + print(fail_str, "Error: cuffcompare not found on this system. Did you forget to include it in your PATH?", file=sys.stderr) exit(1) def select_gtf(gtf_in_filename, ids, gtf_out_filename): f_gtf = open(gtf_in_filename) - #print >> sys.stderr, "Select GTF: Ids are: " - #print >> sys.stderr, ids - #print >> sys.stderr, "reference gtf file name:" - #print >> sys.stderr, gtf_in_filename + #print("Select GTF: Ids are: ", file=sys.stderr) + #print(ids, file=sys.stderr) + #print("reference gtf file name:", file=sys.stderr) + #print(gtf_in_filename, file=sys.stderr) out_gtf = open(gtf_out_filename, "w") for line in f_gtf: line = line.strip() @@ -370,13 +376,13 @@ def select_gtf(gtf_in_filename, ids, gtf_out_filename): first_quote = col.find('"') last_quote = col.find('"', first_quote + 1) transcript = col[first_quote + 1:last_quote] - #print >> sys.stderr, transcript + #print(transcript, file=sys.stderr) if transcript in ids: - print >> out_gtf, line + print(line, file=out_gtf) def merge_gtfs(gtf_filenames, merged_gtf, ref_gtf=None): - print >> sys.stderr, "[%s] Merging linc gtf files with cuffcompare" % (right_now()) + print("[%s] Merging linc gtf files with cuffcompare" % (right_now()), file=sys.stderr) cmd = ["cuffcompare"] cmd.extend(["-o", merged_gtf]) @@ -388,24 +394,24 @@ def merge_gtfs(gtf_filenames, merged_gtf, ref_gtf=None): #cmd = bsub_cmd(cmd, "/merge_gtf", True, job_mem=8) try: - print >> run_log, cmd + print(cmd, file=run_log) ret = subprocess.call(cmd, shell=True) if ret != 0: - print >> sys.stderr, fail_str, "Error: could not execute cuffcompare" + print(fail_str, "Error: could not execute cuffcompare", file=sys.stderr) exit(1) return merged_gtf + ".combined.gtf" # cuffcompare not found - except OSError, o: + except OSError as o: if o.errno == errno.ENOTDIR or o.errno == errno.ENOENT: - print >> sys.stderr, fail_str, "Error: cuffcompare not found on this system. Did you forget to include it in your PATH?" + print(fail_str, "Error: cuffcompare not found on this system. Did you forget to include it in your PATH?", file=sys.stderr) exit(1) def compare_meta_asm_against_ref(ref_gtf, fasta_file, gtf_input_file, class_codes=["c", "i", "r", "p", "e"]): - #print >> sys.stderr, "Cuffcmpare all assemblies GTFs" + #print("Cuffcmpare all assemblies GTFs", file=sys.stderr) tmap = compare_to_reference(gtf_input_file, ref_gtf, fasta_file) - #print >> sys.stderr, "Cuffcmpare all assemblies GTFs : filter %s" % ",".join(class_codes) + #print("Cuffcmpare all assemblies GTFs : filter %s" % ",".join(class_codes), file=sys.stderr) selected_ids= set([]) f_tmap = open(tmap) #out = open("tmp_meta_asm_selectedIds.txt", "w") @@ -434,15 +440,10 @@ def compare_meta_asm_against_ref(ref_gtf, fasta_file, gtf_input_file, class_code if os.path.exists(mtmap.split(".tmap")[0]+".refmap"): os.remove(mtmap.split(".tmap")[0]+".refmap") - shutil.move("tmp_meta_asm.combined.gtf", output_dir + "/merged.gtf") + global tmp_dir + shutil.move(tmp_dir+".combined.gtf", output_dir + "/merged.gtf") # os.remove("tmp_meta_asm.combined.gtf") - if os.path.exists("tmp_meta_asm.loci"): - os.remove("tmp_meta_asm.loci") - if os.path.exists("tmp_meta_asm.tracking"): - os.remove("tmp_meta_asm.tracking") - if os.path.exists("tmp_meta_asm.stats"): - os.remove("tmp_meta_asm.stats") if os.path.exists(tmap): os.remove(tmap) if os.path.exists(tmap.split(".tmap")[0]+".refmap"): @@ -484,9 +485,8 @@ def get_gtf_chrom_info(gtf_filename, known_chrom_info=None): def header_for_chrom_info(chrom_info): header_strs = ["""@HD\tVN:1.0\tSO:coordinate"""] - chrom_list = [(chrom, limits) for chrom, limits in chrom_info.iteritems()] - chrom_list.sort(lambda x,y: cmp(x[0],y[0])) - #print chrom_list + chrom_list = sorted(chrom_info.items(), key=itemgetter(0)) + #print(chrom_list) for chrom, limits in chrom_list: line = "@SQ\tSN:%s\tLN:\t%d" % (chrom, limits[1]) header_strs.append(line) @@ -510,29 +510,29 @@ def main(argv=None): params.check() if len(args) < 1: - raise(Usage(help_message)) + raise Usage(help_message) global run_log global run_cmd - print >> sys.stderr - print >> sys.stderr, "[%s] Beginning transcriptome assembly merge" % (right_now()) - print >> sys.stderr, "-------------------------------------------" - print >> sys.stderr + print(file=sys.stderr) + print("[%s] Beginning transcriptome assembly merge" % (right_now()), file=sys.stderr) + print("-------------------------------------------", file=sys.stderr) + print(file=sys.stderr) start_time = datetime.now() prepare_output_dir() run_log = open(logging_dir + "run.log", "w", 0) run_cmd = " ".join(argv) - print >> run_log, run_cmd + print(run_cmd, file=run_log) transfrag_list_file = open(args[0], "r") if params.ref_gtf != None: test_input_files([params.ref_gtf]) else: - print >> sys.stderr, "Warning: no reference GTF provided!" + print("Warning: no reference GTF provided!", file=sys.stderr) # Check that all the primary assemblies are accessible before starting the time consuming stuff gtf_input_files = test_input_files(transfrag_list_file) @@ -571,8 +571,8 @@ def main(argv=None): os.remove(output_dir + "/skipped.gtf") os.remove(output_dir + "/genes.fpkm_tracking") os.remove(output_dir + "/isoforms.fpkm_tracking") - except Usage, err: - print >> sys.stderr, sys.argv[0].split("/")[-1] + ": " + str(err.msg) + except Usage as err: + print(sys.argv[0].split("/")[-1] + ": " + str(err.msg), file=sys.stderr) return 2