Mercurial > repos > charles-bernard > alfa
changeset 45:f4b26211e3d8 draft
Uploaded
author | charles-bernard |
---|---|
date | Thu, 21 Dec 2017 12:58:57 -0500 |
parents | 3a051528e47d |
children | 5d745637f045 |
files | alfa/ALFA.py alfa/ALFA_wrapper.py |
diffstat | 2 files changed, 45 insertions(+), 36 deletions(-) [+] |
line wrap: on
line diff
--- a/alfa/ALFA.py Thu Dec 21 11:56:24 2017 -0500 +++ b/alfa/ALFA.py Thu Dec 21 12:58:57 2017 -0500 @@ -18,7 +18,7 @@ from matplotlib.backends.backend_pdf import PdfPages # To correctly embed the texts when saving plots in svg format import matplotlib -import progressbar +# import progressbar import collections import matplotlib as mpl import numpy as np @@ -352,14 +352,14 @@ stranded_index_fh.write("#%s\t%s\n" % (key, value)) # Progress bar to track the genome indexes creation nb_lines = sum(1 for _ in open(annotation)) - pbar = progressbar.ProgressBar(widgets=["Indexing the genome ", progressbar.Percentage(), " ", progressbar.Bar(), progressbar.Timer()], maxval=nb_lines).start() + # pbar = progressbar.ProgressBar(widgets=["Indexing the genome ", progressbar.Percentage(), " ", progressbar.Bar(), progressbar.Timer()], maxval=nb_lines).start() # Browsing the GTF file and writing into genome index files with open(annotation, "r") as gtf_fh: for line in gtf_fh: i += 1 # Update the progressbar every 1k lines - if i % 1000 == 1: - pbar.update(i) + # if i % 1000 == 1: + # pbar.update(i) # Processing lines except comment ones if not line.startswith("#"): # Getting the line info @@ -437,7 +437,7 @@ # Store the categories of the last chromosome register_interval(intervals_dict, chrom, stranded_genome_index, unstranded_genome_index) - pbar.finish() + # pbar.finish() def generate_bedgraph_files(sample_labels, bam_files): @@ -446,9 +446,9 @@ #sample_labels = [] # Progress bar to track the BedGraph file creation #pbar = progressbar.ProgressBar(widgets=["Generating the BedGraph files ", progressbar.Percentage(), progressbar.Bar(), progressbar.Timer()], max_value=len(bam_files)+1).start() - pbar = progressbar.ProgressBar(widgets=["Generating the BedGraph files ", progressbar.Percentage(), progressbar.Bar(), progressbar.Timer()], maxval=len(sample_labels)+1).start() + # pbar = progressbar.ProgressBar(widgets=["Generating the BedGraph files ", progressbar.Percentage(), progressbar.Bar(), progressbar.Timer()], maxval=len(sample_labels)+1).start() n = 1 - pbar.update(n) + # pbar.update(n) #for n in range(0, len(bam_files), 2): for sample_label, bam_file in zip(sample_labels, bam_files): # Get the label for this sample @@ -458,23 +458,23 @@ if options.strandness in ["forward", "fr-firststrand"]: #subprocess.call("bedtools genomecov -bg -split -strand + -ibam " + bam_files[n] + " > " + modified_label + ".plus.bedgraph", shell=True) subprocess.call("bedtools genomecov -bg -split -strand + -ibam " + bam_file + " > " + sample_label + ".plus.bedgraph", shell=True) - pbar.update(n + 0.5) + # pbar.update(n + 0.5) #subprocess.call("bedtools genomecov -bg -split -strand - -ibam " + bam_files[n] + " > " + modified_label + ".minus.bedgraph", shell=True) subprocess.call("bedtools genomecov -bg -split -strand - -ibam " + bam_file + " > " + sample_label + ".minus.bedgraph", shell=True) - pbar.update(n + 0.5) + # pbar.update(n + 0.5) elif options.strandness in ["reverse", "fr-secondstrand"]: #subprocess.call("bedtools genomecov -bg -split -strand - -ibam " + bam_files[n] + " > " + modified_label + ".plus.bedgraph", shell=True) subprocess.call("bedtools genomecov -bg -split -strand - -ibam " + bam_file + " > " + sample_label + ".plus.bedgraph", shell=True) - pbar.update(n + 0.5) + # pbar.update(n + 0.5) #subprocess.call("bedtools genomecov -bg -split -strand + -ibam " + bam_files[n] + " > " + modified_label + ".minus.bedgraph", shell=True) subprocess.call("bedtools genomecov -bg -split -strand + -ibam " + bam_file + " > " + sample_label + ".minus.bedgraph", shell=True) - pbar.update(n + 0.5) + # pbar.update(n + 0.5) else: #subprocess.call("bedtools genomecov -bg -split -ibam " + bam_files[n] + " > " + modified_label + ".bedgraph", shell=True) subprocess.call("bedtools genomecov -bg -split -ibam " + bam_file + " > " + sample_label + ".bedgraph", shell=True) - pbar.update(n + 1) + # pbar.update(n + 1) #sample_files.append(modified_label) - pbar.finish() + # pbar.finish() #return sample_files, sample_labels return None @@ -579,7 +579,7 @@ # Progress bar to track the BedGraph and index intersection #pbar = progressbar.ProgressBar(widgets=["Processing " + sample_file + " ", progressbar.Percentage(), progressbar.Bar(), progressbar.Timer()], max_value=nb_lines).start() - pbar = progressbar.ProgressBar(widgets=["Processing " + sample_label + " ", progressbar.Percentage(), progressbar.Bar(), progressbar.Timer()], maxval=nb_lines).start() + # pbar = progressbar.ProgressBar(widgets=["Processing " + sample_label + " ", progressbar.Percentage(), progressbar.Bar(), progressbar.Timer()], maxval=nb_lines).start() i = 0 # Intersecting the BedGraph and index files @@ -592,8 +592,8 @@ # Running through the BedGraph file for bam_line in bedgraph_fh: i += 1 - if i % 10000 == 0: - pbar.update(i) + # if i % 10000 == 0: + # pbar.update(i) # Getting the BAM line info bam_chrom = bam_line.split("\t")[0] bam_start, bam_stop, bam_cpt = map(float, bam_line.split("\t")[1:4]) @@ -673,7 +673,7 @@ #cpt[sample_name][("intergenic", "intergenic")] = (bam_stop - bam_start) * bam_cpt cpt[sample_label][("intergenic", "intergenic")] = (bam_stop - bam_start) * bam_cpt gtf_index_file.close() - pbar.finish() + # pbar.finish() return cpt @@ -1085,7 +1085,7 @@ ## Showing the plot if pdf: ## TODO: allow several output formats - pdf.savefig(format="pdf") + pdf.savefig() plt.close() elif svg: if svg == True:
--- a/alfa/ALFA_wrapper.py Thu Dec 21 11:56:24 2017 -0500 +++ b/alfa/ALFA_wrapper.py Thu Dec 21 12:58:57 2017 -0500 @@ -44,26 +44,29 @@ args = parser.parse_args() return args -def symlink_user_indexes(stranded_index, unstranded_index): +def symlink_user_indexes(stranded_index, unstranded_index, tmp_dir): index='index' - os.symlink(stranded_index, index + '.stranded.index') - os.symlink(unstranded_index, index + '.unstranded.index') + os.symlink(stranded_index, os.path.join(tmp_dir, index + '.stranded.index')) + os.symlink(unstranded_index, os.path.join(tmp_dir, index + '.unstranded.index')) return index -def get_input2_args(reads_list, format): +def get_input2_args(reads_list, format, tmp_dir): n = len(reads_list) if n%2 != 0: exit_and_explain('Problem with pairing reads filename and reads label') if format == 'bam': input2_args = '--bam' - elif format == 'begraph': + elif format == 'bedgraph': input2_args = '--bedgraph' - input2_args='-i' k = 0 reads_filenames = [''] * (n/2) reads_labels = [''] * (n/2) for i in range(0, n, 2): - reads_filenames[k] = reads_list[i].split('__fname__')[1] + curr_filename = reads_list[i].split('__fname__')[1] + # Alfa checks extension so the filename must end either by .bedgraph or by .bam + # We then create a symlink from file.dat to tmp_dir/annotation_n.<format> to avoid the error message + reads_filenames[k] = os.path.join(tmp_dir, 'annotation_' + str(k) + '.' + format) + os.symlink(curr_filename, reads_filenames[k]) cur_label = reads_list[i+1].split('__label__')[1] reads_labels[k] = re.sub(r' ', '_', cur_label) if not reads_labels[k]: @@ -88,7 +91,7 @@ def merge_count_files(reads_labels): merged_count_file = open('count_file.txt', 'wb') for i in range(0, len(reads_labels)): - current_count_file = open('%s.categories_counts' % reads_labels[i], 'r') + current_count_file = open('%s.feature_counts.tsv' % reads_labels[i], 'r') merged_count_file.write('##LABEL: %s\n\n' % reads_labels[i]) merged_count_file.write(current_count_file.read()) merged_count_file.write('__________________________________________________________________\n') @@ -108,7 +111,7 @@ #INPUT1: Annotation File if args.indexes: # The indexes submitted by the user must exhibit the suffix '.(un)stranded.index' and will be called by alfa by their prefix - index = symlink_user_indexes(args.indexes[0], args.indexes[1]) + index = symlink_user_indexes(args.indexes[0], args.indexes[1], tmp_dir) input1_args = '-g "%s"' % index elif args.bi_indexes: input1_args = '-g "%s"' % args.bi_indexes[0] @@ -119,7 +122,7 @@ #INPUT 2: Mapped Reads if args.reads: - input2_args, reads_filenames, reads_labels = get_input2_args(args.reads, args.reads_format[0]) + input2_args, reads_filenames, reads_labels = get_input2_args(args.reads, args.reads_format[0], tmp_dir) strandness = '-s %s' % args.strandness[0] else: exit_and_explain('No reads submitted !') @@ -129,17 +132,21 @@ if not (args.output_pdf or args.output_png or args.output_svg): output_args = '--n' else: + plot_suffix = os.path.join(tmp_dir, "ALFA_plot"); if args.output_pdf: - output_args = '--pdf plot.pdf' + output_args = '--pdf ' + plot_suffix + '.pdf' if args.output_png: - output_args = '--png plot' + output_args = '--png ' + plot_suffix if args.output_svg: - output_args = '--svg plot' + output_args = '--svg ' + plot_suffix if args.threshold: output_args = '%s -t %.3f %.3f' % (output_args, args.threshold[0], args.threshold[1]) ##Run alfa cmd = 'python %s %s %s %s %s %s' % (alfa_path, input1_args, input2_args, strandness, categories_depth, output_args) + # Change into the tmp dir because ALFA produces files in the current dir + curr_dir = os.getcwd() + os.chdir(tmp_dir) logging.info("__________________________________________________________________\n") logging.info("Alfa execution") logging.info("__________________________________________________________________\n") @@ -155,13 +162,13 @@ ##Redirect outputs if args.output_pdf: - shutil.move('plot.pdf', args.output_pdf[0]) + shutil.move(plot_suffix + '.pdf', args.output_pdf[0]) if args.output_png: - shutil.move('plot' + '.categories.png', args.output_png[0]) - shutil.move('plot' + '.biotypes.png', args.output_png[1]) + shutil.move(plot_suffix + '.categories.png', args.output_png[0]) + shutil.move(plot_suffix + '.biotypes.png', args.output_png[1]) if args.output_svg: - shutil.move('plot' + '.categories.svg', args.output_svg[0]) - shutil.move('plot' + '.biotypes.svg', args.output_svg[1]) + shutil.move(plot_suffix + '.categories.svg', args.output_svg[0]) + shutil.move(plot_suffix + '.biotypes.svg', args.output_svg[1]) if args.output_count: count_filename = merge_count_files(reads_labels) shutil.move(count_filename, args.output_count[0]) @@ -179,5 +186,7 @@ shutil.move(args.bi_indexes[0] + '.stranded.index', args.output_index[0]) shutil.move(args.bi_indexes[1] + '.unstranded.index', args.output_index[1]) + # Get back to the original dir and cleanup the tmp dir + os.chdir(curr_dir) cleanup_before_exit(tmp_dir) main()