Galaxy |

Changeset 0:e360f840a92e (2018-05-16)

Commit message:
Uploaded

added:
alfa/.shed.yml
alfa/ALFA.py
alfa/ALFA.xml
alfa/ALFA_wrapper.py
alfa/test-data/alfa_toy-Biofeatures Distribution.pdf
alfa/test-data/alfa_toy.bam
alfa/test-data/alfa_toy.bedgraph
alfa/test-data/alfa_toy.categories_counts
alfa/test-data/alfa_toy.gtf
alfa/test-data/alfa_toy.stranded.index
alfa/test-data/alfa_toy.unstranded.index
alfa/tool-data/alfa_indexes.loc.sample
alfa/tool_data_table_conf.xml.sample
alfa/tool_dependencies.xml

diff -r 000000000000 -r e360f840a92e alfa/.shed.yml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/alfa/.shed.yml Wed May 16 09:49:18 2018 -0400

@@ -0,0 +1,14 @@
+categories:
+- Graphics
+- Next Gen Mappers
+- Sequence Analysis
+- Visualization
+description: Plot the distribution of the genomic features captured by aligned reads
+long_description:
+ ALFA provides a global overview of the distribution of the features composing Next Generation Sequencing (NGS) dataset(s).
+ Given a set of aligned reads (BAM files) and an annotation file (GTF format), the tool produces plots of the raw and normalized distributions of those reads among genomic categories (stop codon, 5'-UTR, CDS, intergenic, etc.) and biotypes (protein coding genes, miRNA, tRNA, etc.). It works whatever the sequencing technique and whatever the organism.
+ https://github.com/biocompibens/ALFA
+name: alfa
+owner: charles_bernard
+remote_repository_url: https://github.com/biocompibens/ALFA/tree/master/Galaxy_toolshed_repositories/ALFA
+type: unrestricted

diff -r 000000000000 -r e360f840a92e alfa/ALFA.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/alfa/ALFA.py Wed May 16 09:49:18 2018 -0400

[

b'@@ -0,0 +1,1850 @@\n+#!/usr/bin/python\n+# -*- coding: utf-8 -*-\n+\n+__author__ = "noel & bahin"\n+""" ALFA provides a global overview of features distribution composing NGS dataset(s). """\n+\n+import argparse\n+import os\n+import numpy\n+import copy\n+import sys\n+import subprocess\n+import matplotlib.pyplot as plt\n+import matplotlib.cm as cmx\n+import matplotlib.colors as colors\n+import matplotlib.patheffects as PathEffects\n+import re\n+from matplotlib.backends.backend_pdf import PdfPages\n+# To correctly embed the texts when saving plots in svg format\n+import matplotlib\n+# import progressbar\n+import collections\n+import matplotlib as mpl\n+import numpy as np\n+\n+matplotlib.rcParams["svg.fonttype"] = "none"\n+\n+\n+##########################################################################\n+# FUNCTIONS #\n+##########################################################################\n+\n+def init_dict(d, key, init):\n+ if key not in d:\n+ d[key] = init\n+\n+\n+def tryint(s):\n+ """ Function called by "alphanum_key" function to sort the chromosome names. """\n+ try:\n+ return int(s)\n+ except ValueError:\n+ return s\n+\n+\n+def alphanum_key(s):\n+ """ Turn a string into a list of string and number chunks.\n+ "z23a" -> ["z", 23, "a"]\n+ """\n+ return [ tryint(c) for c in re.split("([0-9]+)", s) ]\n+\n+\n+def required_arg(arg, aliases):\n+ """ Function to display the help and quit if a required argument is missing. """\n+ if not arg:\n+ print >> sys.stderr, "\\nError: %s argument is missing.\\n" % aliases\n+ parser.print_usage()\n+ sys.exit(1)\n+\n+\n+def existing_file(filename):\n+ """ Checks if filename already exists and exit if so. """\n+ if os.path.isfile(filename):\n+ sys.exit("Error: The file \'" + filename + "\' is about to be produced but already exists in the directory. 
\\n### End of program")\n+\n+\n+def get_chromosome_lengths():\n+ """\n+ Parse the file containing the chromosome lengths.\n+ If no length file is provided, browse the annotation file (GTF) to estimate the chromosome sizes.\n+ """\n+ lengths = {}\n+ gtf_chrom_names = set()\n+ # If the user provided a chromosome length file\n+ if options.chr_len:\n+ # Getting the chromosome lengths from the chromosome lengths file\n+ with open(options.chr_len, "r") as chr_len_fh:\n+ for line in chr_len_fh:\n+ try:\n+ lengths[line.split("\\t")[0]] = int(line.rstrip().split("\\t")[1])\n+ except IndexError:\n+ sys.exit("Error: The chromosome lengths file is not correctly formed. It is supposed to be tabulated file with two fields per line.")\n+ # Getting the chromosome lengths from the GTF file\n+ with open(options.annotation, "r") as gtf_fh:\n+ for line in gtf_fh:\n+ if not line.startswith("#"):\n+ gtf_chrom_names.add(line.split("\\t")[0])\n+ # Checking if the chromosomes from the chromosome lengths file are present in the GTF file\n+ for chrom in lengths:\n+ if chrom not in gtf_chrom_names:\n+ print >> sys.stderr, "Warning: chromosome \'" + chrom + "\' of the chromosome lengths file does not match any chromosome name in the GTF file provided and was ignored."\n+ # Checking if the chromosomes from the GTF file are present in the lengths file\n+ for chrom in gtf_chrom_names:\n+ if chrom not in lengths:\n+ print >> sys.stderr, "Warning: at least one chromosome (\'" + chrom + "\') was found in the GTF file and does not match any chromosome provided in the lengths file."\n+ print >> sys.stderr, "\\t=> All the chromosome lengths will be approximated using annotations in the GTF file."\n+ break\n+ else:\n+ return lengths\n+ # If no chromosome lengths file was provided or if at least one chromosome was missing in the file, the end of t'..b' cpt = intersect_bedgraphs_and_index_to_counts_categories(labels, bedgraphs)\n+ # Write the counts to an output file\n+ write_counts_in_files(cpt, 
cpt_genome)\n+\n+ ## Plot generation ## MB: all the section still to review\n+ if generate_plot:\n+ print "# Generating plots"\n+ # If input files are the categories counts, the first step is to load them\n+ if options.counts:\n+ #cpt, cpt_genome, sample_names = read_counts(options.counts)\n+ cpt, cpt_genome = read_counts(labels, count_files)\n+ # Managing the unknown biotypes\n+ for sample_label, counters in cpt.items():\n+ for (cat, biot) in counters:\n+ if biot not in biotypes:\n+ unknown_biot.add(biot)\n+ for biot in unknown_biot:\n+ biotypes.add(biot)\n+ biotypes_group1["others"].append(biot)\n+ biotypes = sorted(biotypes)\n+ # Moving antisense cat to the end of the list\n+ biotypes.remove("antisense")\n+ biotypes.append("antisense")\n+ biotypes_group1 = sorted(biotypes_group1)\n+ # Filtering biotypes if necessary\n+ filtered_biotype = None\n+ if options.biotype_filter:\n+ for sample_label in cpt:\n+ for feature in cpt[sample_label]:\n+ biotype = feature[1]\n+ if options.biotype_filter.lower() == biotype.lower():\n+ selected_biotype = biotype\n+ break\n+ if filtered_biotype:\n+ print "\\nWarning: biotype \'" + options.biotype_filter + "\' not found. 
Please check the biotype name and that this biotype exists in your sample(s)."\n+ # Setting the plots filenames\n+ if options.pdf: ##\xc2\xa0MB: Do the same for svg and png??\n+ pdf = PdfPages(options.pdf)\n+ else:\n+ pdf = False\n+ ## Generate the categories plot\n+ # Recategorizing within the final categories and plot generation\n+ final_cats = categs_levels[options.categories_depth - 1]\n+ parent_categs = parent_categ_groups[options.categories_depth - 1]\n+ final_cat_cpt, final_genome_cpt, filtered_cat_cpt = group_counts_by_categ(cpt, cpt_genome, final_cats, filtered_biotype)\n+ # Remove the "antisense" category if the library type is "unstranded" ##\xc2\xa0MB: if options.strandness == "unstranded": cat_list.remove("antisense")??\n+ for dic in cpt.values():\n+ if ("antisense", "antisense") in dic.keys(): break\n+ else:\n+ cat_list.remove("antisense")\n+ #make_plot(labels, cat_list, sample_labels, final_cat_cpt, final_genome_cpt, pdf, "categories", options.threshold, svg=options.svg, png=options.png)\n+ make_plot(labels, cat_list, final_cat_cpt, final_genome_cpt, pdf, "Categories", options.threshold, svg=options.svg, png=options.png, categ_groups= parent_categs)\n+ if filtered_biotype:\n+ #make_plot(labels, cat_list, sample_labels, filtered_cat_cpt, final_genome_cpt, pdf, "categories", options.threshold, title="Categories distribution for \'" + filtered_biotype + "\' biotype", svg=options.svg, png=options.png)\n+ make_plot(labels, cat_list, filtered_cat_cpt, final_genome_cpt, pdf, "Categories", options.threshold, title="Categories distribution for \'" + filtered_biotype + "\' biotype", svg=options.svg, png=options.png, categ_groups= parent_categs)\n+ ## Generate the biotypes plot\n+ # Recategorization within the final biotypes and plot generation\n+ final_cat_cpt, final_genome_cpt = group_counts_by_biotype(cpt, cpt_genome, biotypes)\n+ #make_plot(biotypes, sample_labels, final_cat_cpt, final_genome_cpt, pdf, "biotypes", options.threshold, svg=options.svg, 
png=options.png)\n+ make_plot(labels, biotypes, final_cat_cpt, final_genome_cpt, pdf, "Biotypes", options.threshold, svg=options.svg, png=options.png)\n+\n+\n+ print "### End of program ###"\n\\ No newline at end of file\n'

diff -r 000000000000 -r e360f840a92e alfa/ALFA.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/alfa/ALFA.xml Wed May 16 09:49:18 2018 -0400

[

b'@@ -0,0 +1,331 @@\n+<tool id="alfa" name="ALFA" version="0.1.0">\n+\t<description>- Plot the distribution of the genomic features captured by aligned reads </description>\n+\n+\t\n+\t<requirements>\n+\t\t<requirement type="package" version="2.24">bedtools</requirement>\n+\t\t<requirement type="package" version="1.2">samtools</requirement>\n+\t\t<requirement type="package" version="1.4">matplotlib</requirement>\n+\t</requirements>\n+\n+\t<command interpreter="python">\n+\t<![CDATA[\n+\t\tALFA_wrapper.py\n+\n+\t\t--project_name "${projectName}"\t\n+\n+\t\t##__INPUT 1: ANNOTATION OF THE SEQ/GENOME__##\n+\t\t#if str ( $annotation.annotationSource[\'annotationSourceSelection\'] ) == "index"\n+\t\t\t--index "$annotation.annotationSource[\'strandedIndex\']" "$annotation.annotationSource[\'unstrandedIndex\']"\n+\t\t#else if str ( $annotation.annotationSource[\'annotationSourceSelection\'] ) == "built_in_index"\n+\t\t\t--bi_index "$annotation.annotationSource.built_in_index_prefix.fields.prefix"\n+\t\t#else\n+\t\t\t--annotation "$annotation.annotationSource[\'annotationFile\']"\n+\t\t#end if\n+\n+\t\t##__INPUT 2: ALIGNED READS__##\n+\t\t--reads_format $reads.readsType[\'readsTypeSelection\']\n+\t\t\t--reads\n+\t\t#for $i, $r in enumerate ( $reads.readsType[\'readsList\'] ) \n+\t\t\t"__fname__$r.readsFile"\n+\t\t\t"__label__$r.readsLabel"\n+\t\t#end for\n+\t\t--strandness $reads[\'strandness\']\n+\n+\t\t##__OUTPUT FILES__##\n+\t\t#if str ( $outputFiles[\'plot\'] ) == "True"\n+\t\t\t#if str ( $outputOptions[\'plotFormat\'] ) == "pdf"\n+\t\t\t\t--output_pdf "$outputPdf"\n+\t\t\t#else if str ( $outputOptions[\'plotFormat\'] ) == "png"\n+\t\t\t\t--output_png "$outputCategoriesPng" "$outputBiotypesPng"\n+\t\t\t#else\n+\t\t\t\t--output_svg "$outputCategoriesSvg" "$outputBiotypesSvg"\n+\t\t\t#end if\n+\t\t#end if\n+\t\t#if str ( $outputFiles[\'countFile\'] ) == "True"\n+\t\t\t--output_count "$outputCountFile"\n+\t\t#end if\n+\t\t#if str ( $outputFiles[\'index\'] ) == 
"True"\n+\t\t\t--output_index "$outputStrandedIndex" "$outputUnstrandedIndex"\n+\t\t#end if\n+\n+\t\t##__OUTPUT OPTIONS__##\n+\t\t--categories_depth $outputOptions[\'categoriesDepth\']\n+\t\t#if str ( $outputFiles[\'plot\'] ) == "True"\n+\t\t\t--plot_format $outputOptions[\'plotFormat\']\n+\t\t\t#if str ( $outputOptions.plotThreshold[\'plotThresholdChoice\'] ) == "True"\n+\t\t\t\t--threshold $outputOptions.plotThreshold.yMin $outputOptions.plotThreshold.yMax\n+\t\t\t#end if\n+\t\t#end if\n+\n+\t\t--log_report "$logReport"\n+\t\t--tool_dir "$__tool_directory__"\n+\t]]>\n+\t</command>\n+\t<inputs>\n+\t\t<param name="projectName" value="ALFA" type="text" size="20" label="Project Name">\n+\t\t\t<validator type="empty_field" message="Please, specify a name for your project."/>\n+\t\t</param>\n+\n+\t\t<section name="annotation" title="INPUT 1: Annotation of your genome / sequence" expanded="True">\n+\t\t\t<conditional name="annotationSource">\n+\t\t\t\t<param name="annotationSourceSelection" type="select" label="Select the type of your annotation">\n+\t\t\t\t\t<option value="personal_gtf" selected="true">Personal annotation file (GTF format)</option>\n+\t\t\t\t\t<option value="index">Stranded and Unstranded Indexes previously generated by ALFA (Index format)</option>\n+\t\t\t\t\t<option value="built_in_index">Built-in indexes among a list of referenced genome (Index format)</option>\n+\t\t\t\t</param>\n+\t\t\t\t<when value="personal_gtf">\n+\t\t\t\t\t<param name="annotationFile" type="data" format="Gff, Gtf" label="Select your personal annotation file (GTF format)">\n+\t\t\t\t\t</param>\n+\t\t\t\t</when>\n+\t\t\t\t<when value="index">\n+\t\t\t\t\t<param name="strandedIndex" type="data" label="Select your ALFA Stranded index file (index format)"/>\n+\t\t\t\t\t<param name="unstrandedIndex" type="data" label="Select your ALFA Unstranded index file (index format)"/>\n+\t\t\t\t</when>\n+\t\t\t\t<when value="built_in_index">\n+\t\t\t\t\t<param name="built_in_index_prefix" 
type="select" label="Select Genome">\n+\t\t\t\t\t\t<options from_data_table="alfa_indexes">\n+\t\t\t\t\t\t\t<validator type="no_options" message="No indexes are available for the selected input dataset. Ask your Galaxy Admin for to use ALFA_data_manager tool to build such indexes!" />\n+\t\t\t\t\t\t</options>\n+\t'..b'assert_stdout>\n+\t\t</test>\n+\t</tests>\n+\n+\t<help>\n+<![CDATA[\n+**What it does**\n+\n+\n+\t| ALFA provides a global overview of features distribution composing New Generation Sequencing dataset(s). \n+\t|\n+ \t| Given a set of aligned reads (BAM files) and an annotation file (GTF format), the tool produces plots of the raw and normalized distributions of those reads among genomic categories (stop codon, 5\'-UTR, CDS, intergenic, etc.) and biotypes (protein coding genes, miRNA, tRNA, etc.). Whatever the sequencing technique, whatever the organism.\n+\n+----\n+\n+**ALFA acronym**\n+\n+- Annotation Landscape For Aligned reads\n+\n+----\n+\n+**Official documentation of the tool**\n+\n+\n+- https://github.com/biocompibens/ALFA\n+\n+----\n+\n+**Detailed example**\n+\n+- https://github.com/biocompibens/ALFA#detailed-example\n+\n+----\n+\n+**Nota Bene**\n+\n+* **Input 1: Annotation File**\n+\n+\n+\t| ALFA requires as first input an annotation file (sequence, genome...) in gtf format in order to generate alfa indexes needed in a second round of the program.\n+\t| Indexes are files which list all the coordinates of the categories (stop codon, 5\'-UTR, CDS, intergenic...) and biotypes (protein coding genes, miRNA, tRNA, ...) encountered in the annotated sequence.\n+\t|\n+\t\n+\t.. class:: warningmark\n+\n+\t| Gtf File must be sorted.\n+\t|\n+\n+\t.. class:: infomark\n+\n+\t| Generation of indexes from an annotation file might be time consuming (i.e ~10min for the human genome). Thus, ALFA allows the user to submit directly indexes generated in previous runs as inputs for a new run.\n+\t|\n+\n+\t.. 
class:: infomark\n+\n+\t| ALFA also enables the use of built-in indexes to save even more computational time. To generate these built-in indexes easily, install the data manager tool `ALFA_data_manager`_ available on the toolshed.\n+\n+\t.. _ALFA_data_manager: https://toolshed.g2.bx.psu.edu/view/charles-bernard/data_manager_build_alfa_indexes\n+\n+* **Input 2: Reads**\n+\n+\t| ALFA requires as second input a single or a set of mapped reads file(s) in either bam or bedgraph format. The coordinates of the mapped reads will be intersected with the according categories and biotypes mentioned in the indexes.\n+\t| The strandness option determines which strand of the annotated sequence will be taken into account during this intersection.\n+\t|\n+\n+\t.. class:: warningmark\n+\n+\t| Bam or Bedgraph file(s) must be sorted.\n+\t|\n+\n+\t.. class:: warningmark\n+\n+\t| Chromosome names in reads and in annotation file (gtf or indexes) must be the same for the intersection to occur\n+\t|\n+\n+* **Output files**\n+\n+\t| The result of the intersection is a count file displaying the count of nucleotides in the reads for each genomic categories and biotypes. From this count file, plots of the raw and normalized distributions of the reads among these categories are generated.\n+\t| In the output files section, the user can choose what kind of files he/she desires as ALFA output. Categories Count File and Plots are proposed by default. \n+\t|\n+\n+\t.. class:: infomark\n+\n+\t| The user can also select the \'indexes\' option as output. This option is interesting if you plan to run ALFA again with the same submitted annotation file. *See Nota Bene/Input 1: Annotation File for more information.*\n+\t|\n+\n+\t- `How the plots look like`_\n+\n+\t.. _How the plots look like: https://github.com/biocompibens/ALFA#plots\n+\n+\t|\n+\n+\t- `How they are generated`_ \n+\n+\t.. 
_How they are generated: https://github.com/biocompibens/ALFA#detailed-example\n+\n+----\n+\n+**ALFA Developers**\n+\n+\t| Beno\xc3\xaet No\xc3\xabl and Mathieu Bahin: *compbio team, Institut de Biologie de l\'Ecole Normale Sup\xc3\xa9rieure de Paris*\n+\n+]]>\n+ </help>\n+\n+ <citations>\n+ \t<citation type="bibtex">@MISC{alfa,\n+ \t\tauthor="Beno\xc3\xaet No\xc3\xabl and Mathieu Bahin",\n+ \t\ttitle="ALFA: Annotation Landscape For Aligned reads",\n+ \t\tcrossref="https://github.com/biocompibens/ALFA",\n+ \t\tinstitution="Institut de Biologie de l\'Ecole Normale Sup\xc3\xa9rieure de Paris"\n+ \t\t}\n+ \t</citation>\n+ </citations>\n+</tool>\n'

diff -r 000000000000 -r e360f840a92e alfa/ALFA_wrapper.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/alfa/ALFA_wrapper.py Wed May 16 09:49:18 2018 -0400

[

b'@@ -0,0 +1,193 @@\n+#!/usr/bin/python\n+\n+import argparse\n+import logging\n+import os\n+import re\n+import shutil\n+import subprocess\n+import sys\n+import tempfile\n+\n+def exit_and_explain(msg):\n+ logging.critical(msg)\n+ sys.exit(msg)\n+\n+def cleanup_before_exit(tmp_dir):\n+ if tmp_dir and os.path.exists(tmp_dir):\n+ shutil.rmtree(tmp_dir)\n+\n+def get_arg():\n+ parser = argparse.ArgumentParser()\n+ parser.add_argument(\'--project_name\', dest=\'project_name\', action=\'store\', nargs=1, metavar=\'project_name\', type=str)\n+ #Input 1: Annotation File\n+ parser.add_argument(\'--index\', dest=\'indexes\', action=\'store\', nargs=2, metavar=(\'stranded_index_filename\', \'unstranded_index_filename\'), type=str)\n+ parser.add_argument(\'--bi_index\', dest=\'bi_indexes\', action=\'store\', nargs=1, metavar=\'built_in_indexes_dir_path\', type=str )\n+ parser.add_argument(\'--annotation\', dest=\'annotation_file\', action=\'store\', nargs=1, metavar=\'annotation_gtf_file\', type=str )\n+ #Input 2: Mapped Reads\n+ parser.add_argument(\'--reads_format\', dest=\'reads_format\', action=\'store\', nargs=1, choices=[\'bam\', \'bedgraph\'], metavar=\'reads_format\', type=str)\n+ parser.add_argument(\'--reads\', dest=\'reads\', action=\'store\', nargs=\'+\', metavar=(\'bam_file1 label1\',""), type=str)\n+ parser.add_argument(\'--strandness\', dest=\'strandness\', action=\'store\', nargs=1, default=[\'unstranded\'], choices=[\'unstranded\', \'forward\', \'reverse\'], metavar=\'strandness\', type=str)\n+ #Output files\n+ parser.add_argument(\'--output_pdf\', dest=\'output_pdf\', action=\'store\', nargs=1, metavar=\'output_pdf_filename\', type=str)\n+ parser.add_argument(\'--output_svg\', dest=\'output_svg\', action=\'store\', nargs=2, metavar=(\'categories_svg_filename\', \'biotypes_svg_filename\'), type=str)\n+ parser.add_argument(\'--output_png\', dest=\'output_png\', action=\'store\', nargs=2, metavar=(\'categories_png_filename\', \'biotypes_png_filename\'), 
type=str)\n+ parser.add_argument(\'--output_count\', dest=\'output_count\', action=\'store\', nargs=1, metavar=\'output_count_filename\', type=str)\n+ parser.add_argument(\'--output_index\', dest=\'output_indexes\', action=\'store\', nargs=2, metavar=(\'output_stranded_index_filename\', \'output_unstranded_index_filename\'), type=str)\n+ #Output Options\n+ parser.add_argument(\'--categories_depth\', dest=\'categories_depth\', action=\'store\', nargs=1, default=[3], choices=range(1,5), metavar=\'categories_depth\', type=int)\n+ parser.add_argument(\'--plot_format\', dest=\'plot_format\', action=\'store\', nargs=1, choices=[\'pdf\', \'png\', \'svg\'], metavar=\'plot_format\', type=str)\n+ parser.add_argument(\'--threshold\', dest=\'threshold\', action=\'store\', nargs=2, metavar=(\'yMin\', \'yMax\'), type=float)\n+ #Internal variables\n+ parser.add_argument(\'--log_report\', dest=\'log_report\', action=\'store\', nargs=1, metavar=\'log_filename\', type=str)\n+ parser.add_argument(\'--tool_dir\', dest=\'GALAXY_TOOL_DIR\', action=\'store\', nargs=1, metavar=\'galaxy_tool_dir_path\', type=str)\n+ args = parser.parse_args()\n+ return args\n+\n+def symlink_user_indexes(stranded_index, unstranded_index, tmp_dir):\n+ index=\'index\'\n+ os.symlink(stranded_index, os.path.join(tmp_dir, index + \'.stranded.index\'))\n+ os.symlink(unstranded_index, os.path.join(tmp_dir, index + \'.unstranded.index\'))\n+ return index\n+\n+def get_input2_args(reads_list, format, tmp_dir):\n+ n = len(reads_list)\n+ if n%2 != 0:\n+ exit_and_explain(\'Problem with pairing reads filename and reads label\')\n+ if format == \'bam\':\n+ input2_args = \'--bam\'\n+ elif format == \'bedgraph\':\n+ input2_args = \'--bedgraph\'\n+ k = 0\n+ reads_filenames = [\'\'] * (n/2)\n+ reads_labels = [\'\'] * (n/2)\n+ for i in range(0, n, 2):\n+ curr_filename = reads_list[i].split(\'__fname__\')[1]\n+ # Alfa checks extension so the filename must end either by .bedgraph or by .bam\n+ # We then create a symlink from 
file.dat to tmp_dir/annotation_n.<format> to avoid the error message\n+ reads_filenames[k] = os.'..b'ing.basicConfig(level=logging.INFO, filename=args.log_report[0], filemode="a+", format=\'%(message)s\')\n+ alfa_path = os.path.join(args.GALAXY_TOOL_DIR[0], \'ALFA.py\')\n+\n+ #INPUT1: Annotation File\n+ if args.indexes:\n+ # The indexes submitted by the user must exhibit the suffix \'.(un)stranded.index\' and will be called by alfa by their prefix\n+ index = symlink_user_indexes(args.indexes[0], args.indexes[1], tmp_dir)\n+ input1_args = \'-g "%s"\' % index\n+ elif args.bi_indexes:\n+ input1_args = \'-g "%s"\' % args.bi_indexes[0]\n+ elif args.annotation_file:\n+ input1_args = \'-a "%s"\' % args.annotation_file[0]\n+ else:\n+ exit_and_explain(\'No annotation file submitted !\')\n+\n+ #INPUT 2: Mapped Reads\n+ if args.reads:\n+ input2_args, reads_filenames, reads_labels = get_input2_args(args.reads, args.reads_format[0], tmp_dir)\n+ strandness = \'-s %s\' % args.strandness[0]\n+ else:\n+ exit_and_explain(\'No reads submitted !\')\n+\n+ ##Output options\n+ categories_depth = \'-d %s\' % args.categories_depth[0]\n+ if not (args.output_pdf or args.output_png or args.output_svg):\n+ output_args = \'--n\'\n+ else:\n+ plot_suffix = os.path.join(tmp_dir, "ALFA_plot");\n+ if args.output_pdf:\n+ output_args = \'--pdf \' + plot_suffix + \'.pdf\'\n+ if args.output_png:\n+ output_args = \'--png \' + plot_suffix\n+ if args.output_svg:\n+ output_args = \'--svg \' + plot_suffix\n+ if args.threshold:\n+ output_args = \'%s -t %.3f %.3f\' % (output_args, args.threshold[0], args.threshold[1])\n+\n+ ##Run alfa\n+ cmd = \'python %s %s %s %s %s %s\' % (alfa_path, input1_args, input2_args, strandness, categories_depth, output_args)\n+ # Change into the tmp dir because ALFA produces files in the current dir\n+ curr_dir = os.getcwd()\n+ os.chdir(tmp_dir)\n+ print(cmd)\n+ logging.info("__________________________________________________________________\\n")\n+ logging.info("Alfa 
execution")\n+ logging.info("__________________________________________________________________\\n")\n+ logging.info("Command Line:\\n%s\\n" % cmd)\n+ logging.info("------------------------------------------------------------------\\n")\n+ alfa_result = subprocess.Popen(args=cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)\n+ alfa_out, alfa_err = alfa_result.communicate()\n+\n+ ##Handle stdout, warning, errors...\n+ redirect_errors(alfa_out, alfa_err)\n+\n+ logging.info("Alfa prompt:\\n%s" % alfa_out)\n+\n+ ##Redirect outputs\n+ if args.output_pdf:\n+ shutil.move(plot_suffix + \'.pdf\', args.output_pdf[0])\n+ if args.output_png:\n+ shutil.move(plot_suffix + \'.Categories.png\', args.output_png[0])\n+ shutil.move(plot_suffix + \'.Biotypes.png\', args.output_png[1])\n+ if args.output_svg:\n+ shutil.move(plot_suffix + \'.Categories.svg\', args.output_svg[0])\n+ shutil.move(plot_suffix + \'.Biotypes.svg\', args.output_svg[1])\n+ if args.output_count:\n+ count_filename = merge_count_files(reads_labels)\n+ shutil.move(count_filename, args.output_count[0])\n+ if args.output_indexes:\n+ if args.annotation_file:\n+ indexes_regex = re.compile(\'.*\\.index\')\n+ indexes = filter(indexes_regex.search, os.listdir(\'.\'))\n+ indexes.sort()\n+ shutil.move(indexes[0], args.output_indexes[0])\n+ shutil.move(indexes[1], args.output_indexes[1])\n+ if args.indexes:\n+ shutil.move(index + \'.stranded.index\', args.output_indexes[0])\n+ shutil.move(index + \'.unstranded.index\', args.output_indexes[1])\n+ if args.bi_indexes:\n+ shutil.move(args.bi_indexes[0] + \'.stranded.index\', args.output_index[0])\n+ shutil.move(args.bi_indexes[1] + \'.unstranded.index\', args.output_index[1])\n+\n+ # Get back to the original dir and cleanup the tmp dir\n+ os.chdir(curr_dir)\n+ cleanup_before_exit(tmp_dir)\n+main()\n'

diff -r 000000000000 -r e360f840a92e alfa/test-data/alfa_toy-Biofeatures Distribution.pdf

Binary file alfa/test-data/alfa_toy-Biofeatures Distribution.pdf has changed

diff -r 000000000000 -r e360f840a92e alfa/test-data/alfa_toy.bam

Binary file alfa/test-data/alfa_toy.bam has changed

diff -r 000000000000 -r e360f840a92e alfa/test-data/alfa_toy.bedgraph
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/alfa/test-data/alfa_toy.bedgraph Wed May 16 09:49:18 2018 -0400

@@ -0,0 +1,4 @@
+Chr1 149 199 2
+Chr1 299 349 1
+Chr1 499 549 6
+Chr1 1099 1149 1

diff -r 000000000000 -r e360f840a92e alfa/test-data/alfa_toy.categories_counts
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/alfa/test-data/alfa_toy.categories_counts Wed May 16 09:49:18 2018 -0400

@@ -0,0 +1,5 @@
+#Category,biotype Counts_in_bam Size_in_genome
+CDS,protein_coding 300.0 624.0
+five_prime_utr,protein_coding 75.0 250.5
+three_prime_utr,protein_coding 25.0 126.5
+intergenic,intergenic 100.0 249.0

diff -r 000000000000 -r e360f840a92e alfa/test-data/alfa_toy.gtf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/alfa/test-data/alfa_toy.gtf Wed May 16 09:49:18 2018 -0400

@@ -0,0 +1,6 @@
+Chr1 ensembl_havana gene 250 1250 . + . gene_id "ENSMUSG00000051951"; gene_biotype "protein_coding";
+Chr1 ensembl_havana transcript 250 1250 . + . gene_id "ENSMUSG00000051951"; gene_biotype "protein_coding";
+Chr1 ensembl_havana exon 375 1000 . + . gene_id "ENSMUSG00000051951"; gene_biotype "protein_coding";
+Chr1 ensembl_havana CDS 375 1000 . + 0 gene_id "ENSMUSG00000051951"; gene_biotype "protein_coding";
+Chr1 ensembl_havana five_prime_utr 250 375 . - . gene_id "ENSMUSG00000051951"; gene_biotype "protein_coding";
+Chr1 ensembl_havana three_prime_utr 1000 1250 . - . gene_id "ENSMUSG00000051951"; gene_biotype "protein_coding";

diff -r 000000000000 -r e360f840a92e alfa/test-data/alfa_toy.stranded.index
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/alfa/test-data/alfa_toy.stranded.index Wed May 16 09:49:18 2018 -0400

@@ -0,0 +1,11 @@
+#Chr1 1250
+Chr1 249 374 + protein_coding:gene,transcript
+Chr1 249 374 - protein_coding:five_prime_utr
+Chr1 374 375 + protein_coding:exon,CDS
+Chr1 374 375 - protein_coding:five_prime_utr,three_prime_utr
+Chr1 375 999 + protein_coding:exon,CDS
+Chr1 375 999 - antisense
+Chr1 999 1000 + protein_coding:exon,CDS
+Chr1 999 1000 - protein_coding:three_prime_utr
+Chr1 1000 1250 + protein_coding:gene,transcript
+Chr1 1000 1250 - protein_coding:five_prime_utr,three_prime_utr,exon,CDS

diff -r 000000000000 -r e360f840a92e alfa/test-data/alfa_toy.unstranded.index
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/alfa/test-data/alfa_toy.unstranded.index Wed May 16 09:49:18 2018 -0400

@@ -0,0 +1,6 @@
+#Chr1 1250
+Chr1 249 374 . protein_coding:five_prime_utr,gene,transcript
+Chr1 374 375 . protein_coding:five_prime_utr,three_prime_utr,exon,CDS
+Chr1 375 999 . protein_coding:exon,CDS
+Chr1 999 1000 . protein_coding:three_prime_utr,exon,CDS
+Chr1 1000 1250 . protein_coding:five_prime_utr,exon,CDS,three_prime_utr,gene,transcript

diff -r 000000000000 -r e360f840a92e alfa/tool-data/alfa_indexes.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/alfa/tool-data/alfa_indexes.loc.sample Wed May 16 09:49:18 2018 -0400

@@ -0,0 +1,2 @@
+#<species> <version> <release> <value> <dbkey> <name> <prefix>
+#Dictyostelium_discoideum dicty_2 7 Dictyostelium_discoideum_dicty_2_7 Dictyostelium_discoideum_dicty_2_7 Dictyostelium_discoideum: dicty_2 (release 7) <path_to_dicty_indexes_dir>

diff -r 000000000000 -r e360f840a92e alfa/tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/alfa/tool_data_table_conf.xml.sample Wed May 16 09:49:18 2018 -0400

@@ -0,0 +1,7 @@
+<tables>
+    
+    <table name="alfa_indexes" comment_char="#" allow_duplicate_entries="False">
+        <columns>species, version, release, value, dbkey, name, prefix</columns>
+        <file path="tool-data/alfa_indexes.loc" />
+    </table>
+</tables>

diff -r 000000000000 -r e360f840a92e alfa/tool_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/alfa/tool_dependencies.xml Wed May 16 09:49:18 2018 -0400

@@ -0,0 +1,12 @@
+<?xml version="1.0"?>
+<tool_dependency>
+ <package name="bedtools" version="2.24">
+ <repository changeset_revision="3416a1d4a582" name="package_bedtools_2_24" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
+ </package>
+     <package name="samtools" version="1.2">
+     <repository changeset_revision="f6ae3ba3f3c1" name="package_samtools_1_2" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
+ </package>
+     <package name="matplotlib" version="1.4">
+     <repository changeset_revision="f7424e1cf115" name="package_python_2_7_matplotlib_1_4" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
+     </package>
+</tool_dependency>