Galaxy |

Changeset 36:9e7727cea26d (2017-12-21)

Previous changeset 35:8930b22aba2b (2017-12-21) Next changeset 37:eb3dd0bab392 (2017-12-21)

Commit message:
Deleted selected files

removed:
alfa/alfa/alfa/.shed.yml
alfa/alfa/alfa/ALFA.py
alfa/alfa/alfa/ALFA.xml
alfa/alfa/alfa/ALFA_wrapper.py

diff -r 8930b22aba2b -r 9e7727cea26d alfa/alfa/alfa/.shed.yml
--- a/alfa/alfa/alfa/.shed.yml Thu Dec 21 09:38:45 2017 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000

@@ -1,14 +0,0 @@
-categories:
-- Graphics
-- Next Gen Mappers
-- Sequence Analysis
-- Visualization
-description: A tool to Compute and display distribution of reads by genomic categories
-long_description: |
- ALFA provides a global overview of features distribution composing New Generation Sequencing dataset(s).
- Given a set of aligned reads (BAM files) and an annotation file (GTF format), the tool produces plots of the raw and normalized distributions of those reads among genomic categories (stop codon, 5'-UTR, CDS, intergenic, etc.) and biotypes (protein coding genes, miRNA, tRNA, etc.). Whatever the sequencing technique, whatever the organism.
- https://github.com/biocompibens/ALFA
-name: alfa
-owner: charles_bernard
-remote_repository_url: https://github.com/biocompibens/ALFA/tree/master/Galaxy_toolshed_repositories/ALFA
-type: unrestricted

diff -r 8930b22aba2b -r 9e7727cea26d alfa/alfa/alfa/ALFA.py
--- a/alfa/alfa/alfa/ALFA.py Thu Dec 21 09:38:45 2017 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000

[

b'@@ -1,1160 +0,0 @@\n-#!/usr/bin/python\n-#-*- coding: utf-8 -*-\n-\n-__author__ = \'noel & bahin\'\n-\'\'\' <decription> \'\'\'\n-\n-import argparse\n-import os\n-import numpy\n-import sys\n-import subprocess\n-import matplotlib.pyplot as plt\n-import matplotlib.cm as cmx\n-import matplotlib.colors as colors\n-import matplotlib.patheffects as PathEffects\n-import re\n-from matplotlib.backends.backend_pdf import PdfPages\n-# To correctly embbed the texts when saving plots in svg format\n-import matplotlib\n-matplotlib.rcParams[\'svg.fonttype\'] = \'none\'\n-\n-##########################################################################\n-# FUNCTIONS #\n-##########################################################################\n-\n-def init_dict(d, key, init):\n-\tif key not in d:\n-\t\td[key] = init\n-\n-def get_chromosome_lengths(args):\n-\t"""\n-\tParse the file containing the chromosomes lengths.\n-\tIf no length file is provided, browse the annotation file (GTF) to estimate the chromosome sizes (\n-\t"""\n-\tlengths={}\n-\tgtf_chrom_names=set()\n-\tforce_get_lengths = False\n-\t# If the user provided the chromosome length file\n-\tif args.chr_len:\n-\t\twith open(args.chr_len, \'r\') as chr_len_file:\n-\t\t\tfor line in chr_len_file:\n-\t\t\t\tlengths[line.split(\'\\t\')[0]] = int(line.rstrip().split(\'\\t\')[1])\n-\t\twith open(args.annotation,\'r\') as gtf_file:\n-\t\t\tfor line in gtf_file:\n-\t\t\t\tif not line.startswith(\'#\'):\n-\t\t\t\t\tchrom = line.split(\'\\t\')[0]\n-\t\t\t\t\tif chrom not in gtf_chrom_names:\n-\t\t\t\t\t\tgtf_chrom_names.add(chrom)\n-\t\tfor chrom in lengths.keys():\n-\t\t\tif chrom not in gtf_chrom_names:\n-\t\t\t\tprint "Warning: at least one chromosome name (\'"+chrom+"\') of the file \'"+args.chr_len+"\'does not match any chromosome name if GTF and was ignored."\n-\t\t\t\t#del lengths[chrom]\n-\t\t\t\tbreak\n-\t\tfor chrom in gtf_chrom_names:\n-\t\t\tif force_get_lengths: break\n-\t\t\tif chrom not in lengths.keys():\n-\t\t\t\tprint "WARNING: chromosome name \'"+chrom+"\' was found in gtf and does not match any chromosome name provided in",args.chr_len+". "\n-\t\t\t\tprint "\\t=> The chromosome lenghts will be approximated using annotations in the GTF file."\n-\t\t\t\tcontinue_value =""\n-\t\t\t\twhile continue_value not in {"yes","y","no","n"}:\n-\t\t\t\t\tcontinue_value = raw_input("\\tDo you want to continue (\'yes\' or \'y\')?\\n\\tElse write \'no\' or \'n\' to exit the script and check your file of lengths.\\n")\n-\t\t\t\t\tif continue_value == "no" or continue_value == "n":\n-\t\t\t\t\t\tsys.exit("Exiting")\n-\t\t\t\t\telif continue_value == "yes" or continue_value == "y":\n-\t\t\t\t\t\tforce_get_lengths = True\n-\t\t\t\t\t\tbreak\n-\t\t\t\t\tprint "Error: use \'yes/y/no/n\' only"\n-\t\tif not force_get_lengths:\n-\t\t\treturn lengths\n-\t# Otherwise, (or if at least one chromosome was missing in chromosome lengths file) we consider the end of the last annotation of the chromosome in the GTF file as the chromosome length\n-\twith open(args.annotation, \'r\') as gtf_file:\n-\t\tfor line in gtf_file:\n-\t\t\tif not line.startswith(\'#\'):\n-\t\t\t\tchrom = line.split(\'\\t\')[0]\n-\t\t\t\tend = int(line.split(\'\\t\')[4])\n-\t\t\t\tinit_dict(lengths, chrom, 0)\n-\t\t\t\tlengths[chrom] = max(lengths[chrom], end)\n-\t\tif force_get_lengths:\n-\t\t\tprint "The chromosome lenghts have been approximated using the last annotations in the GTF file."\n-\t\treturn lengths\n-\n-def write_feature_on_index(feat,chrom, start, stop, sign, stranded_genome_index, unstranded_genome_index=None):\n-\tgrouped_by_biotype_features = []\n-\tfor biotype,categs in feat.iteritems():\n-\t\tcateg_list=[]\n-\t\tfor cat in set(categs):\n-\t\t\tcateg_list.append(cat)\n-\t\tgrouped_by_biotype_features.append(":".join((str(biotype),",".join(categ_list))))\n-\tstranded_genome_index.write(\'\\t\'.join((chrom, start, stop, sign,\'\'))+\'\\t\'.join(grouped_by_biotype_features)+\'\\n\')\n-\tif unstranded_genome_index :\n-\t\tunstranded_genome_index.write(\'\\t\'.join((chrom, start, stop, \'.\',\'\'))+\'\\t\'.join(grouped_by_biotype_features)+\'\\n\')\n-\n-\n-def add_info(cpt, feat_values, start, stop, chrom=None, unstranded_genome_index=None, stranded_genome_index = None , biotype_prios=None, coverage=1, cate'..b'mples_files,samples_names,prios,genome_index, options.strandness[0], biotype_prios = None)\n-\n-\t#### Write the counts on disk\n-\twrite_counts_in_files(cpt,cpt_genome)\n-\n-if not (intersect_reads or process_counts) or (options.quiet and options.pdf == False):\n-\tquit("\\n### End of program")\n-print "\\n### Generating plots"\n-# Updating the biotypes lists (biotypes and \'biotype_group1\'): adding the \'unknow biotypes\' found in gtf/index\n-if unknown_feature == []: # \'unknown_feature\' is define only during the index generation\n-\t# Browse the feature to determine whether some biotypes are \'unknown\'\n-\tfor sample,counts in cpt.items():\n-\t\tfor (cat,biot) in counts:\n-\t\t\tif biot not in biotypes and cat not in unknown_feature:\n-\t\t\t\tunknown_feature.append(biot)\n-for new_biot in unknown_feature:\n-\tbiotypes.add(new_biot)\n-\tbiotypes_group1["others"].append(new_biot)\n-biotypes = sorted(biotypes)\n-# move antisense categ to the end of the list\n-biotypes.remove(\'antisense\')\n-biotypes.append(\'antisense\')\n-biotypes_group1 = sorted(biotypes_group1)\n-\n-\n-#print \'\\nCounts for every category/biotype pair: \',cpt\n-\n-# Generating plots\n-if options.pdf != False:\n-\tif options.pdf == None:\n-\t\toptions.pdf = "categories_plots.pdf"\n-\tpdf = PdfPages(options.pdf)\n-else:\n-\tpdf = False\n-\n-selected_biotype = None\n-if options.biotype_filter:\n-\toptions.biotype_filter = options.biotype_filter[0]\n-\tfor sample in cpt:\n-\t\tfor feature in cpt[sample]:\n-\t\t\tbiotype = feature[1]\n-\t\t\tif options.biotype_filter.lower() == biotype.lower():\n-\t\t\t\tselected_biotype=biotype\n-\t\t\t\tbreak\n-\tif selected_biotype == None :\n-\t\tprint "\\nError: biotype \'"+options.biotype_filter+"\' not found. Please check the biotype name and that this biotype exists in your sample(s)."\n-\t\tsys.exit()\n-\n-#Print a warning message if the UTRs are not specified as 5\' or 3\' (they will be ploted as 5\'UTR)\n-if \'UTR\' in [categ[0] for counts in cpt.values() for categ in counts.keys()]:\n-\tprint \'\'\'\\nWARNING: (some) 5\'UTR/3\'UTR are not precisely defined. Consequently, positions annotated as "UTR" will be counted as "5\'UTR"\\n\'\'\'\n-\n-#### Make the plot by categories\n-\t#### Recategorizing with the final categories\n-final_cats=categs_groups[options.categories_depth-1]\n-final_cat_cpt,final_genome_cpt, filtered_cat_cpt = group_counts_by_categ(cpt,cpt_genome,final_cats,selected_biotype)\n-\t#### Display the distribution of specified categories (or biotypes) in samples on a barplot\n-# Remove the \'antisense\' category if the library type is \'unstranded\'\n-for dic in cpt.values():\n-\tif (\'antisense\',\'antisense\') in dic.keys(): break\n-else:\n-\tcat_list.remove(\'antisense\')\n-make_plot(cat_list,samples_names,final_cat_cpt,final_genome_cpt,pdf, "categories",options.threshold, svg = options.svg, png = options.png)\n-if selected_biotype :\n-\tmake_plot(cat_list,samples_names,filtered_cat_cpt,final_genome_cpt,pdf, "categories",options.threshold,title="Categories distribution for \'"+selected_biotype+"\' biotype", svg = options.svg, png = options.png)\n-\n-#### Make the plot by biotypes\n-\t#### Recategorizing with the final categories\n-final_cat_cpt,final_genome_cpt = group_counts_by_biotype(cpt,cpt_genome,biotypes)\n-\t#### Display the distribution of specified categories (or biotypes) in samples on a barplot\n-make_plot(biotypes,samples_names,final_cat_cpt,final_genome_cpt,pdf, "biotypes",options.threshold, svg = options.svg, png = options.png)\n-\n-\n-\n-\t##### Recategorizing with the final categories\n-#final_cat_cpt,final_genome_cpt = group_counts_by_biotype(cpt,cpt_genome,biotypes_group1)\n-\t##### Display the distribution of specified categories (or biotypes) in samples on a barplot\n-#make_plot(biotypes_group1,samples_names,final_cat_cpt,final_genome_cpt,pdf,"Biotype groups", options.threshold, title="Biotypes distribution in mapped reads \\n(biotypes are grouped by \'family\')", svg = options.svg, png = options.png)\n-\n-\n-if options.pdf:\n-\tpdf.close()\n-\tprint "\\n### Plots saved in pdf file: %s" %options.pdf\n-\t\n-print "\\n### End of program"\n\\ No newline at end of file\n'

diff -r 8930b22aba2b -r 9e7727cea26d alfa/alfa/alfa/ALFA.xml
--- a/alfa/alfa/alfa/ALFA.xml Thu Dec 21 09:38:45 2017 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000

[

b'@@ -1,329 +0,0 @@\n-<tool id="alfa" name="ALFA" version="0.1.0">\n-\t<description>- Plot the distribution of genomic features in your aligned reads </description>\n-\n-\t\n-\t<requirements>\n- \t<requirement type="package">bedtools</requirement>\n- \t<requirement type="package">samtools</requirement>\n- \t<requirement type="package">matplotlib</requirement>\n- \t</requirements>\n-\n-\t<command interpreter="python">\n-\t<![CDATA[\n-\t\tALFA_wrapper.py\n-\n-\t\t--project_name "${projectName}"\t\n-\n-\t\t##__INPUT 1: ANNOTATION OF THE SEQ/GENOME__##\n-\t\t#if str ( $annotation.annotationSource[\'annotationSourceSelection\'] ) == "index"\n-\t\t\t--index "$annotation.annotationSource[\'strandedIndex\']" "$annotation.annotationSource[\'unstrandedIndex\']"\n-\t\t#else if str ( $annotation.annotationSource[\'annotationSourceSelection\'] ) == "built_in_index"\n-\t\t\t--bi_index "$annotation.annotationSource.built_in_index_prefix.fields.prefix"\n-\t\t#else\n-\t\t\t--annotation "$annotation.annotationSource[\'annotationFile\']"\n-\t\t#end if\n-\n-\t\t##__INPUT 2: ALIGNED READS__##\n-\t\t--reads_format $reads.readsType[\'readsTypeSelection\']\n-\t\t\t--reads\n-\t\t#for $i, $r in enumerate ( $reads.readsType[\'readsList\'] ) \n-\t\t\t"__fname__$r.readsFile"\n-\t\t\t"__label__$r.readsLabel"\n-\t\t#end for\n-\t\t--strandness $reads[\'strandness\']\n-\n-\t\t##__OUTPUT FILES__##\n-\t\t#if str ( $outputFiles[\'plot\'] ) == "True"\n-\t\t\t#if str ( $outputOptions[\'plotFormat\'] ) == "pdf"\n-\t\t\t\t--output_pdf "$outputPdf"\n-\t\t\t#else if str ( $outputOptions[\'plotFormat\'] ) == "png"\n-\t\t\t\t--output_png "$outputCategoriesPng" "$outputBiotypesPng"\n-\t\t\t#else\n-\t\t\t\t--output_svg "$outputCategoriesSvg" "$outputBiotypesSvg"\n-\t\t\t#end if\n-\t\t#end if\n-\t\t#if str ( $outputFiles[\'countFile\'] ) == "True"\n-\t\t\t--output_count "$outputCountFile"\n-\t\t#end if\n-\t\t#if str ( $outputFiles[\'index\'] ) == "True"\n-\t\t\t--output_index "$outputStrandedIndex" "$outputUnstrandedIndex"\n-\t\t#end if\n-\n-\t\t##__OUTPUT OPTIONS__##\n-\t\t--categories_depth $outputOptions[\'categoriesDepth\']\n-\t\t#if str ( $outputFiles[\'plot\'] ) == "True"\n-\t\t\t--plot_format $outputOptions[\'plotFormat\']\n-\t\t\t#if str ( $outputOptions.plotThreshold[\'plotThresholdChoice\'] ) == "True"\n-\t\t\t\t--threshold $outputOptions.plotThreshold.yMin $outputOptions.plotThreshold.yMax\n-\t\t\t#end if\n-\t\t#end if\n-\n-\t\t--log_report "$logReport"\n-\t\t--tool_dir "$__tool_directory__"\n-\t]]>\n-\t</command>\n-\t<inputs>\n-\t\t<param name="projectName" value="ALFA" type="text" size="20" label="Project Name">\n-\t\t\t<validator type="empty_field" message="Please, specify a name for your project."/>\n-\t\t</param>\n-\n-\t\t<section name="annotation" title="INPUT 1: Annotation of your genome / sequence" expanded="True">\n-\t\t\t<conditional name="annotationSource">\n-\t\t\t\t<param name="annotationSourceSelection" type="select" label="Select the type of your annotation">\n-\t\t\t\t\t<option value="personal_gtf" selected="true">Personal annotation file (GTF format)</option>\n-\t\t\t\t\t<option value="index">Stranded and Unstranded Indexes previously generated by ALFA (Index format)</option>\n-\t\t\t\t\t<option value="built_in_index">Built-in indexes among a list of referenced genome (Index format)</option>\n-\t\t\t\t</param>\n-\t\t\t\t<when value="personal_gtf">\n-\t\t\t\t\t<param name="annotationFile" type="data" format="Gff, Gtf" label="Select your personal annotation file (GTF format)">\n-\t\t\t\t\t</param>\n-\t\t\t\t</when>\n-\t\t\t\t<when value="index">\n-\t\t\t\t\t<param name="strandedIndex" type="data" format="index" label="Select your ALFA Stranded index file (index format)"/>\n-\t\t\t\t\t<param name="unstrandedIndex" type="data" format="index" label="Select your ALFA Unstranded index file (index format)"/>\n-\t\t\t\t</when>\n-\t\t\t\t<when value="built_in_index">\n-\t\t\t\t\t<param name="built_in_index_prefix" type="select" label="Select Genome">\n-\t\t\t\t\t\t<options from_data_table="alfa_indexes">\n-\t\t\t\t\t\t\t<validator type="no_options" message="No indexes are available for the selected input dataset. Ask your Galaxy Admin for to use ALFA_data_manager tool to build such indexes!" />\n-\t\t\t\t\t\t</options>\n-\t\t\t\t\t</para'..b'assert_stdout>\n-\t\t</test>\n-\t</tests>\n-\n-\t<help>\n-<![CDATA[\n-**What it does**\n-\n-\n-\t| ALFA provides a global overview of features distribution composing New Generation Sequencing dataset(s). \n-\t|\n- \t| Given a set of aligned reads (BAM files) and an annotation file (GTF format), the tool produces plots of the raw and normalized distributions of those reads among genomic categories (stop codon, 5\'-UTR, CDS, intergenic, etc.) and biotypes (protein coding genes, miRNA, tRNA, etc.). Whatever the sequencing technique, whatever the organism.\n-\n-----\n-\n-**ALFA acronym**\n-\n-- Annotation Landscape For Aligned reads\n-\n-----\n-\n-**Official documentation of the tool**\n-\n-\n-- https://github.com/biocompibens/ALFA\n-\n-----\n-\n-**Detailed example**\n-\n-- https://github.com/biocompibens/ALFA#detailed-example\n-\n-----\n-\n-**Nota Bene**\n-\n-* **Input 1: Annotation File**\n-\n-\n-\t| ALFA requires as first input an annotation file (sequence, genome...) in gtf format in order to generate alfa indexes needed in a second round of the program.\n-\t| Indexes are files which list all the coordinates of the categories (stop codon, 5\'-UTR, CDS, intergenic...) and biotypes (protein coding genes, miRNA, tRNA, ...) encountered in the annotated sequence.\n-\t|\n-\t\n-\t.. class:: warningmark\n-\n-\t| Gtf File must be sorted.\n-\t|\n-\n-\t.. class:: infomark\n-\n-\t| Generation of indexes from an annotation file might be time consuming (i.e ~10min for the human genome). Thus, ALFA allows the user to submit directly indexes generated in previous runs as inputs for a new run.\n-\t|\n-\n-\t.. class:: infomark\n-\n-\t| ALFA also enables the use of built-in indexes to save even more computational time. In order to generate easily these built-in indexes, install the data manager tool `ALFA_data_manager`_ available on the toolshed.\n-\n-\t.. _data_manager_build_alfa_indexes: https://toolshed.g2.bx.psu.edu/view/charles-bernard/data_manager_build_alfa_indexes\n-\n-* **Input 2: Reads**\n-\n-\t| ALFA requires as second input a single or a set of mapped reads file(s) in either bam or bedgraph format. The coordinates of the mapped reads will be intersected with the according categories and biotypes mentioned in the indexes.\n-\t| The strandness option determines which strand of the annotated sequence will be taken into account during this intersection.\n-\t|\n-\n-\t.. class:: warningmark\n-\n-\t| Bam or Bedgraph file(s) must be sorted.\n-\t|\n-\n-\t.. class:: warningmark\n-\n-\t| Chromosome names in reads and in annotation file (gtf or indexes) must be the same for the intersection to occur\n-\t|\n-\n-* **Output files**\n-\n-\t| The result of the intersection is a count file displaying the count of nucleotides in the reads for each genomic categories and biotypes. From this count file, plots of the raw and normalized distributions of the reads among these categories are generated.\n-\t| In the output files section, the user can choose what kind of files he/she desires as ALFA output. Categories Count File and Plots are proposed by default. \n-\t|\n-\n-\t.. class:: infomark\n-\n-\t| The user can also select the \'indexes\' option as output. This option is interesting if you plan to run ALFA again with the same submitted annotation file. *See Nota Bene/Input 1: Annotation File for more information.*\n-\t|\n-\n-\t- `How the plots look like`_\n-\n-\t.. _How the plots look like: https://github.com/biocompibens/ALFA#plots\n-\n-\t|\n-\n-\t- `How they are generated`_ \n-\n-\t.. _How they are generated: https://github.com/biocompibens/ALFA#detailed-example\n-\n-----\n-\n-**ALFA Developpers**\n-\n-\t| Beno\xc3\xaet No\xc3\xabl and Mathieu Bahin: *compbio team, Institut de Biologie de l\'Ecole Normale Sup\xc3\xa9rieure de Paris*\n-\n-]]>\n- </help>\n-\n- <citations>\n- \t<citation type="bibtex">@MISC{\n- \t\tauthor="Beno\xc3\xaet No\xc3\xabl and Mathieu Bahin"\n- \t\ttitle="ALFA: Annotation Landscape For Aligned reads"\n- \t\tcrossref="https://github.com/biocompibens/ALFA"\n- \t\tinstitution="Institut de Biologie de l\'Ecole Normale Sup\xc3\xa9rieure de Paris"\n- \t\t}\n- \t</citation>\n- </citations>\n-</tool>\n'

diff -r 8930b22aba2b -r 9e7727cea26d alfa/alfa/alfa/ALFA_wrapper.py
--- a/alfa/alfa/alfa/ALFA_wrapper.py Thu Dec 21 09:38:45 2017 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000

[

b'@@ -1,183 +0,0 @@\n-#!/usr/bin/python\n-\n-import argparse\n-import logging\n-import os\n-import re\n-import shutil\n-import subprocess\n-import sys\n-import tempfile\n-\n-def exit_and_explain(msg):\n- logging.critical(msg)\n- sys.exit(msg)\n-\n-def cleanup_before_exit(tmp_dir):\n- if tmp_dir and os.path.exists(tmp_dir):\n- shutil.rmtree(tmp_dir)\n-\n-def get_arg():\n- parser = argparse.ArgumentParser()\n- parser.add_argument(\'--project_name\', dest=\'project_name\', action=\'store\', nargs=1, metavar=\'project_name\', type=str)\n- #Input 1: Annotation File\n- parser.add_argument(\'--index\', dest=\'indexes\', action=\'store\', nargs=2, metavar=(\'stranded_index_filename\', \'unstranded_index_filename\'), type=str)\n- parser.add_argument(\'--bi_index\', dest=\'bi_indexes\', action=\'store\', nargs=1, metavar=\'built_in_indexes_dir_path\', type=str )\n- parser.add_argument(\'--annotation\', dest=\'annotation_file\', action=\'store\', nargs=1, metavar=\'annotation_gtf_file\', type=str )\n- #Input 2: Mapped Reads\n- parser.add_argument(\'--reads_format\', dest=\'reads_format\', action=\'store\', nargs=1, choices=[\'bam\', \'bedgraph\'], metavar=\'reads_format\', type=str)\n- parser.add_argument(\'--reads\', dest=\'reads\', action=\'store\', nargs=\'+\', metavar=(\'bam_file1 label1\',""), type=str)\n- parser.add_argument(\'--strandness\', dest=\'strandness\', action=\'store\', nargs=1, default=[\'unstranded\'], choices=[\'unstranded\', \'forward\', \'reverse\'], metavar=\'strandness\', type=str)\n- #Output files\n- parser.add_argument(\'--output_pdf\', dest=\'output_pdf\', action=\'store\', nargs=1, metavar=\'output_pdf_filename\', type=str)\n- parser.add_argument(\'--output_svg\', dest=\'output_svg\', action=\'store\', nargs=2, metavar=(\'categories_svg_filename\', \'biotypes_svg_filename\'), type=str)\n- parser.add_argument(\'--output_png\', dest=\'output_png\', action=\'store\', nargs=2, metavar=(\'categories_png_filename\', \'biotypes_png_filename\'), type=str)\n- parser.add_argument(\'--output_count\', dest=\'output_count\', action=\'store\', nargs=1, metavar=\'output_count_filename\', type=str)\n- parser.add_argument(\'--output_index\', dest=\'output_indexes\', action=\'store\', nargs=2, metavar=(\'output_stranded_index_filename\', \'output_unstranded_index_filename\'), type=str)\n- #Output Options\n- parser.add_argument(\'--categories_depth\', dest=\'categories_depth\', action=\'store\', nargs=1, default=[3], choices=range(1,5), metavar=\'categories_depth\', type=int)\n- parser.add_argument(\'--plot_format\', dest=\'plot_format\', action=\'store\', nargs=1, choices=[\'pdf\', \'png\', \'svg\'], metavar=\'plot_format\', type=str)\n- parser.add_argument(\'--threshold\', dest=\'threshold\', action=\'store\', nargs=2, metavar=(\'yMin\', \'yMax\'), type=float)\n- #Internal variables\n- parser.add_argument(\'--log_report\', dest=\'log_report\', action=\'store\', nargs=1, metavar=\'log_filename\', type=str)\n- parser.add_argument(\'--tool_dir\', dest=\'GALAXY_TOOL_DIR\', action=\'store\', nargs=1, metavar=\'galaxy_tool_dir_path\', type=str)\n- args = parser.parse_args()\n- return args\n-\n-def symlink_user_indexes(stranded_index, unstranded_index):\n- index=\'index\'\n- os.symlink(stranded_index, index + \'.stranded.index\')\n- os.symlink(unstranded_index, index + \'.unstranded.index\')\n- return index\n-\n-def get_input2_args(reads_list, format):\n- n = len(reads_list)\n- if n%2 != 0:\n- exit_and_explain(\'Problem with pairing reads filename and reads label\')\n- if format == \'bam\':\n- input2_args = \'--bam\'\n- elif format == \'begraph\':\n- input2_args = \'--bedgraph\'\n- input2_args=\'-i\'\n- k = 0\n- reads_filenames = [\'\'] * (n/2)\n- reads_labels = [\'\'] * (n/2)\n- for i in range(0, n, 2):\n- reads_filenames[k] = reads_list[i].split(\'__fname__\')[1]\n- cur_label = reads_list[i+1].split(\'__label__\')[1]\n- reads_labels[k] = re.sub(r\' \', \'_\', cur_label)\n- if not reads_labels[k]:\n- reads_labels[k] = \'sample_%s\' % str(k)\n- input2_args=\'%s "%s" "%s"\' % (input2_args, reads_filenames[k]'..b'ount_file.close()\n- merged_count_file.close()\n- return \'count_file.txt\'\n-\n-def main():\n- args = get_arg()\n-\n- if not (args.output_pdf or args.output_png or args.output_svg or args.output_indexes or args.output_count):\n- exit_and_explain(\'Error: no output to return\\nProcess Aborted\\n\')\n- tmp_dir = tempfile.mkdtemp(prefix=\'tmp\', suffix=\'\')\n- logging.basicConfig(level=logging.INFO, filename=args.log_report[0], filemode="a+", format=\'%(message)s\')\n- alfa_path = os.path.join(args.GALAXY_TOOL_DIR[0], \'ALFA.py\')\n-\n- #INPUT1: Annotation File\n- if args.indexes:\n- # The indexes submitted by the user must exhibit the suffix \'.(un)stranded.index\' and will be called by alfa by their prefix\n- index = symlink_user_indexes(args.indexes[0], args.indexes[1])\n- input1_args = \'-g "%s"\' % index\n- elif args.bi_indexes:\n- input1_args = \'-g "%s"\' % args.bi_indexes[0]\n- elif args.annotation_file:\n- input1_args = \'-a "%s"\' % args.annotation_file[0]\n- else:\n- exit_and_explain(\'No annotation file submitted !\')\n-\n- #INPUT 2: Mapped Reads\n- if args.reads:\n- input2_args, reads_filenames, reads_labels = get_input2_args(args.reads, args.reads_format[0])\n- strandness = \'-s %s\' % args.strandness[0]\n- else:\n- exit_and_explain(\'No reads submitted !\')\n-\n- ##Output options\n- categories_depth = \'-d %s\' % args.categories_depth[0]\n- if not (args.output_pdf or args.output_png or args.output_svg):\n- output_args = \'--n\'\n- else:\n- if args.output_pdf:\n- output_args = \'--pdf plot.pdf\'\n- if args.output_png:\n- output_args = \'--png plot\'\n- if args.output_svg:\n- output_args = \'--svg plot\'\n- if args.threshold:\n- output_args = \'%s -t %.3f %.3f\' % (output_args, args.threshold[0], args.threshold[1])\n-\n- ##Run alfa\n- cmd = \'python %s %s %s %s %s %s\' % (alfa_path, input1_args, input2_args, strandness, categories_depth, output_args)\n- logging.info("__________________________________________________________________\\n")\n- logging.info("Alfa execution")\n- logging.info("__________________________________________________________________\\n")\n- logging.info("Command Line:\\n%s\\n" % cmd)\n- logging.info("------------------------------------------------------------------\\n")\n- alfa_result = subprocess.Popen(args=cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)\n- alfa_out, alfa_err = alfa_result.communicate()\n-\n- ##Handle stdout, warning, errors...\n- redirect_errors(alfa_out, alfa_err)\n-\n- logging.info("Alfa prompt:\\n%s" % alfa_out)\n-\n- ##Redirect outputs\n- if args.output_pdf:\n- shutil.move(\'plot.pdf\', args.output_pdf[0])\n- if args.output_png:\n- shutil.move(\'plot\' + \'.categories.png\', args.output_png[0])\n- shutil.move(\'plot\' + \'.biotypes.png\', args.output_png[1])\n- if args.output_svg:\n- shutil.move(\'plot\' + \'.categories.svg\', args.output_svg[0])\n- shutil.move(\'plot\' + \'.biotypes.svg\', args.output_svg[1])\n- if args.output_count:\n- count_filename = merge_count_files(reads_labels)\n- shutil.move(count_filename, args.output_count[0])\n- if args.output_indexes:\n- if args.annotation_file:\n- indexes_regex = re.compile(\'.*\\.index\')\n- indexes = filter(indexes_regex.search, os.listdir(\'.\'))\n- indexes.sort()\n- shutil.move(indexes[0], args.output_indexes[0])\n- shutil.move(indexes[1], args.output_indexes[1])\n- if args.indexes:\n- shutil.move(index + \'.stranded.index\', args.output_indexes[0])\n- shutil.move(index + \'.unstranded.index\', args.output_indexes[1])\n- if args.bi_indexes:\n- shutil.move(args.bi_indexes[0] + \'.stranded.index\', args.output_index[0])\n- shutil.move(args.bi_indexes[1] + \'.unstranded.index\', args.output_index[1])\n-\n- cleanup_before_exit(tmp_dir)\n-main()\n'