Previous changeset 32:b26aec436ab5 (2017-12-21) Next changeset 34:b7ad8942c871 (2017-12-21) |
Commit message:
Uploaded |
added:
alfa/.shed.yml alfa/ALFA.py alfa/ALFA.xml alfa/ALFA_wrapper.py alfa/test-data/alfa_toy-Biofeatures Distribution.pdf alfa/test-data/alfa_toy.bam alfa/test-data/alfa_toy.bedgraph alfa/test-data/alfa_toy.categories_counts alfa/test-data/alfa_toy.gtf alfa/test-data/alfa_toy.stranded.index alfa/test-data/alfa_toy.unstranded.index alfa/tool-data/alfa_indexes.loc.sample alfa/tool_data_table_conf.xml.sample alfa/tool_dependencies.xml |
removed:
ALFA/.shed.yml ALFA/ALFA.py ALFA/ALFA.xml ALFA/ALFA_wrapper.py ALFA/test-data/alfa_toy-Biofeatures Distribution.pdf ALFA/test-data/alfa_toy.bam ALFA/test-data/alfa_toy.bedgraph ALFA/test-data/alfa_toy.categories_counts ALFA/test-data/alfa_toy.gtf ALFA/test-data/alfa_toy.stranded.index ALFA/test-data/alfa_toy.unstranded.index ALFA/tool-data/alfa_indexes.loc.sample ALFA/tool_data_table_conf.xml.sample ALFA/tool_dependencies.xml |
b |
diff -r b26aec436ab5 -r 7782babe0a62 ALFA/.shed.yml --- a/ALFA/.shed.yml Thu Dec 21 09:29:26 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,14 +0,0 @@ -categories: -- Graphics -- Next Gen Mappers -- Sequence Analysis -- Visualization -description: A tool to Compute and display distribution of reads by genomic categories -long_description: | - ALFA provides a global overview of features distribution composing New Generation Sequencing dataset(s). - Given a set of aligned reads (BAM files) and an annotation file (GTF format), the tool produces plots of the raw and normalized distributions of those reads among genomic categories (stop codon, 5'-UTR, CDS, intergenic, etc.) and biotypes (protein coding genes, miRNA, tRNA, etc.). Whatever the sequencing technique, whatever the organism. - https://github.com/biocompibens/ALFA -name: alfa -owner: charles_bernard -remote_repository_url: https://github.com/biocompibens/ALFA/tree/master/Galaxy_toolshed_repositories/ALFA -type: unrestricted |
b |
diff -r b26aec436ab5 -r 7782babe0a62 ALFA/ALFA.py --- a/ALFA/ALFA.py Thu Dec 21 09:29:26 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,1160 +0,0 @@\n-#!/usr/bin/python\n-#-*- coding: utf-8 -*-\n-\n-__author__ = \'noel & bahin\'\n-\'\'\' <decription> \'\'\'\n-\n-import argparse\n-import os\n-import numpy\n-import sys\n-import subprocess\n-import matplotlib.pyplot as plt\n-import matplotlib.cm as cmx\n-import matplotlib.colors as colors\n-import matplotlib.patheffects as PathEffects\n-import re\n-from matplotlib.backends.backend_pdf import PdfPages\n-# To correctly embbed the texts when saving plots in svg format\n-import matplotlib\n-matplotlib.rcParams[\'svg.fonttype\'] = \'none\'\n-\n-##########################################################################\n-# FUNCTIONS #\n-##########################################################################\n-\n-def init_dict(d, key, init):\n-\tif key not in d:\n-\t\td[key] = init\n-\n-def get_chromosome_lengths(args):\n-\t"""\n-\tParse the file containing the chromosomes lengths.\n-\tIf no length file is provided, browse the annotation file (GTF) to estimate the chromosome sizes (\n-\t"""\n-\tlengths={}\n-\tgtf_chrom_names=set()\n-\tforce_get_lengths = False\n-\t# If the user provided the chromosome length file\n-\tif args.chr_len:\n-\t\twith open(args.chr_len, \'r\') as chr_len_file:\n-\t\t\tfor line in chr_len_file:\n-\t\t\t\tlengths[line.split(\'\\t\')[0]] = int(line.rstrip().split(\'\\t\')[1])\n-\t\twith open(args.annotation,\'r\') as gtf_file:\n-\t\t\tfor line in gtf_file:\n-\t\t\t\tif not line.startswith(\'#\'):\n-\t\t\t\t\tchrom = line.split(\'\\t\')[0]\n-\t\t\t\t\tif chrom not in gtf_chrom_names:\n-\t\t\t\t\t\tgtf_chrom_names.add(chrom)\n-\t\tfor chrom in lengths.keys():\n-\t\t\tif chrom not in gtf_chrom_names:\n-\t\t\t\tprint "Warning: at least one chromosome name (\'"+chrom+"\') of the file \'"+args.chr_len+"\'does not match any chromosome name if GTF and was ignored."\n-\t\t\t\t#del lengths[chrom]\n-\t\t\t\tbreak\n-\t\tfor chrom in gtf_chrom_names:\n-\t\t\tif force_get_lengths: break\n-\t\t\tif chrom not in lengths.keys():\n-\t\t\t\tprint "WARNING: chromosome name \'"+chrom+"\' was found in gtf and does not match any chromosome name provided in",args.chr_len+". "\n-\t\t\t\tprint "\\t=> The chromosome lenghts will be approximated using annotations in the GTF file."\n-\t\t\t\tcontinue_value =""\n-\t\t\t\twhile continue_value not in {"yes","y","no","n"}:\n-\t\t\t\t\tcontinue_value = raw_input("\\tDo you want to continue (\'yes\' or \'y\')?\\n\\tElse write \'no\' or \'n\' to exit the script and check your file of lengths.\\n")\n-\t\t\t\t\tif continue_value == "no" or continue_value == "n":\n-\t\t\t\t\t\tsys.exit("Exiting")\n-\t\t\t\t\telif continue_value == "yes" or continue_value == "y":\n-\t\t\t\t\t\tforce_get_lengths = True\n-\t\t\t\t\t\tbreak\n-\t\t\t\t\tprint "Error: use \'yes/y/no/n\' only"\n-\t\tif not force_get_lengths:\n-\t\t\treturn lengths\n-\t# Otherwise, (or if at least one chromosome was missing in chromosome lengths file) we consider the end of the last annotation of the chromosome in the GTF file as the chromosome length\n-\twith open(args.annotation, \'r\') as gtf_file:\n-\t\tfor line in gtf_file:\n-\t\t\tif not line.startswith(\'#\'):\n-\t\t\t\tchrom = line.split(\'\\t\')[0]\n-\t\t\t\tend = int(line.split(\'\\t\')[4])\n-\t\t\t\tinit_dict(lengths, chrom, 0)\n-\t\t\t\tlengths[chrom] = max(lengths[chrom], end)\n-\t\tif force_get_lengths:\n-\t\t\tprint "The chromosome lenghts have been approximated using the last annotations in the GTF file."\n-\t\treturn lengths\n-\n-def write_feature_on_index(feat,chrom, start, stop, sign, stranded_genome_index, unstranded_genome_index=None):\n-\tgrouped_by_biotype_features = []\n-\tfor biotype,categs in feat.iteritems():\n-\t\tcateg_list=[]\n-\t\tfor cat in set(categs):\n-\t\t\tcateg_list.append(cat)\n-\t\tgrouped_by_biotype_features.append(":".join((str(biotype),",".join(categ_list))))\n-\tstranded_genome_index.write(\'\\t\'.join((chrom, start, stop, sign,\'\'))+\'\\t\'.join(grouped_by_biotype_features)+\'\\n\')\n-\tif unstranded_genome_index :\n-\t\tunstranded_genome_index.write(\'\\t\'.join((chrom, start, stop, \'.\',\'\'))+\'\\t\'.join(grouped_by_biotype_features)+\'\\n\')\n-\n-\n-def add_info(cpt, feat_values, start, stop, chrom=None, unstranded_genome_index=None, stranded_genome_index = None , biotype_prios=None, coverage=1, cate'..b'mples_files,samples_names,prios,genome_index, options.strandness[0], biotype_prios = None)\n-\n-\t#### Write the counts on disk\n-\twrite_counts_in_files(cpt,cpt_genome)\n-\n-if not (intersect_reads or process_counts) or (options.quiet and options.pdf == False):\n-\tquit("\\n### End of program")\n-print "\\n### Generating plots"\n-# Updating the biotypes lists (biotypes and \'biotype_group1\'): adding the \'unknow biotypes\' found in gtf/index\n-if unknown_feature == []: # \'unknown_feature\' is define only during the index generation\n-\t# Browse the feature to determine whether some biotypes are \'unknown\'\n-\tfor sample,counts in cpt.items():\n-\t\tfor (cat,biot) in counts:\n-\t\t\tif biot not in biotypes and cat not in unknown_feature:\n-\t\t\t\tunknown_feature.append(biot)\n-for new_biot in unknown_feature:\n-\tbiotypes.add(new_biot)\n-\tbiotypes_group1["others"].append(new_biot)\n-biotypes = sorted(biotypes)\n-# move antisense categ to the end of the list\n-biotypes.remove(\'antisense\')\n-biotypes.append(\'antisense\')\n-biotypes_group1 = sorted(biotypes_group1)\n-\n-\n-#print \'\\nCounts for every category/biotype pair: \',cpt\n-\n-# Generating plots\n-if options.pdf != False:\n-\tif options.pdf == None:\n-\t\toptions.pdf = "categories_plots.pdf"\n-\tpdf = PdfPages(options.pdf)\n-else:\n-\tpdf = False\n-\n-selected_biotype = None\n-if options.biotype_filter:\n-\toptions.biotype_filter = options.biotype_filter[0]\n-\tfor sample in cpt:\n-\t\tfor feature in cpt[sample]:\n-\t\t\tbiotype = feature[1]\n-\t\t\tif options.biotype_filter.lower() == biotype.lower():\n-\t\t\t\tselected_biotype=biotype\n-\t\t\t\tbreak\n-\tif selected_biotype == None :\n-\t\tprint "\\nError: biotype \'"+options.biotype_filter+"\' not found. Please check the biotype name and that this biotype exists in your sample(s)."\n-\t\tsys.exit()\n-\n-#Print a warning message if the UTRs are not specified as 5\' or 3\' (they will be ploted as 5\'UTR)\n-if \'UTR\' in [categ[0] for counts in cpt.values() for categ in counts.keys()]:\n-\tprint \'\'\'\\nWARNING: (some) 5\'UTR/3\'UTR are not precisely defined. Consequently, positions annotated as "UTR" will be counted as "5\'UTR"\\n\'\'\'\n-\n-#### Make the plot by categories\n-\t#### Recategorizing with the final categories\n-final_cats=categs_groups[options.categories_depth-1]\n-final_cat_cpt,final_genome_cpt, filtered_cat_cpt = group_counts_by_categ(cpt,cpt_genome,final_cats,selected_biotype)\n-\t#### Display the distribution of specified categories (or biotypes) in samples on a barplot\n-# Remove the \'antisense\' category if the library type is \'unstranded\'\n-for dic in cpt.values():\n-\tif (\'antisense\',\'antisense\') in dic.keys(): break\n-else:\n-\tcat_list.remove(\'antisense\')\n-make_plot(cat_list,samples_names,final_cat_cpt,final_genome_cpt,pdf, "categories",options.threshold, svg = options.svg, png = options.png)\n-if selected_biotype :\n-\tmake_plot(cat_list,samples_names,filtered_cat_cpt,final_genome_cpt,pdf, "categories",options.threshold,title="Categories distribution for \'"+selected_biotype+"\' biotype", svg = options.svg, png = options.png)\n-\n-#### Make the plot by biotypes\n-\t#### Recategorizing with the final categories\n-final_cat_cpt,final_genome_cpt = group_counts_by_biotype(cpt,cpt_genome,biotypes)\n-\t#### Display the distribution of specified categories (or biotypes) in samples on a barplot\n-make_plot(biotypes,samples_names,final_cat_cpt,final_genome_cpt,pdf, "biotypes",options.threshold, svg = options.svg, png = options.png)\n-\n-\n-\n-\t##### Recategorizing with the final categories\n-#final_cat_cpt,final_genome_cpt = group_counts_by_biotype(cpt,cpt_genome,biotypes_group1)\n-\t##### Display the distribution of specified categories (or biotypes) in samples on a barplot\n-#make_plot(biotypes_group1,samples_names,final_cat_cpt,final_genome_cpt,pdf,"Biotype groups", options.threshold, title="Biotypes distribution in mapped reads \\n(biotypes are grouped by \'family\')", svg = options.svg, png = options.png)\n-\n-\n-if options.pdf:\n-\tpdf.close()\n-\tprint "\\n### Plots saved in pdf file: %s" %options.pdf\n-\t\n-print "\\n### End of program"\n\\ No newline at end of file\n' |
b |
diff -r b26aec436ab5 -r 7782babe0a62 ALFA/ALFA.xml --- a/ALFA/ALFA.xml Thu Dec 21 09:29:26 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,329 +0,0 @@\n-<tool id="alfa" name="ALFA" version="0.1.0">\n-\t<description>- Plot the distribution of genomic features in your aligned reads </description>\n-\n-\t<!-- ALFA requires bedtools suite v2.20.0 and above -->\n-\t<requirements>\n- \t<requirement type="package">bedtools</requirement>\n- \t<requirement type="package">samtools</requirement>\n- \t<requirement type="package">matplotlib</requirement>\n- \t</requirements>\n-\n-\t<command interpreter="python">\n-\t<![CDATA[\n-\t\tALFA_wrapper.py\n-\n-\t\t--project_name "${projectName}"\t\n-\n-\t\t##__INPUT 1: ANNOTATION OF THE SEQ/GENOME__##\n-\t\t#if str ( $annotation.annotationSource[\'annotationSourceSelection\'] ) == "index"\n-\t\t\t--index "$annotation.annotationSource[\'strandedIndex\']" "$annotation.annotationSource[\'unstrandedIndex\']"\n-\t\t#else if str ( $annotation.annotationSource[\'annotationSourceSelection\'] ) == "built_in_index"\n-\t\t\t--bi_index "$annotation.annotationSource.built_in_index_prefix.fields.prefix"\n-\t\t#else\n-\t\t\t--annotation "$annotation.annotationSource[\'annotationFile\']"\n-\t\t#end if\n-\n-\t\t##__INPUT 2: ALIGNED READS__##\n-\t\t--reads_format $reads.readsType[\'readsTypeSelection\']\n-\t\t\t--reads\n-\t\t#for $i, $r in enumerate ( $reads.readsType[\'readsList\'] ) \n-\t\t\t"__fname__$r.readsFile"\n-\t\t\t"__label__$r.readsLabel"\n-\t\t#end for\n-\t\t--strandness $reads[\'strandness\']\n-\n-\t\t##__OUTPUT FILES__##\n-\t\t#if str ( $outputFiles[\'plot\'] ) == "True"\n-\t\t\t#if str ( $outputOptions[\'plotFormat\'] ) == "pdf"\n-\t\t\t\t--output_pdf "$outputPdf"\n-\t\t\t#else if str ( $outputOptions[\'plotFormat\'] ) == "png"\n-\t\t\t\t--output_png "$outputCategoriesPng" "$outputBiotypesPng"\n-\t\t\t#else\n-\t\t\t\t--output_svg "$outputCategoriesSvg" "$outputBiotypesSvg"\n-\t\t\t#end if\n-\t\t#end if\n-\t\t#if str ( $outputFiles[\'countFile\'] ) == "True"\n-\t\t\t--output_count "$outputCountFile"\n-\t\t#end if\n-\t\t#if str ( $outputFiles[\'index\'] ) == "True"\n-\t\t\t--output_index "$outputStrandedIndex" "$outputUnstrandedIndex"\n-\t\t#end if\n-\n-\t\t##__OUTPUT OPTIONS__##\n-\t\t--categories_depth $outputOptions[\'categoriesDepth\']\n-\t\t#if str ( $outputFiles[\'plot\'] ) == "True"\n-\t\t\t--plot_format $outputOptions[\'plotFormat\']\n-\t\t\t#if str ( $outputOptions.plotThreshold[\'plotThresholdChoice\'] ) == "True"\n-\t\t\t\t--threshold $outputOptions.plotThreshold.yMin $outputOptions.plotThreshold.yMax\n-\t\t\t#end if\n-\t\t#end if\n-\n-\t\t--log_report "$logReport"\n-\t\t--tool_dir "$__tool_directory__"\n-\t]]>\n-\t</command>\n-\t<inputs>\n-\t\t<param name="projectName" value="ALFA" type="text" size="20" label="Project Name">\n-\t\t\t<validator type="empty_field" message="Please, specify a name for your project."/>\n-\t\t</param>\n-\n-\t\t<section name="annotation" title="INPUT 1: Annotation of your genome / sequence" expanded="True">\n-\t\t\t<conditional name="annotationSource">\n-\t\t\t\t<param name="annotationSourceSelection" type="select" label="Select the type of your annotation">\n-\t\t\t\t\t<option value="personal_gtf" selected="true">Personal annotation file (GTF format)</option>\n-\t\t\t\t\t<option value="index">Stranded and Unstranded Indexes previously generated by ALFA (Index format)</option>\n-\t\t\t\t\t<option value="built_in_index">Built-in indexes among a list of referenced genome (Index format)</option>\n-\t\t\t\t</param>\n-\t\t\t\t<when value="personal_gtf">\n-\t\t\t\t\t<param name="annotationFile" type="data" format="Gff, Gtf" label="Select your personal annotation file (GTF format)">\n-\t\t\t\t\t</param>\n-\t\t\t\t</when>\n-\t\t\t\t<when value="index">\n-\t\t\t\t\t<param name="strandedIndex" type="data" format="index" label="Select your ALFA Stranded index file (index format)"/>\n-\t\t\t\t\t<param name="unstrandedIndex" type="data" format="index" label="Select your ALFA Unstranded index file (index format)"/>\n-\t\t\t\t</when>\n-\t\t\t\t<when value="built_in_index">\n-\t\t\t\t\t<param name="built_in_index_prefix" type="select" label="Select Genome">\n-\t\t\t\t\t\t<options from_data_table="alfa_indexes">\n-\t\t\t\t\t\t\t<validator type="no_options" message="No indexes are available for the selected input dataset. Ask your Galaxy Admin for to use ALFA_data_manager tool to build such indexes!" />\n-\t\t\t\t\t\t</options>\n-\t\t\t\t\t</para'..b'assert_stdout>\n-\t\t</test>\n-\t</tests>\n-\n-\t<help>\n-<![CDATA[\n-**What it does**\n-\n-\n-\t| ALFA provides a global overview of features distribution composing New Generation Sequencing dataset(s). \n-\t|\n- \t| Given a set of aligned reads (BAM files) and an annotation file (GTF format), the tool produces plots of the raw and normalized distributions of those reads among genomic categories (stop codon, 5\'-UTR, CDS, intergenic, etc.) and biotypes (protein coding genes, miRNA, tRNA, etc.). Whatever the sequencing technique, whatever the organism.\n-\n-----\n-\n-**ALFA acronym**\n-\n-- Annotation Landscape For Aligned reads\n-\n-----\n-\n-**Official documentation of the tool**\n-\n-\n-- https://github.com/biocompibens/ALFA\n-\n-----\n-\n-**Detailed example**\n-\n-- https://github.com/biocompibens/ALFA#detailed-example\n-\n-----\n-\n-**Nota Bene**\n-\n-* **Input 1: Annotation File**\n-\n-\n-\t| ALFA requires as first input an annotation file (sequence, genome...) in gtf format in order to generate alfa indexes needed in a second round of the program.\n-\t| Indexes are files which list all the coordinates of the categories (stop codon, 5\'-UTR, CDS, intergenic...) and biotypes (protein coding genes, miRNA, tRNA, ...) encountered in the annotated sequence.\n-\t|\n-\t\n-\t.. class:: warningmark\n-\n-\t| Gtf File must be sorted.\n-\t|\n-\n-\t.. class:: infomark\n-\n-\t| Generation of indexes from an annotation file might be time consuming (i.e ~10min for the human genome). Thus, ALFA allows the user to submit directly indexes generated in previous runs as inputs for a new run.\n-\t|\n-\n-\t.. class:: infomark\n-\n-\t| ALFA also enables the use of built-in indexes to save even more computational time. In order to generate easily these built-in indexes, install the data manager tool `ALFA_data_manager`_ available on the toolshed.\n-\n-\t.. _data_manager_build_alfa_indexes: https://toolshed.g2.bx.psu.edu/view/charles-bernard/data_manager_build_alfa_indexes\n-\n-* **Input 2: Reads**\n-\n-\t| ALFA requires as second input a single or a set of mapped reads file(s) in either bam or bedgraph format. The coordinates of the mapped reads will be intersected with the according categories and biotypes mentioned in the indexes.\n-\t| The strandness option determines which strand of the annotated sequence will be taken into account during this intersection.\n-\t|\n-\n-\t.. class:: warningmark\n-\n-\t| Bam or Bedgraph file(s) must be sorted.\n-\t|\n-\n-\t.. class:: warningmark\n-\n-\t| Chromosome names in reads and in annotation file (gtf or indexes) must be the same for the intersection to occur\n-\t|\n-\n-* **Output files**\n-\n-\t| The result of the intersection is a count file displaying the count of nucleotides in the reads for each genomic categories and biotypes. From this count file, plots of the raw and normalized distributions of the reads among these categories are generated.\n-\t| In the output files section, the user can choose what kind of files he/she desires as ALFA output. Categories Count File and Plots are proposed by default. \n-\t|\n-\n-\t.. class:: infomark\n-\n-\t| The user can also select the \'indexes\' option as output. This option is interesting if you plan to run ALFA again with the same submitted annotation file. *See Nota Bene/Input 1: Annotation File for more information.*\n-\t|\n-\n-\t- `How the plots look like`_\n-\n-\t.. _How the plots look like: https://github.com/biocompibens/ALFA#plots\n-\n-\t|\n-\n-\t- `How they are generated`_ \n-\n-\t.. _How they are generated: https://github.com/biocompibens/ALFA#detailed-example\n-\n-----\n-\n-**ALFA Developpers**\n-\n-\t| Beno\xc3\xaet No\xc3\xabl and Mathieu Bahin: *compbio team, Institut de Biologie de l\'Ecole Normale Sup\xc3\xa9rieure de Paris*\n-\n-]]>\n- </help>\n-\n- <citations>\n- \t<citation type="bibtex">@MISC{\n- \t\tauthor="Beno\xc3\xaet No\xc3\xabl and Mathieu Bahin"\n- \t\ttitle="ALFA: Annotation Landscape For Aligned reads"\n- \t\tcrossref="https://github.com/biocompibens/ALFA"\n- \t\tinstitution="Institut de Biologie de l\'Ecole Normale Sup\xc3\xa9rieure de Paris"\n- \t\t}\n- \t</citation>\n- </citations>\n-</tool>\n' |
b |
diff -r b26aec436ab5 -r 7782babe0a62 ALFA/ALFA_wrapper.py --- a/ALFA/ALFA_wrapper.py Thu Dec 21 09:29:26 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,183 +0,0 @@\n-#!/usr/bin/python\n-\n-import argparse\n-import logging\n-import os\n-import re\n-import shutil\n-import subprocess\n-import sys\n-import tempfile\n-\n-def exit_and_explain(msg):\n- logging.critical(msg)\n- sys.exit(msg)\n-\n-def cleanup_before_exit(tmp_dir):\n- if tmp_dir and os.path.exists(tmp_dir):\n- shutil.rmtree(tmp_dir)\n-\n-def get_arg():\n- parser = argparse.ArgumentParser()\n- parser.add_argument(\'--project_name\', dest=\'project_name\', action=\'store\', nargs=1, metavar=\'project_name\', type=str)\n- #Input 1: Annotation File\n- parser.add_argument(\'--index\', dest=\'indexes\', action=\'store\', nargs=2, metavar=(\'stranded_index_filename\', \'unstranded_index_filename\'), type=str)\n- parser.add_argument(\'--bi_index\', dest=\'bi_indexes\', action=\'store\', nargs=1, metavar=\'built_in_indexes_dir_path\', type=str )\n- parser.add_argument(\'--annotation\', dest=\'annotation_file\', action=\'store\', nargs=1, metavar=\'annotation_gtf_file\', type=str )\n- #Input 2: Mapped Reads\n- parser.add_argument(\'--reads_format\', dest=\'reads_format\', action=\'store\', nargs=1, choices=[\'bam\', \'bedgraph\'], metavar=\'reads_format\', type=str)\n- parser.add_argument(\'--reads\', dest=\'reads\', action=\'store\', nargs=\'+\', metavar=(\'bam_file1 label1\',""), type=str)\n- parser.add_argument(\'--strandness\', dest=\'strandness\', action=\'store\', nargs=1, default=[\'unstranded\'], choices=[\'unstranded\', \'forward\', \'reverse\'], metavar=\'strandness\', type=str)\n- #Output files\n- parser.add_argument(\'--output_pdf\', dest=\'output_pdf\', action=\'store\', nargs=1, metavar=\'output_pdf_filename\', type=str)\n- parser.add_argument(\'--output_svg\', dest=\'output_svg\', action=\'store\', nargs=2, metavar=(\'categories_svg_filename\', \'biotypes_svg_filename\'), type=str)\n- parser.add_argument(\'--output_png\', dest=\'output_png\', action=\'store\', nargs=2, metavar=(\'categories_png_filename\', \'biotypes_png_filename\'), type=str)\n- parser.add_argument(\'--output_count\', dest=\'output_count\', action=\'store\', nargs=1, metavar=\'output_count_filename\', type=str)\n- parser.add_argument(\'--output_index\', dest=\'output_indexes\', action=\'store\', nargs=2, metavar=(\'output_stranded_index_filename\', \'output_unstranded_index_filename\'), type=str)\n- #Output Options\n- parser.add_argument(\'--categories_depth\', dest=\'categories_depth\', action=\'store\', nargs=1, default=[3], choices=range(1,5), metavar=\'categories_depth\', type=int)\n- parser.add_argument(\'--plot_format\', dest=\'plot_format\', action=\'store\', nargs=1, choices=[\'pdf\', \'png\', \'svg\'], metavar=\'plot_format\', type=str)\n- parser.add_argument(\'--threshold\', dest=\'threshold\', action=\'store\', nargs=2, metavar=(\'yMin\', \'yMax\'), type=float)\n- #Internal variables\n- parser.add_argument(\'--log_report\', dest=\'log_report\', action=\'store\', nargs=1, metavar=\'log_filename\', type=str)\n- parser.add_argument(\'--tool_dir\', dest=\'GALAXY_TOOL_DIR\', action=\'store\', nargs=1, metavar=\'galaxy_tool_dir_path\', type=str)\n- args = parser.parse_args()\n- return args\n-\n-def symlink_user_indexes(stranded_index, unstranded_index):\n- index=\'index\'\n- os.symlink(stranded_index, index + \'.stranded.index\')\n- os.symlink(unstranded_index, index + \'.unstranded.index\')\n- return index\n-\n-def get_input2_args(reads_list, format):\n- n = len(reads_list)\n- if n%2 != 0:\n- exit_and_explain(\'Problem with pairing reads filename and reads label\')\n- if format == \'bam\':\n- input2_args = \'--bam\'\n- elif format == \'begraph\':\n- input2_args = \'--bedgraph\'\n- input2_args=\'-i\'\n- k = 0\n- reads_filenames = [\'\'] * (n/2)\n- reads_labels = [\'\'] * (n/2)\n- for i in range(0, n, 2):\n- reads_filenames[k] = reads_list[i].split(\'__fname__\')[1]\n- cur_label = reads_list[i+1].split(\'__label__\')[1]\n- reads_labels[k] = re.sub(r\' \', \'_\', cur_label)\n- if not reads_labels[k]:\n- reads_labels[k] = \'sample_%s\' % str(k)\n- input2_args=\'%s "%s" "%s"\' % (input2_args, reads_filenames[k]'..b'ount_file.close()\n- merged_count_file.close()\n- return \'count_file.txt\'\n-\n-def main():\n- args = get_arg()\n-\n- if not (args.output_pdf or args.output_png or args.output_svg or args.output_indexes or args.output_count):\n- exit_and_explain(\'Error: no output to return\\nProcess Aborted\\n\')\n- tmp_dir = tempfile.mkdtemp(prefix=\'tmp\', suffix=\'\')\n- logging.basicConfig(level=logging.INFO, filename=args.log_report[0], filemode="a+", format=\'%(message)s\')\n- alfa_path = os.path.join(args.GALAXY_TOOL_DIR[0], \'ALFA.py\')\n-\n- #INPUT1: Annotation File\n- if args.indexes:\n- # The indexes submitted by the user must exhibit the suffix \'.(un)stranded.index\' and will be called by alfa by their prefix\n- index = symlink_user_indexes(args.indexes[0], args.indexes[1])\n- input1_args = \'-g "%s"\' % index\n- elif args.bi_indexes:\n- input1_args = \'-g "%s"\' % args.bi_indexes[0]\n- elif args.annotation_file:\n- input1_args = \'-a "%s"\' % args.annotation_file[0]\n- else:\n- exit_and_explain(\'No annotation file submitted !\')\n-\n- #INPUT 2: Mapped Reads\n- if args.reads:\n- input2_args, reads_filenames, reads_labels = get_input2_args(args.reads, args.reads_format[0])\n- strandness = \'-s %s\' % args.strandness[0]\n- else:\n- exit_and_explain(\'No reads submitted !\')\n-\n- ##Output options\n- categories_depth = \'-d %s\' % args.categories_depth[0]\n- if not (args.output_pdf or args.output_png or args.output_svg):\n- output_args = \'--n\'\n- else:\n- if args.output_pdf:\n- output_args = \'--pdf plot.pdf\'\n- if args.output_png:\n- output_args = \'--png plot\'\n- if args.output_svg:\n- output_args = \'--svg plot\'\n- if args.threshold:\n- output_args = \'%s -t %.3f %.3f\' % (output_args, args.threshold[0], args.threshold[1])\n-\n- ##Run alfa\n- cmd = \'python %s %s %s %s %s %s\' % (alfa_path, input1_args, input2_args, strandness, categories_depth, output_args)\n- logging.info("__________________________________________________________________\\n")\n- logging.info("Alfa execution")\n- logging.info("__________________________________________________________________\\n")\n- logging.info("Command Line:\\n%s\\n" % cmd)\n- logging.info("------------------------------------------------------------------\\n")\n- alfa_result = subprocess.Popen(args=cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)\n- alfa_out, alfa_err = alfa_result.communicate()\n-\n- ##Handle stdout, warning, errors...\n- redirect_errors(alfa_out, alfa_err)\n-\n- logging.info("Alfa prompt:\\n%s" % alfa_out)\n-\n- ##Redirect outputs\n- if args.output_pdf:\n- shutil.move(\'plot.pdf\', args.output_pdf[0])\n- if args.output_png:\n- shutil.move(\'plot\' + \'.categories.png\', args.output_png[0])\n- shutil.move(\'plot\' + \'.biotypes.png\', args.output_png[1])\n- if args.output_svg:\n- shutil.move(\'plot\' + \'.categories.svg\', args.output_svg[0])\n- shutil.move(\'plot\' + \'.biotypes.svg\', args.output_svg[1])\n- if args.output_count:\n- count_filename = merge_count_files(reads_labels)\n- shutil.move(count_filename, args.output_count[0])\n- if args.output_indexes:\n- if args.annotation_file:\n- indexes_regex = re.compile(\'.*\\.index\')\n- indexes = filter(indexes_regex.search, os.listdir(\'.\'))\n- indexes.sort()\n- shutil.move(indexes[0], args.output_indexes[0])\n- shutil.move(indexes[1], args.output_indexes[1])\n- if args.indexes:\n- shutil.move(index + \'.stranded.index\', args.output_indexes[0])\n- shutil.move(index + \'.unstranded.index\', args.output_indexes[1])\n- if args.bi_indexes:\n- shutil.move(args.bi_indexes[0] + \'.stranded.index\', args.output_index[0])\n- shutil.move(args.bi_indexes[1] + \'.unstranded.index\', args.output_index[1])\n-\n- cleanup_before_exit(tmp_dir)\n-main()\n' |
b |
diff -r b26aec436ab5 -r 7782babe0a62 ALFA/test-data/alfa_toy-Biofeatures Distribution.pdf |
b |
Binary file ALFA/test-data/alfa_toy-Biofeatures Distribution.pdf has changed |
b |
diff -r b26aec436ab5 -r 7782babe0a62 ALFA/test-data/alfa_toy.bam |
b |
Binary file ALFA/test-data/alfa_toy.bam has changed |
b |
diff -r b26aec436ab5 -r 7782babe0a62 ALFA/test-data/alfa_toy.bedgraph --- a/ALFA/test-data/alfa_toy.bedgraph Thu Dec 21 09:29:26 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,4 +0,0 @@ -Chr1 149 199 2 -Chr1 299 349 1 -Chr1 499 549 6 -Chr1 1099 1149 1 |
b |
diff -r b26aec436ab5 -r 7782babe0a62 ALFA/test-data/alfa_toy.categories_counts --- a/ALFA/test-data/alfa_toy.categories_counts Thu Dec 21 09:29:26 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,5 +0,0 @@ -#Category,biotype Counts_in_bam Size_in_genome -CDS,protein_coding 300.0 624.0 -five_prime_utr,protein_coding 75.0 250.5 -three_prime_utr,protein_coding 25.0 126.5 -intergenic,intergenic 100.0 249.0 |
b |
diff -r b26aec436ab5 -r 7782babe0a62 ALFA/test-data/alfa_toy.gtf --- a/ALFA/test-data/alfa_toy.gtf Thu Dec 21 09:29:26 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,6 +0,0 @@ -Chr1 ensembl_havana gene 250 1250 . + . gene_id "ENSMUSG00000051951"; gene_biotype "protein_coding"; -Chr1 ensembl_havana transcript 250 1250 . + . gene_id "ENSMUSG00000051951"; gene_biotype "protein_coding"; -Chr1 ensembl_havana exon 375 1000 . + . gene_id "ENSMUSG00000051951"; gene_biotype "protein_coding"; -Chr1 ensembl_havana CDS 375 1000 . + 0 gene_id "ENSMUSG00000051951"; gene_biotype "protein_coding"; -Chr1 ensembl_havana five_prime_utr 250 375 . - . gene_id "ENSMUSG00000051951"; gene_biotype "protein_coding"; -Chr1 ensembl_havana three_prime_utr 1000 1250 . - . gene_id "ENSMUSG00000051951"; gene_biotype "protein_coding"; |
b |
diff -r b26aec436ab5 -r 7782babe0a62 ALFA/test-data/alfa_toy.stranded.index --- a/ALFA/test-data/alfa_toy.stranded.index Thu Dec 21 09:29:26 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,11 +0,0 @@ -#Chr1 1250 -Chr1 249 374 + protein_coding:gene,transcript -Chr1 249 374 - protein_coding:five_prime_utr -Chr1 374 375 + protein_coding:exon,CDS -Chr1 374 375 - protein_coding:five_prime_utr,three_prime_utr -Chr1 375 999 + protein_coding:exon,CDS -Chr1 375 999 - antisense -Chr1 999 1000 + protein_coding:exon,CDS -Chr1 999 1000 - protein_coding:three_prime_utr -Chr1 1000 1250 + protein_coding:gene,transcript -Chr1 1000 1250 - protein_coding:five_prime_utr,three_prime_utr,exon,CDS |
b |
diff -r b26aec436ab5 -r 7782babe0a62 ALFA/test-data/alfa_toy.unstranded.index --- a/ALFA/test-data/alfa_toy.unstranded.index Thu Dec 21 09:29:26 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,6 +0,0 @@ -#Chr1 1250 -Chr1 249 374 . protein_coding:five_prime_utr,gene,transcript -Chr1 374 375 . protein_coding:five_prime_utr,three_prime_utr,exon,CDS -Chr1 375 999 . protein_coding:exon,CDS -Chr1 999 1000 . protein_coding:three_prime_utr,exon,CDS -Chr1 1000 1250 . protein_coding:five_prime_utr,exon,CDS,three_prime_utr,gene,transcript |
b |
diff -r b26aec436ab5 -r 7782babe0a62 ALFA/tool-data/alfa_indexes.loc.sample --- a/ALFA/tool-data/alfa_indexes.loc.sample Thu Dec 21 09:29:26 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,2 +0,0 @@ -#<species> <version> <release> <value> <dbkey> <name> <prefix> -#Dictyostelium_discoideum dicty_2 7 Dictyostelium_discoideum_dicty_2_7 Dictyostelium_discoideum_dicty_2_7 Dictyostelium_discoideum: dicty_2 (release 7) <path_to_dicty_indexes_dir> |
b |
diff -r b26aec436ab5 -r 7782babe0a62 ALFA/tool_data_table_conf.xml.sample --- a/ALFA/tool_data_table_conf.xml.sample Thu Dec 21 09:29:26 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,7 +0,0 @@ -<tables> - <!-- Locations of all alfa indexes --> - <table name="alfa_indexes" comment_char="#" allow_duplicate_entries="False"> - <columns>species, version, release, value, dbkey, name, prefix</columns> - <file path="tool-data/alfa_indexes.loc" /> - </table> -</tables> |
b |
diff -r b26aec436ab5 -r 7782babe0a62 ALFA/tool_dependencies.xml --- a/ALFA/tool_dependencies.xml Thu Dec 21 09:29:26 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,12 +0,0 @@ -<?xml version="1.0"?> -<tool_dependency> - <package name="bedtools"> - <repository changeset_revision="3416a1d4a582" name="package_bedtools_2_24" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" /> - </package> - <package name="samtools"> - <repository changeset_revision="f6ae3ba3f3c1" name="package_samtools_1_2" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" /> - </package> - <package name="matplotlib"> - <repository changeset_revision="f7424e1cf115" name="package_python_2_7_matplotlib_1_4" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" /> - </package> -</tool_dependency> |
b |
diff -r b26aec436ab5 -r 7782babe0a62 alfa/.shed.yml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/alfa/.shed.yml Thu Dec 21 09:31:06 2017 -0500 |
b |
@@ -0,0 +1,14 @@ +categories: +- Graphics +- Next Gen Mappers +- Sequence Analysis +- Visualization +description: A tool to Compute and display distribution of reads by genomic categories +long_description: | + ALFA provides a global overview of features distribution composing New Generation Sequencing dataset(s). + Given a set of aligned reads (BAM files) and an annotation file (GTF format), the tool produces plots of the raw and normalized distributions of those reads among genomic categories (stop codon, 5'-UTR, CDS, intergenic, etc.) and biotypes (protein coding genes, miRNA, tRNA, etc.). Whatever the sequencing technique, whatever the organism. + https://github.com/biocompibens/ALFA +name: alfa +owner: charles_bernard +remote_repository_url: https://github.com/biocompibens/ALFA/tree/master/Galaxy_toolshed_repositories/ALFA +type: unrestricted |
b |
diff -r b26aec436ab5 -r 7782babe0a62 alfa/ALFA.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/alfa/ALFA.py Thu Dec 21 09:31:06 2017 -0500 |
[ |
b'@@ -0,0 +1,1160 @@\n+#!/usr/bin/python\n+#-*- coding: utf-8 -*-\n+\n+__author__ = \'noel & bahin\'\n+\'\'\' <decription> \'\'\'\n+\n+import argparse\n+import os\n+import numpy\n+import sys\n+import subprocess\n+import matplotlib.pyplot as plt\n+import matplotlib.cm as cmx\n+import matplotlib.colors as colors\n+import matplotlib.patheffects as PathEffects\n+import re\n+from matplotlib.backends.backend_pdf import PdfPages\n+# To correctly embbed the texts when saving plots in svg format\n+import matplotlib\n+matplotlib.rcParams[\'svg.fonttype\'] = \'none\'\n+\n+##########################################################################\n+# FUNCTIONS #\n+##########################################################################\n+\n+def init_dict(d, key, init):\n+\tif key not in d:\n+\t\td[key] = init\n+\n+def get_chromosome_lengths(args):\n+\t"""\n+\tParse the file containing the chromosomes lengths.\n+\tIf no length file is provided, browse the annotation file (GTF) to estimate the chromosome sizes (\n+\t"""\n+\tlengths={}\n+\tgtf_chrom_names=set()\n+\tforce_get_lengths = False\n+\t# If the user provided the chromosome length file\n+\tif args.chr_len:\n+\t\twith open(args.chr_len, \'r\') as chr_len_file:\n+\t\t\tfor line in chr_len_file:\n+\t\t\t\tlengths[line.split(\'\\t\')[0]] = int(line.rstrip().split(\'\\t\')[1])\n+\t\twith open(args.annotation,\'r\') as gtf_file:\n+\t\t\tfor line in gtf_file:\n+\t\t\t\tif not line.startswith(\'#\'):\n+\t\t\t\t\tchrom = line.split(\'\\t\')[0]\n+\t\t\t\t\tif chrom not in gtf_chrom_names:\n+\t\t\t\t\t\tgtf_chrom_names.add(chrom)\n+\t\tfor chrom in lengths.keys():\n+\t\t\tif chrom not in gtf_chrom_names:\n+\t\t\t\tprint "Warning: at least one chromosome name (\'"+chrom+"\') of the file \'"+args.chr_len+"\'does not match any chromosome name if GTF and was ignored."\n+\t\t\t\t#del lengths[chrom]\n+\t\t\t\tbreak\n+\t\tfor chrom in gtf_chrom_names:\n+\t\t\tif force_get_lengths: break\n+\t\t\tif chrom not in lengths.keys():\n+\t\t\t\tprint "WARNING: chromosome name \'"+chrom+"\' was found in gtf and does not match any chromosome name provided in",args.chr_len+". "\n+\t\t\t\tprint "\\t=> The chromosome lenghts will be approximated using annotations in the GTF file."\n+\t\t\t\tcontinue_value =""\n+\t\t\t\twhile continue_value not in {"yes","y","no","n"}:\n+\t\t\t\t\tcontinue_value = raw_input("\\tDo you want to continue (\'yes\' or \'y\')?\\n\\tElse write \'no\' or \'n\' to exit the script and check your file of lengths.\\n")\n+\t\t\t\t\tif continue_value == "no" or continue_value == "n":\n+\t\t\t\t\t\tsys.exit("Exiting")\n+\t\t\t\t\telif continue_value == "yes" or continue_value == "y":\n+\t\t\t\t\t\tforce_get_lengths = True\n+\t\t\t\t\t\tbreak\n+\t\t\t\t\tprint "Error: use \'yes/y/no/n\' only"\n+\t\tif not force_get_lengths:\n+\t\t\treturn lengths\n+\t# Otherwise, (or if at least one chromosome was missing in chromosome lengths file) we consider the end of the last annotation of the chromosome in the GTF file as the chromosome length\n+\twith open(args.annotation, \'r\') as gtf_file:\n+\t\tfor line in gtf_file:\n+\t\t\tif not line.startswith(\'#\'):\n+\t\t\t\tchrom = line.split(\'\\t\')[0]\n+\t\t\t\tend = int(line.split(\'\\t\')[4])\n+\t\t\t\tinit_dict(lengths, chrom, 0)\n+\t\t\t\tlengths[chrom] = max(lengths[chrom], end)\n+\t\tif force_get_lengths:\n+\t\t\tprint "The chromosome lenghts have been approximated using the last annotations in the GTF file."\n+\t\treturn lengths\n+\n+def write_feature_on_index(feat,chrom, start, stop, sign, stranded_genome_index, unstranded_genome_index=None):\n+\tgrouped_by_biotype_features = []\n+\tfor biotype,categs in feat.iteritems():\n+\t\tcateg_list=[]\n+\t\tfor cat in set(categs):\n+\t\t\tcateg_list.append(cat)\n+\t\tgrouped_by_biotype_features.append(":".join((str(biotype),",".join(categ_list))))\n+\tstranded_genome_index.write(\'\\t\'.join((chrom, start, stop, sign,\'\'))+\'\\t\'.join(grouped_by_biotype_features)+\'\\n\')\n+\tif unstranded_genome_index :\n+\t\tunstranded_genome_index.write(\'\\t\'.join((chrom, start, stop, \'.\',\'\'))+\'\\t\'.join(grouped_by_biotype_features)+\'\\n\')\n+\n+\n+def add_info(cpt, feat_values, start, stop, chrom=None, unstranded_genome_index=None, stranded_genome_index = None , biotype_prios=None, coverage=1, cate'..b'mples_files,samples_names,prios,genome_index, options.strandness[0], biotype_prios = None)\n+\n+\t#### Write the counts on disk\n+\twrite_counts_in_files(cpt,cpt_genome)\n+\n+if not (intersect_reads or process_counts) or (options.quiet and options.pdf == False):\n+\tquit("\\n### End of program")\n+print "\\n### Generating plots"\n+# Updating the biotypes lists (biotypes and \'biotype_group1\'): adding the \'unknow biotypes\' found in gtf/index\n+if unknown_feature == []: # \'unknown_feature\' is define only during the index generation\n+\t# Browse the feature to determine whether some biotypes are \'unknown\'\n+\tfor sample,counts in cpt.items():\n+\t\tfor (cat,biot) in counts:\n+\t\t\tif biot not in biotypes and cat not in unknown_feature:\n+\t\t\t\tunknown_feature.append(biot)\n+for new_biot in unknown_feature:\n+\tbiotypes.add(new_biot)\n+\tbiotypes_group1["others"].append(new_biot)\n+biotypes = sorted(biotypes)\n+# move antisense categ to the end of the list\n+biotypes.remove(\'antisense\')\n+biotypes.append(\'antisense\')\n+biotypes_group1 = sorted(biotypes_group1)\n+\n+\n+#print \'\\nCounts for every category/biotype pair: \',cpt\n+\n+# Generating plots\n+if options.pdf != False:\n+\tif options.pdf == None:\n+\t\toptions.pdf = "categories_plots.pdf"\n+\tpdf = PdfPages(options.pdf)\n+else:\n+\tpdf = False\n+\n+selected_biotype = None\n+if options.biotype_filter:\n+\toptions.biotype_filter = options.biotype_filter[0]\n+\tfor sample in cpt:\n+\t\tfor feature in cpt[sample]:\n+\t\t\tbiotype = feature[1]\n+\t\t\tif options.biotype_filter.lower() == biotype.lower():\n+\t\t\t\tselected_biotype=biotype\n+\t\t\t\tbreak\n+\tif selected_biotype == None :\n+\t\tprint "\\nError: biotype \'"+options.biotype_filter+"\' not found. Please check the biotype name and that this biotype exists in your sample(s)."\n+\t\tsys.exit()\n+\n+#Print a warning message if the UTRs are not specified as 5\' or 3\' (they will be ploted as 5\'UTR)\n+if \'UTR\' in [categ[0] for counts in cpt.values() for categ in counts.keys()]:\n+\tprint \'\'\'\\nWARNING: (some) 5\'UTR/3\'UTR are not precisely defined. Consequently, positions annotated as "UTR" will be counted as "5\'UTR"\\n\'\'\'\n+\n+#### Make the plot by categories\n+\t#### Recategorizing with the final categories\n+final_cats=categs_groups[options.categories_depth-1]\n+final_cat_cpt,final_genome_cpt, filtered_cat_cpt = group_counts_by_categ(cpt,cpt_genome,final_cats,selected_biotype)\n+\t#### Display the distribution of specified categories (or biotypes) in samples on a barplot\n+# Remove the \'antisense\' category if the library type is \'unstranded\'\n+for dic in cpt.values():\n+\tif (\'antisense\',\'antisense\') in dic.keys(): break\n+else:\n+\tcat_list.remove(\'antisense\')\n+make_plot(cat_list,samples_names,final_cat_cpt,final_genome_cpt,pdf, "categories",options.threshold, svg = options.svg, png = options.png)\n+if selected_biotype :\n+\tmake_plot(cat_list,samples_names,filtered_cat_cpt,final_genome_cpt,pdf, "categories",options.threshold,title="Categories distribution for \'"+selected_biotype+"\' biotype", svg = options.svg, png = options.png)\n+\n+#### Make the plot by biotypes\n+\t#### Recategorizing with the final categories\n+final_cat_cpt,final_genome_cpt = group_counts_by_biotype(cpt,cpt_genome,biotypes)\n+\t#### Display the distribution of specified categories (or biotypes) in samples on a barplot\n+make_plot(biotypes,samples_names,final_cat_cpt,final_genome_cpt,pdf, "biotypes",options.threshold, svg = options.svg, png = options.png)\n+\n+\n+\n+\t##### Recategorizing with the final categories\n+#final_cat_cpt,final_genome_cpt = group_counts_by_biotype(cpt,cpt_genome,biotypes_group1)\n+\t##### Display the distribution of specified categories (or biotypes) in samples on a barplot\n+#make_plot(biotypes_group1,samples_names,final_cat_cpt,final_genome_cpt,pdf,"Biotype groups", options.threshold, title="Biotypes distribution in mapped reads \\n(biotypes are grouped by \'family\')", svg = options.svg, png = options.png)\n+\n+\n+if options.pdf:\n+\tpdf.close()\n+\tprint "\\n### Plots saved in pdf file: %s" %options.pdf\n+\t\n+print "\\n### End of program"\n\\ No newline at end of file\n' |
b |
diff -r b26aec436ab5 -r 7782babe0a62 alfa/ALFA.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/alfa/ALFA.xml Thu Dec 21 09:31:06 2017 -0500 |
[ |
b'@@ -0,0 +1,329 @@\n+<tool id="alfa" name="ALFA" version="0.1.0">\n+\t<description>- Plot the distribution of genomic features in your aligned reads </description>\n+\n+\t<!-- ALFA requires bedtools suite v2.20.0 and above -->\n+\t<requirements>\n+ \t<requirement type="package">bedtools</requirement>\n+ \t<requirement type="package">samtools</requirement>\n+ \t<requirement type="package">matplotlib</requirement>\n+ \t</requirements>\n+\n+\t<command interpreter="python">\n+\t<![CDATA[\n+\t\tALFA_wrapper.py\n+\n+\t\t--project_name "${projectName}"\t\n+\n+\t\t##__INPUT 1: ANNOTATION OF THE SEQ/GENOME__##\n+\t\t#if str ( $annotation.annotationSource[\'annotationSourceSelection\'] ) == "index"\n+\t\t\t--index "$annotation.annotationSource[\'strandedIndex\']" "$annotation.annotationSource[\'unstrandedIndex\']"\n+\t\t#else if str ( $annotation.annotationSource[\'annotationSourceSelection\'] ) == "built_in_index"\n+\t\t\t--bi_index "$annotation.annotationSource.built_in_index_prefix.fields.prefix"\n+\t\t#else\n+\t\t\t--annotation "$annotation.annotationSource[\'annotationFile\']"\n+\t\t#end if\n+\n+\t\t##__INPUT 2: ALIGNED READS__##\n+\t\t--reads_format $reads.readsType[\'readsTypeSelection\']\n+\t\t\t--reads\n+\t\t#for $i, $r in enumerate ( $reads.readsType[\'readsList\'] ) \n+\t\t\t"__fname__$r.readsFile"\n+\t\t\t"__label__$r.readsLabel"\n+\t\t#end for\n+\t\t--strandness $reads[\'strandness\']\n+\n+\t\t##__OUTPUT FILES__##\n+\t\t#if str ( $outputFiles[\'plot\'] ) == "True"\n+\t\t\t#if str ( $outputOptions[\'plotFormat\'] ) == "pdf"\n+\t\t\t\t--output_pdf "$outputPdf"\n+\t\t\t#else if str ( $outputOptions[\'plotFormat\'] ) == "png"\n+\t\t\t\t--output_png "$outputCategoriesPng" "$outputBiotypesPng"\n+\t\t\t#else\n+\t\t\t\t--output_svg "$outputCategoriesSvg" "$outputBiotypesSvg"\n+\t\t\t#end if\n+\t\t#end if\n+\t\t#if str ( $outputFiles[\'countFile\'] ) == "True"\n+\t\t\t--output_count "$outputCountFile"\n+\t\t#end if\n+\t\t#if str ( $outputFiles[\'index\'] ) == "True"\n+\t\t\t--output_index "$outputStrandedIndex" "$outputUnstrandedIndex"\n+\t\t#end if\n+\n+\t\t##__OUTPUT OPTIONS__##\n+\t\t--categories_depth $outputOptions[\'categoriesDepth\']\n+\t\t#if str ( $outputFiles[\'plot\'] ) == "True"\n+\t\t\t--plot_format $outputOptions[\'plotFormat\']\n+\t\t\t#if str ( $outputOptions.plotThreshold[\'plotThresholdChoice\'] ) == "True"\n+\t\t\t\t--threshold $outputOptions.plotThreshold.yMin $outputOptions.plotThreshold.yMax\n+\t\t\t#end if\n+\t\t#end if\n+\n+\t\t--log_report "$logReport"\n+\t\t--tool_dir "$__tool_directory__"\n+\t]]>\n+\t</command>\n+\t<inputs>\n+\t\t<param name="projectName" value="ALFA" type="text" size="20" label="Project Name">\n+\t\t\t<validator type="empty_field" message="Please, specify a name for your project."/>\n+\t\t</param>\n+\n+\t\t<section name="annotation" title="INPUT 1: Annotation of your genome / sequence" expanded="True">\n+\t\t\t<conditional name="annotationSource">\n+\t\t\t\t<param name="annotationSourceSelection" type="select" label="Select the type of your annotation">\n+\t\t\t\t\t<option value="personal_gtf" selected="true">Personal annotation file (GTF format)</option>\n+\t\t\t\t\t<option value="index">Stranded and Unstranded Indexes previously generated by ALFA (Index format)</option>\n+\t\t\t\t\t<option value="built_in_index">Built-in indexes among a list of referenced genome (Index format)</option>\n+\t\t\t\t</param>\n+\t\t\t\t<when value="personal_gtf">\n+\t\t\t\t\t<param name="annotationFile" type="data" format="Gff, Gtf" label="Select your personal annotation file (GTF format)">\n+\t\t\t\t\t</param>\n+\t\t\t\t</when>\n+\t\t\t\t<when value="index">\n+\t\t\t\t\t<param name="strandedIndex" type="data" format="index" label="Select your ALFA Stranded index file (index format)"/>\n+\t\t\t\t\t<param name="unstrandedIndex" type="data" format="index" label="Select your ALFA Unstranded index file (index format)"/>\n+\t\t\t\t</when>\n+\t\t\t\t<when value="built_in_index">\n+\t\t\t\t\t<param name="built_in_index_prefix" type="select" label="Select Genome">\n+\t\t\t\t\t\t<options from_data_table="alfa_indexes">\n+\t\t\t\t\t\t\t<validator type="no_options" message="No indexes are available for the selected input dataset. Ask your Galaxy Admin for to use ALFA_data_manager tool to build such indexes!" />\n+\t\t\t\t\t\t</options>\n+\t\t\t\t\t</para'..b'assert_stdout>\n+\t\t</test>\n+\t</tests>\n+\n+\t<help>\n+<![CDATA[\n+**What it does**\n+\n+\n+\t| ALFA provides a global overview of features distribution composing New Generation Sequencing dataset(s). \n+\t|\n+ \t| Given a set of aligned reads (BAM files) and an annotation file (GTF format), the tool produces plots of the raw and normalized distributions of those reads among genomic categories (stop codon, 5\'-UTR, CDS, intergenic, etc.) and biotypes (protein coding genes, miRNA, tRNA, etc.). Whatever the sequencing technique, whatever the organism.\n+\n+----\n+\n+**ALFA acronym**\n+\n+- Annotation Landscape For Aligned reads\n+\n+----\n+\n+**Official documentation of the tool**\n+\n+\n+- https://github.com/biocompibens/ALFA\n+\n+----\n+\n+**Detailed example**\n+\n+- https://github.com/biocompibens/ALFA#detailed-example\n+\n+----\n+\n+**Nota Bene**\n+\n+* **Input 1: Annotation File**\n+\n+\n+\t| ALFA requires as first input an annotation file (sequence, genome...) in gtf format in order to generate alfa indexes needed in a second round of the program.\n+\t| Indexes are files which list all the coordinates of the categories (stop codon, 5\'-UTR, CDS, intergenic...) and biotypes (protein coding genes, miRNA, tRNA, ...) encountered in the annotated sequence.\n+\t|\n+\t\n+\t.. class:: warningmark\n+\n+\t| Gtf File must be sorted.\n+\t|\n+\n+\t.. class:: infomark\n+\n+\t| Generation of indexes from an annotation file might be time consuming (i.e ~10min for the human genome). Thus, ALFA allows the user to submit directly indexes generated in previous runs as inputs for a new run.\n+\t|\n+\n+\t.. class:: infomark\n+\n+\t| ALFA also enables the use of built-in indexes to save even more computational time. In order to generate easily these built-in indexes, install the data manager tool `ALFA_data_manager`_ available on the toolshed.\n+\n+\t.. _data_manager_build_alfa_indexes: https://toolshed.g2.bx.psu.edu/view/charles-bernard/data_manager_build_alfa_indexes\n+\n+* **Input 2: Reads**\n+\n+\t| ALFA requires as second input a single or a set of mapped reads file(s) in either bam or bedgraph format. The coordinates of the mapped reads will be intersected with the according categories and biotypes mentioned in the indexes.\n+\t| The strandness option determines which strand of the annotated sequence will be taken into account during this intersection.\n+\t|\n+\n+\t.. class:: warningmark\n+\n+\t| Bam or Bedgraph file(s) must be sorted.\n+\t|\n+\n+\t.. class:: warningmark\n+\n+\t| Chromosome names in reads and in annotation file (gtf or indexes) must be the same for the intersection to occur\n+\t|\n+\n+* **Output files**\n+\n+\t| The result of the intersection is a count file displaying the count of nucleotides in the reads for each genomic categories and biotypes. From this count file, plots of the raw and normalized distributions of the reads among these categories are generated.\n+\t| In the output files section, the user can choose what kind of files he/she desires as ALFA output. Categories Count File and Plots are proposed by default. \n+\t|\n+\n+\t.. class:: infomark\n+\n+\t| The user can also select the \'indexes\' option as output. This option is interesting if you plan to run ALFA again with the same submitted annotation file. *See Nota Bene/Input 1: Annotation File for more information.*\n+\t|\n+\n+\t- `How the plots look like`_\n+\n+\t.. _How the plots look like: https://github.com/biocompibens/ALFA#plots\n+\n+\t|\n+\n+\t- `How they are generated`_ \n+\n+\t.. _How they are generated: https://github.com/biocompibens/ALFA#detailed-example\n+\n+----\n+\n+**ALFA Developpers**\n+\n+\t| Beno\xc3\xaet No\xc3\xabl and Mathieu Bahin: *compbio team, Institut de Biologie de l\'Ecole Normale Sup\xc3\xa9rieure de Paris*\n+\n+]]>\n+ </help>\n+\n+ <citations>\n+ \t<citation type="bibtex">@MISC{\n+ \t\tauthor="Beno\xc3\xaet No\xc3\xabl and Mathieu Bahin"\n+ \t\ttitle="ALFA: Annotation Landscape For Aligned reads"\n+ \t\tcrossref="https://github.com/biocompibens/ALFA"\n+ \t\tinstitution="Institut de Biologie de l\'Ecole Normale Sup\xc3\xa9rieure de Paris"\n+ \t\t}\n+ \t</citation>\n+ </citations>\n+</tool>\n' |
b |
diff -r b26aec436ab5 -r 7782babe0a62 alfa/ALFA_wrapper.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/alfa/ALFA_wrapper.py Thu Dec 21 09:31:06 2017 -0500 |
[ |
b'@@ -0,0 +1,183 @@\n+#!/usr/bin/python\n+\n+import argparse\n+import logging\n+import os\n+import re\n+import shutil\n+import subprocess\n+import sys\n+import tempfile\n+\n+def exit_and_explain(msg):\n+ logging.critical(msg)\n+ sys.exit(msg)\n+\n+def cleanup_before_exit(tmp_dir):\n+ if tmp_dir and os.path.exists(tmp_dir):\n+ shutil.rmtree(tmp_dir)\n+\n+def get_arg():\n+ parser = argparse.ArgumentParser()\n+ parser.add_argument(\'--project_name\', dest=\'project_name\', action=\'store\', nargs=1, metavar=\'project_name\', type=str)\n+ #Input 1: Annotation File\n+ parser.add_argument(\'--index\', dest=\'indexes\', action=\'store\', nargs=2, metavar=(\'stranded_index_filename\', \'unstranded_index_filename\'), type=str)\n+ parser.add_argument(\'--bi_index\', dest=\'bi_indexes\', action=\'store\', nargs=1, metavar=\'built_in_indexes_dir_path\', type=str )\n+ parser.add_argument(\'--annotation\', dest=\'annotation_file\', action=\'store\', nargs=1, metavar=\'annotation_gtf_file\', type=str )\n+ #Input 2: Mapped Reads\n+ parser.add_argument(\'--reads_format\', dest=\'reads_format\', action=\'store\', nargs=1, choices=[\'bam\', \'bedgraph\'], metavar=\'reads_format\', type=str)\n+ parser.add_argument(\'--reads\', dest=\'reads\', action=\'store\', nargs=\'+\', metavar=(\'bam_file1 label1\',""), type=str)\n+ parser.add_argument(\'--strandness\', dest=\'strandness\', action=\'store\', nargs=1, default=[\'unstranded\'], choices=[\'unstranded\', \'forward\', \'reverse\'], metavar=\'strandness\', type=str)\n+ #Output files\n+ parser.add_argument(\'--output_pdf\', dest=\'output_pdf\', action=\'store\', nargs=1, metavar=\'output_pdf_filename\', type=str)\n+ parser.add_argument(\'--output_svg\', dest=\'output_svg\', action=\'store\', nargs=2, metavar=(\'categories_svg_filename\', \'biotypes_svg_filename\'), type=str)\n+ parser.add_argument(\'--output_png\', dest=\'output_png\', action=\'store\', nargs=2, metavar=(\'categories_png_filename\', \'biotypes_png_filename\'), type=str)\n+ parser.add_argument(\'--output_count\', dest=\'output_count\', action=\'store\', nargs=1, metavar=\'output_count_filename\', type=str)\n+ parser.add_argument(\'--output_index\', dest=\'output_indexes\', action=\'store\', nargs=2, metavar=(\'output_stranded_index_filename\', \'output_unstranded_index_filename\'), type=str)\n+ #Output Options\n+ parser.add_argument(\'--categories_depth\', dest=\'categories_depth\', action=\'store\', nargs=1, default=[3], choices=range(1,5), metavar=\'categories_depth\', type=int)\n+ parser.add_argument(\'--plot_format\', dest=\'plot_format\', action=\'store\', nargs=1, choices=[\'pdf\', \'png\', \'svg\'], metavar=\'plot_format\', type=str)\n+ parser.add_argument(\'--threshold\', dest=\'threshold\', action=\'store\', nargs=2, metavar=(\'yMin\', \'yMax\'), type=float)\n+ #Internal variables\n+ parser.add_argument(\'--log_report\', dest=\'log_report\', action=\'store\', nargs=1, metavar=\'log_filename\', type=str)\n+ parser.add_argument(\'--tool_dir\', dest=\'GALAXY_TOOL_DIR\', action=\'store\', nargs=1, metavar=\'galaxy_tool_dir_path\', type=str)\n+ args = parser.parse_args()\n+ return args\n+\n+def symlink_user_indexes(stranded_index, unstranded_index):\n+ index=\'index\'\n+ os.symlink(stranded_index, index + \'.stranded.index\')\n+ os.symlink(unstranded_index, index + \'.unstranded.index\')\n+ return index\n+\n+def get_input2_args(reads_list, format):\n+ n = len(reads_list)\n+ if n%2 != 0:\n+ exit_and_explain(\'Problem with pairing reads filename and reads label\')\n+ if format == \'bam\':\n+ input2_args = \'--bam\'\n+ elif format == \'begraph\':\n+ input2_args = \'--bedgraph\'\n+ input2_args=\'-i\'\n+ k = 0\n+ reads_filenames = [\'\'] * (n/2)\n+ reads_labels = [\'\'] * (n/2)\n+ for i in range(0, n, 2):\n+ reads_filenames[k] = reads_list[i].split(\'__fname__\')[1]\n+ cur_label = reads_list[i+1].split(\'__label__\')[1]\n+ reads_labels[k] = re.sub(r\' \', \'_\', cur_label)\n+ if not reads_labels[k]:\n+ reads_labels[k] = \'sample_%s\' % str(k)\n+ input2_args=\'%s "%s" "%s"\' % (input2_args, reads_filenames[k]'..b'ount_file.close()\n+ merged_count_file.close()\n+ return \'count_file.txt\'\n+\n+def main():\n+ args = get_arg()\n+\n+ if not (args.output_pdf or args.output_png or args.output_svg or args.output_indexes or args.output_count):\n+ exit_and_explain(\'Error: no output to return\\nProcess Aborted\\n\')\n+ tmp_dir = tempfile.mkdtemp(prefix=\'tmp\', suffix=\'\')\n+ logging.basicConfig(level=logging.INFO, filename=args.log_report[0], filemode="a+", format=\'%(message)s\')\n+ alfa_path = os.path.join(args.GALAXY_TOOL_DIR[0], \'ALFA.py\')\n+\n+ #INPUT1: Annotation File\n+ if args.indexes:\n+ # The indexes submitted by the user must exhibit the suffix \'.(un)stranded.index\' and will be called by alfa by their prefix\n+ index = symlink_user_indexes(args.indexes[0], args.indexes[1])\n+ input1_args = \'-g "%s"\' % index\n+ elif args.bi_indexes:\n+ input1_args = \'-g "%s"\' % args.bi_indexes[0]\n+ elif args.annotation_file:\n+ input1_args = \'-a "%s"\' % args.annotation_file[0]\n+ else:\n+ exit_and_explain(\'No annotation file submitted !\')\n+\n+ #INPUT 2: Mapped Reads\n+ if args.reads:\n+ input2_args, reads_filenames, reads_labels = get_input2_args(args.reads, args.reads_format[0])\n+ strandness = \'-s %s\' % args.strandness[0]\n+ else:\n+ exit_and_explain(\'No reads submitted !\')\n+\n+ ##Output options\n+ categories_depth = \'-d %s\' % args.categories_depth[0]\n+ if not (args.output_pdf or args.output_png or args.output_svg):\n+ output_args = \'--n\'\n+ else:\n+ if args.output_pdf:\n+ output_args = \'--pdf plot.pdf\'\n+ if args.output_png:\n+ output_args = \'--png plot\'\n+ if args.output_svg:\n+ output_args = \'--svg plot\'\n+ if args.threshold:\n+ output_args = \'%s -t %.3f %.3f\' % (output_args, args.threshold[0], args.threshold[1])\n+\n+ ##Run alfa\n+ cmd = \'python %s %s %s %s %s %s\' % (alfa_path, input1_args, input2_args, strandness, categories_depth, output_args)\n+ logging.info("__________________________________________________________________\\n")\n+ logging.info("Alfa execution")\n+ logging.info("__________________________________________________________________\\n")\n+ logging.info("Command Line:\\n%s\\n" % cmd)\n+ logging.info("------------------------------------------------------------------\\n")\n+ alfa_result = subprocess.Popen(args=cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)\n+ alfa_out, alfa_err = alfa_result.communicate()\n+\n+ ##Handle stdout, warning, errors...\n+ redirect_errors(alfa_out, alfa_err)\n+\n+ logging.info("Alfa prompt:\\n%s" % alfa_out)\n+\n+ ##Redirect outputs\n+ if args.output_pdf:\n+ shutil.move(\'plot.pdf\', args.output_pdf[0])\n+ if args.output_png:\n+ shutil.move(\'plot\' + \'.categories.png\', args.output_png[0])\n+ shutil.move(\'plot\' + \'.biotypes.png\', args.output_png[1])\n+ if args.output_svg:\n+ shutil.move(\'plot\' + \'.categories.svg\', args.output_svg[0])\n+ shutil.move(\'plot\' + \'.biotypes.svg\', args.output_svg[1])\n+ if args.output_count:\n+ count_filename = merge_count_files(reads_labels)\n+ shutil.move(count_filename, args.output_count[0])\n+ if args.output_indexes:\n+ if args.annotation_file:\n+ indexes_regex = re.compile(\'.*\\.index\')\n+ indexes = filter(indexes_regex.search, os.listdir(\'.\'))\n+ indexes.sort()\n+ shutil.move(indexes[0], args.output_indexes[0])\n+ shutil.move(indexes[1], args.output_indexes[1])\n+ if args.indexes:\n+ shutil.move(index + \'.stranded.index\', args.output_indexes[0])\n+ shutil.move(index + \'.unstranded.index\', args.output_indexes[1])\n+ if args.bi_indexes:\n+ shutil.move(args.bi_indexes[0] + \'.stranded.index\', args.output_index[0])\n+ shutil.move(args.bi_indexes[1] + \'.unstranded.index\', args.output_index[1])\n+\n+ cleanup_before_exit(tmp_dir)\n+main()\n' |
b |
diff -r b26aec436ab5 -r 7782babe0a62 alfa/test-data/alfa_toy-Biofeatures Distribution.pdf |
b |
Binary file alfa/test-data/alfa_toy-Biofeatures Distribution.pdf has changed |
b |
diff -r b26aec436ab5 -r 7782babe0a62 alfa/test-data/alfa_toy.bam |
b |
Binary file alfa/test-data/alfa_toy.bam has changed |
b |
diff -r b26aec436ab5 -r 7782babe0a62 alfa/test-data/alfa_toy.bedgraph --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/alfa/test-data/alfa_toy.bedgraph Thu Dec 21 09:31:06 2017 -0500 |
b |
@@ -0,0 +1,4 @@ +Chr1 149 199 2 +Chr1 299 349 1 +Chr1 499 549 6 +Chr1 1099 1149 1 |
b |
diff -r b26aec436ab5 -r 7782babe0a62 alfa/test-data/alfa_toy.categories_counts --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/alfa/test-data/alfa_toy.categories_counts Thu Dec 21 09:31:06 2017 -0500 |
b |
@@ -0,0 +1,5 @@ +#Category,biotype Counts_in_bam Size_in_genome +CDS,protein_coding 300.0 624.0 +five_prime_utr,protein_coding 75.0 250.5 +three_prime_utr,protein_coding 25.0 126.5 +intergenic,intergenic 100.0 249.0 |
b |
diff -r b26aec436ab5 -r 7782babe0a62 alfa/test-data/alfa_toy.gtf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/alfa/test-data/alfa_toy.gtf Thu Dec 21 09:31:06 2017 -0500 |
b |
@@ -0,0 +1,6 @@ +Chr1 ensembl_havana gene 250 1250 . + . gene_id "ENSMUSG00000051951"; gene_biotype "protein_coding"; +Chr1 ensembl_havana transcript 250 1250 . + . gene_id "ENSMUSG00000051951"; gene_biotype "protein_coding"; +Chr1 ensembl_havana exon 375 1000 . + . gene_id "ENSMUSG00000051951"; gene_biotype "protein_coding"; +Chr1 ensembl_havana CDS 375 1000 . + 0 gene_id "ENSMUSG00000051951"; gene_biotype "protein_coding"; +Chr1 ensembl_havana five_prime_utr 250 375 . - . gene_id "ENSMUSG00000051951"; gene_biotype "protein_coding"; +Chr1 ensembl_havana three_prime_utr 1000 1250 . - . gene_id "ENSMUSG00000051951"; gene_biotype "protein_coding"; |
b |
diff -r b26aec436ab5 -r 7782babe0a62 alfa/test-data/alfa_toy.stranded.index --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/alfa/test-data/alfa_toy.stranded.index Thu Dec 21 09:31:06 2017 -0500 |
b |
@@ -0,0 +1,11 @@ +#Chr1 1250 +Chr1 249 374 + protein_coding:gene,transcript +Chr1 249 374 - protein_coding:five_prime_utr +Chr1 374 375 + protein_coding:exon,CDS +Chr1 374 375 - protein_coding:five_prime_utr,three_prime_utr +Chr1 375 999 + protein_coding:exon,CDS +Chr1 375 999 - antisense +Chr1 999 1000 + protein_coding:exon,CDS +Chr1 999 1000 - protein_coding:three_prime_utr +Chr1 1000 1250 + protein_coding:gene,transcript +Chr1 1000 1250 - protein_coding:five_prime_utr,three_prime_utr,exon,CDS |
b |
diff -r b26aec436ab5 -r 7782babe0a62 alfa/test-data/alfa_toy.unstranded.index --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/alfa/test-data/alfa_toy.unstranded.index Thu Dec 21 09:31:06 2017 -0500 |
b |
@@ -0,0 +1,6 @@ +#Chr1 1250 +Chr1 249 374 . protein_coding:five_prime_utr,gene,transcript +Chr1 374 375 . protein_coding:five_prime_utr,three_prime_utr,exon,CDS +Chr1 375 999 . protein_coding:exon,CDS +Chr1 999 1000 . protein_coding:three_prime_utr,exon,CDS +Chr1 1000 1250 . protein_coding:five_prime_utr,exon,CDS,three_prime_utr,gene,transcript |
b |
diff -r b26aec436ab5 -r 7782babe0a62 alfa/tool-data/alfa_indexes.loc.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/alfa/tool-data/alfa_indexes.loc.sample Thu Dec 21 09:31:06 2017 -0500 |
b |
@@ -0,0 +1,2 @@ +#<species> <version> <release> <value> <dbkey> <name> <prefix> +#Dictyostelium_discoideum dicty_2 7 Dictyostelium_discoideum_dicty_2_7 Dictyostelium_discoideum_dicty_2_7 Dictyostelium_discoideum: dicty_2 (release 7) <path_to_dicty_indexes_dir> |
b |
diff -r b26aec436ab5 -r 7782babe0a62 alfa/tool_data_table_conf.xml.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/alfa/tool_data_table_conf.xml.sample Thu Dec 21 09:31:06 2017 -0500 |
b |
@@ -0,0 +1,7 @@ +<tables> + <!-- Locations of all alfa indexes --> + <table name="alfa_indexes" comment_char="#" allow_duplicate_entries="False"> + <columns>species, version, release, value, dbkey, name, prefix</columns> + <file path="tool-data/alfa_indexes.loc" /> + </table> +</tables> |
b |
diff -r b26aec436ab5 -r 7782babe0a62 alfa/tool_dependencies.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/alfa/tool_dependencies.xml Thu Dec 21 09:31:06 2017 -0500 |
b |
@@ -0,0 +1,12 @@ +<?xml version="1.0"?> +<tool_dependency> + <package name="bedtools"> + <repository changeset_revision="3416a1d4a582" name="package_bedtools_2_24" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" /> + </package> + <package name="samtools"> + <repository changeset_revision="f6ae3ba3f3c1" name="package_samtools_1_2" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" /> + </package> + <package name="matplotlib"> + <repository changeset_revision="f7424e1cf115" name="package_python_2_7_matplotlib_1_4" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" /> + </package> +</tool_dependency> |