Mercurial > repos > charles-bernard > alfa

<tool id="alfa" name="ALFA" version="0.1.0">
	<description>- Compute and display distribution of reads by genomic categories</description>

	<!-- External packages resolved by Galaxy. ALFA itself needs bedtools suite v2.20.0 or above. -->
	<requirements>
		<requirement type="package" version="2.24">bedtools</requirement>
		<requirement type="package" version="1.2">samtools</requirement>
		<requirement type="package" version="1.4">matplotlib</requirement>
	</requirements>

	<!-- Cheetah template that assembles the ALFA_wrapper.py command line from the form values -->
	<command>
	<![CDATA[
		## The deprecated interpreter attribute is replaced by an explicit call
		## of the wrapper script located in the tool directory.
		python '$__tool_directory__/ALFA_wrapper.py'

		--project_name "${projectName}"

		##__INPUT 1__##
		## Exactly one annotation source is passed: user-supplied indexes,
		## a built-in index prefix, or a personal annotation file.
		#if str ( $annotation.annotationSource['annotationSourceSelection'] ) == "index"
			--index "$annotation.annotationSource['strandedIndex']" "$annotation.annotationSource['unstrandedIndex']"
		#else if str ( $annotation.annotationSource['annotationSourceSelection'] ) == "built_in_index"
			--bi_index "$annotation.annotationSource.built_in_index_prefix.fields.prefix"
		#else
			--annotation "$annotation.annotationSource['annotationFile']"
		#end if

		##__INPUT 2__##
		## Each repeat entry contributes two tagged arguments: the dataset path
		## (prefixed by __fname__) and its label (prefixed by __label__).
		--reads_format $reads.readsType['readsTypeSelection']
			--reads
		#for $r in $reads.readsType['readsList']
			"__fname__$r.readsFile"
			"__label__$r.readsLabel"
		#end for
		--strandness $reads['strandness']

		##__OUTPUT FILES__##
		## Only the outputs ticked in the form are requested; the plot option
		## depends on the selected plot format.
		#if str ( $outputFiles['plot'] ) == "True"
			#if str ( $outputOptions['plotFormat'] ) == "pdf"
				--output_pdf "$outputPdf"
			#else if str ( $outputOptions['plotFormat'] ) == "png"
				--output_png "$outputCategoriesPng" "$outputBiotypesPng"
			#else
				--output_svg "$outputCategoriesSvg" "$outputBiotypesSvg"
			#end if
		#end if
		#if str ( $outputFiles['countFile'] ) == "True"
			--output_count "$outputCountFile"
		#end if
		#if str ( $outputFiles['index'] ) == "True"
			--output_index "$outputStrandedIndex" "$outputUnstrandedIndex"
		#end if

		##__OUTPUT OPTIONS__##
		## Plot-specific options are passed only when a plot is requested.
		--categories_depth $outputOptions['categoriesDepth']
		#if str ( $outputFiles['plot'] ) == "True"
			--plot_format $outputOptions['plotFormat']
			#if str ( $outputOptions.plotThreshold['plotThresholdChoice'] ) == "True"
				--threshold $outputOptions.plotThreshold.yMin $outputOptions.plotThreshold.yMax
			#end if
		#end if

		--log_report "$logReport"
		--tool_dir "$__tool_directory__"
	]]>
	</command>
	<!-- Tool form: project name, annotation source, reads, output selection and advanced plot settings -->
	<inputs>
		<!-- Free-text project name; it is passed to the wrapper and prefixes every output dataset label -->
		<param name="projectName" value="ALFA" type="text" size="20" label="Project Name">
			<validator type="empty_field" message="Please, specify a name for your project."/>
		</param>

		<section name="annotation" title="INPUT 1: Annotation File (GTF format)" expanded="True">
			<!-- The annotation comes from a personal GTF, from previously generated indexes, or from a built-in index -->
			<conditional name="annotationSource">
				<param name="annotationSourceSelection" type="select" label="Select the source of your annotated sequence/genome">
					<option value="personal_gtf" selected="true">Personal annotation file (GTF format)</option>
					<option value="index">Stranded and Unstranded Indexes previously generated by ALFA (Index format)</option>
					<option value="built_in_index">Built-in indexes among a list of referenced genome (Index format)</option>
				</param>
				<when value="personal_gtf">
					<param name="annotationFile" type="data" format="gff,gtf" label="Select your personal annotation file (GTF format)"/>
				</when>
				<when value="index">
					<!-- NOTE(review): the index outputs of this tool are declared with format="txt"; confirm that an "index" datatype is registered so those datasets can be selected here -->
					<param name="strandedIndex" type="data" format="index" label="Select your ALFA Stranded index file (index format)"/>
					<param name="unstrandedIndex" type="data" format="index" label="Select your ALFA Unstranded index file (index format)"/>
				</when>
				<when value="built_in_index">
					<!-- Options are read from the alfa_indexes data table -->
					<param name="built_in_index_prefix" type="select" label="Select Genome">
						<options from_data_table="alfa_indexes">
							<validator type="no_options" message="No indexes are available for the selected input dataset" />
						</options>
					</param>
				</when>
			</conditional>
		</section>

		<section name="reads" title="INPUT 2: Aligned Reads File(s) of the annotated sequence (BAM or BEDGRAPH format)" expanded="True">
			<!-- Both branches expose the same repeat (readsList) so the command template can iterate over it uniformly -->
			<conditional name="readsType">
				<param name="readsTypeSelection" type="select" label="Select the format of the reads file(s)">
					<option value="bam" selected="true">BAM</option>
					<option value="bedgraph">BEDGRAPH</option>
				</param>
				<when value="bam">
					<repeat name="readsList" title="Reads File" min="1">
						<param name="readsFile" type="data" format="bam" label="Select the reads file of your annotated sequence (BAM format)"/>
						<param name="readsLabel" type="text" size="20" value="" label="Label of the reads" optional="True"/>
					</repeat>
				</when>
				<when value="bedgraph">
					<repeat name="readsList" title="Reads File" min="1">
						<!-- The bedgraph datatype is accepted in addition to bed, which was the only format allowed before -->
						<param name="readsFile" type="data" format="bedgraph,bed" label="Select the reads file of your annotated sequence (BEDGRAPH format)"/>
						<param name="readsLabel" type="text" size="20" value="" label="Label of the reads" optional="True"/>
					</repeat>
				</when>
			</conditional>
			<param name="strandness" type="select" label="Select the strandness of your mapped reads dataset">
				<option value="unstranded" selected="true">Unstranded (reads will match genomic features on both forward and reverse strands of the annotated sequence)</option>
				<option value="forward">Forward (reads will match only genomic features on the forward strand of the annotated sequence)</option>
				<option value="reverse">Reverse (reads will match only genomic features on the reverse strand of the annotated sequence)</option>
			</param>
		</section>

		<!-- Each boolean below switches one family of output datasets on or off (see the output filters) -->
		<section name="outputFiles" title="OUTPUT FILES: Choose the output files" expanded="False">
			<param name="plot" type="boolean" truevalue="True" falsevalue="False" checked="True" label="Categories and Biotypes Histograms" help="Plot the nucleotides distribution of the reads per genomic categories and biotypes"/>
			<param name="countFile" type="boolean" truevalue="True" falsevalue="False" checked="True" label="Categories Count File" help="Edit the exact count of nucleotides in the reads per genomic categories and biotypes"/>
			<param name="index" type="boolean" truevalue="True" falsevalue="False" checked="False" label="Indexes" help="Print the resulting stranded and unstranded indexes from the gtf input file (useful if you plan to run ALFA again with this annotated sequence)"/>
		</section>

		<section name="outputOptions" title="ADVANCED SETTINGS" expanded="False">
			<param name="categoriesDepth" type="select" label="Categories to Display">
				<option value="1">gene | intergenic</option>
				<option value="2">exon | intron | intergenic</option>
				<option value="3" selected="true">5’-UTR | CDS | 3’-UTR | intron | intergenic</option>
				<option value="4">5’-UTR | start_codon | CDS | stop_codon | 3’-UTR | intron | intergenic</option>
			</param>
			<param name="plotFormat" type="select" label="Plot Options: Select graph format" help="Ignore if you did not choose the histograms output file">
				<option value="pdf" selected="true">pdf</option>
				<option value="svg">svg</option>
				<option value="png">png</option>
			</param>
			<!-- The y-axis bounds are only shown, and only passed to the wrapper, when the checkbox is ticked -->
			<conditional name="plotThreshold">
				<param name="plotThresholdChoice" type="boolean" truevalue="True" falsevalue="False" checked="false" label="Plot Options: Modify y axis range of the normalized counts of bio-features" help="Ignore if you did not choose the histograms output file"/>
				<when value="True">
					<param name="yMin" type="float" value="-2.0" label="y min"/>
					<param name="yMax" type="float" value="2.0" label="y max"/>
				</when>
				<when value="False"></when>
			</conditional>
		</section>
	</inputs>

	<!-- Output datasets; every label is prefixed with the project name and each optional
	     dataset is created only when its filter expression holds -->
	<outputs>
		<!-- Always produced -->
		<data name="logReport" label="${projectName}-Log Report" format="txt"/>

		<!-- Plots: a single PDF, or one categories file plus one biotypes file for PNG and SVG -->
		<data name="outputPdf" label="${projectName}-BioFeatures Distribution" format="pdf">
			<filter>outputFiles['plot'] is True and outputOptions['plotFormat'] == 'pdf'</filter>
		</data>
		<data name="outputCategoriesPng" label="${projectName}-Categories Distribution" format="png">
			<filter>outputFiles['plot'] is True and outputOptions['plotFormat'] == 'png'</filter>
		</data>
		<data name="outputBiotypesPng" label="${projectName}-Biotypes Distribution" format="png">
			<filter>outputFiles['plot'] is True and outputOptions['plotFormat'] == 'png'</filter>
		</data>
		<data name="outputCategoriesSvg" label="${projectName}-Categories Distribution" format="svg">
			<filter>outputFiles['plot'] is True and outputOptions['plotFormat'] == 'svg'</filter>
		</data>
		<data name="outputBiotypesSvg" label="${projectName}-Biotypes Distribution" format="svg">
			<filter>outputFiles['plot'] is True and outputOptions['plotFormat'] == 'svg'</filter>
		</data>

		<!-- Count file -->
		<data name="outputCountFile" label="${projectName}-Categories Count" format="txt">
			<filter>outputFiles['countFile'] is True</filter>
		</data>

		<!-- Stranded and unstranded indexes -->
		<data name="outputStrandedIndex" label="${projectName}-Stranded Index" format="txt">
			<filter>outputFiles['index'] is True</filter>
		</data>
		<data name="outputUnstrandedIndex" label="${projectName}-Unstranded Index" format="txt">
			<filter>outputFiles['index'] is True</filter>
		</data>
	</outputs>

	<tests>
		<!-- Toy run: personal GTF annotation, one unstranded BAM file, all output kinds enabled, PDF plot -->
		<test>
			<!-- The project name is a top-level parameter of the form -->
			<param name="projectName" value="alfa_toy" />
			<section name="annotation">
				<conditional name="annotationSource">
					<param name="annotationSourceSelection" value="personal_gtf" />
					<param name="annotationFile" value="alfa_toy.gtf" ftype="gtf" />
				</conditional>
			</section>
			<section name="reads">
				<conditional name="readsType">
					<param name="readsTypeSelection" value="bam" />
					<repeat name="readsList">
						<param name="readsFile" value="alfa_toy.bam" ftype="bam" />
						<param name="readsLabel" value="alfa_toy" />
					</repeat>
				</conditional>
				<!-- strandness belongs to the reads section itself, not to the readsType conditional -->
				<param name="strandness" value="unstranded" />
			</section>
			<section name="outputFiles">
				<param name="plot" value="True" />
				<param name="countFile" value="True" />
				<param name="index" value="True" />
			</section>
			<section name="outputOptions">
				<param name="categoriesDepth" value="3" />
				<param name="plotFormat" value="pdf" />
				<conditional name="plotThreshold">
					<param name="plotThresholdChoice" value="False" />
				</conditional>
			</section>
			<output name="outputPdf" file="alfa_toy-Biofeatures Distribution.pdf" ftype="pdf" />
			<output name="outputCountFile" file="alfa_toy.categories_count" ftype="txt" />
			<output name="outputStrandedIndex" file="alfa_toy.stranded.index" ftype="txt" />
			<output name="outputUnstrandedIndex" file="alfa_toy.unstranded.index" ftype="txt" />
			<assert_stdout>
				<has_text text="### End of the program" />
			</assert_stdout>
		</test>
	</tests>

	<help>
<![CDATA[
**What it does**


	| ALFA provides a global overview of the distribution of features in Next Generation Sequencing dataset(s).
	|
	| Given a set of aligned reads (BAM files) and an annotation file (GTF format), the tool produces plots of the raw and normalized distributions of those reads among genomic categories (stop codon, 5'-UTR, CDS, intergenic, etc.) and biotypes (protein coding genes, miRNA, tRNA, etc.). It works regardless of the sequencing technique or the organism.

----

**ALFA acronym**

- Annotation.Landscape.For.Aligned reads

----

**Official documentation of the tool**


- https://github.com/biocompibens/ALFA

----

**Detailed example**

- https://github.com/biocompibens/ALFA#detailed-example

----

**Nota Bene**

* **Input 1: Annotation File**


	| ALFA requires as its first input an annotation file (sequence, genome...) in GTF format, from which it generates the ALFA indexes needed in the second step of the program.
	| Indexes are files which list all the coordinates of the categories (stop codon, 5'-UTR, CDS, intergenic...) and biotypes (protein coding genes, miRNA, tRNA, ...) encountered in the annotated sequence.
	|

	.. class:: warningmark

	| The GTF file must be sorted.
	|

	.. class:: infomark

	| Generating indexes from an annotation file can be time consuming (e.g., ~10 min for the human genome). ALFA therefore lets the user directly submit indexes generated in previous runs as inputs for a new run.
	|

	.. class:: infomark

	| ALFA can also use built-in indexes to save even more computation time. To generate these built-in indexes easily, install the data manager tool `data_manager_build_alfa_indexes`_ available on the toolshed.

	.. _data_manager_build_alfa_indexes: https://toolshed.g2.bx.psu.edu/view/charles-bernard/data_manager_build_alfa_indexes

* **Input 2: Reads**

	| ALFA requires as its second input one or more mapped reads files in either BAM or BEDGRAPH format. The coordinates of the mapped reads will be intersected with the corresponding categories and biotypes listed in the indexes.
	| The strandness option determines which strand of the annotated sequence will be taken into account during this intersection.
	|

	.. class:: warningmark

	| Bam or Bedgraph file(s) must be sorted.
	|

	.. class:: warningmark

	| Chromosome names in the reads and in the annotation file (GTF or indexes) must be the same for the intersection to occur.
	|

* **Output files**

	| The result of the intersection is a count file giving the number of nucleotides in the reads for each genomic category and biotype. From this count file, plots of the raw and normalized distributions of the reads among these categories are generated.
	| In the output files section, the user can choose which files ALFA should produce. The Categories Count File and the Plots are selected by default.
	|

	.. class:: infomark

	| The user can also select the 'indexes' option as output. This option is useful if you plan to run ALFA again with the same annotation file. *See Nota Bene/Input 1: Annotation File for more information.*
	|

	- `What the plots look like`_

	.. _What the plots look like: https://github.com/biocompibens/ALFA#plots

	|

	- `How they are generated`_

	.. _How they are generated: https://github.com/biocompibens/ALFA#detailed-example

----

**ALFA Developers**

	| Benoît Noël and Mathieu Bahin: *compbio team, Institut de Biologie de l'Ecole Normale Supérieure de Paris*

]]>
     </help>

     <!-- BibTeX entry for the tool: a well-formed entry needs a citation key and comma-separated
          fields; the project page is given as a url field rather than crossref, which BibTeX
          reserves for the key of another entry -->
     <citations>
         <citation type="bibtex">@misc{alfa,
             author = {Benoît Noël and Mathieu Bahin},
             title = {ALFA: Annotation Landscape For Aligned reads},
             url = {https://github.com/biocompibens/ALFA},
             institution = {Institut de Biologie de l'Ecole Normale Supérieure de Paris}
             }
         </citation>
     </citations>
</tool>
author	charles-bernard
date	Sun, 18 Dec 2016 09:33:46 -0500
parents	c8acc8808b52
children