<!-- Mercurial > repos > aafc-mbb > itsx -->

<tool id="ITSx" name="ITSx Extractor" version="1.0.11">

	<!-- Short summary of what the tool does. -->
	<description>ITSx -- Identifies ITS sequences and extracts the ITS region</description>

	<!-- External package the wrapper depends on: the HMMER suite, pinned to 3.1b2. -->
	<requirements>
		<requirement version="3.1b2" type="package">hmmer</requirement>
	</requirements>

	<!--
		Rules for interpreting what the job prints on stdout and stderr.
		Text mentioning "ITSx" or "analysis" is recorded as log output;
		text containing "ERROR" or "error" marks the job as failed.
	-->
	<stdio>
		<regex level="log" source="both" match="ITSx"/>
		<regex level="log" source="both" match="analysis"/>
		<regex level="fatal" source="both" match="ERROR"/>
		<regex level="fatal" source="both" match="error"/>
	</stdio>

	<!--
		Command template: runs the ITSx Perl script on the selected FASTA dataset.

		* Detailed results are always requested.
		* The domain cutoffs (E, S, N) come straight from the numeric inputs.
		* The thread count is taken from GALAXY_SLOTS, falling back to 1 when the
		  variable is unset. It is passed as a separate argument, like every other
		  option here, instead of the "option=value" form.
		* The six boolean inputs expand to their own flag plus T or F.
		* Exactly one of the two HMMER search cutoffs is emitted, chosen by the
		  "hmmer_search" conditional.
		* The input path is single-quoted so unusual characters in the dataset
		  path cannot split the argument.
	-->
	<command interpreter="perl">
		ITSx -i '$input'
		--detailed_results T
		-E $domain_eval
		-S $domain_score
		-N $domain_num
		--cpu \${GALAXY_SLOTS:-1}
		$allow_reorder
		$complement
		$heuristics
		$preserve
		$reset
		$truncate
		#if $hmmer_search.search == "eval"
			--search_eval $hmmer_search.eval_cutoff
		#else
			--search_score $hmmer_search.score_cutoff
		#end if
	</command>

	<!--
		User-facing parameters. Parameter names are referenced by the command
		template and by the tests, so only labels and help texts are editable
		without touching those sections.
	-->
	<inputs>
		<!-- Sequences to scan for ITS regions. -->
		<param name="input" type="data" format="fasta" label="Input Fasta"/>

		<!-- Per-domain cutoffs applied to HMMER hits. -->
		<param name="domain_eval" type="float" value="0.00001" label="Domain E-value Cutoff"
		help="Domain E-value cutoff a sequence must obtain in the HMMER-based step to be included in the output."/>

		<param name="domain_score" type="integer" value="0" label="Domain Score Cutoff"
		help="Domain score cutoff that a sequence must obtain in the HMMER-based step to be included in the output."/>

		<param name="domain_num" type="integer" value="2" label="Minimum Number of Domains"
		help="The minimum number of domains (different HMM gene profiles) that must match a sequence for it to be included in the output (detected as an ITS sequence). Setting the value lower than two will increase the number of false positives, while increasing it above two will decrease ITSx detection abilities on fragmentary data."/>

		<!-- The HMMER search itself is limited either by E-value or by score, never both. -->
		<conditional name="hmmer_search">
			<param name="search" type="select" label="HMMER Search Type">
				<option value="eval">Search E-value</option>
				<option value="score">Search Score</option>
			</param>
			<when value="eval">
				<param name="eval_cutoff" type="float" value="0.01" label="Search E-value"
				help="The actual E-value cutoff used in the HMMER search. High numbers may slow down the process. Should never be set to a lower value than the Domain E-value Cutoff option. Cannot be used in combination with Search Score option."/>
			</when>
			<when value="score">
				<param name="score_cutoff" type="integer" value="0" label="Search Score"
				help="The score cutoff used in the HMMER search. Low numbers may slow down the process. Should never be set to a higher number than the Domain Score Cutoff. Cannot be used in combination with the Search E-value option."/>
			</when>
		</conditional>

		<!-- Boolean switches: each one expands to its ITSx flag followed by T or F. -->
		<param name="reset" type="boolean" checked="true" truevalue="--reset T" falsevalue="--reset F" label="Re-creates the HMM-database before ITSx is run"/>

		<!-- The thread count is not user-configurable; the command takes it from GALAXY_SLOTS. -->
		<!--  <param name="cpu" type="integer" value="1" label="cpu"/> -->

		<param name="allow_reorder" type="boolean" checked="false" truevalue="--allow_reorder T" falsevalue="--allow_reorder F" label="Allow profiles not to be in the expected order on the extracted sequences"/>

		<param name="complement" type="boolean" checked="true" truevalue="--complement T" falsevalue="--complement F" label="Check both DNA strands against the database"/>

		<param name="heuristics" type="boolean" checked="false" truevalue="--heuristics T" falsevalue="--heuristics F" label="Use HMMER's heuristic filtering"/>

		<param name="preserve" type="boolean" checked="false" truevalue="--preserve T" falsevalue="--preserve F" label="Preserve sequence headers instead of printing out ITSx headers"/>

		<param name="truncate" type="boolean" checked="true" truevalue="--truncate T" falsevalue="--truncate F" label="Remove ends of ITS sequences if they are outside of the ITS region"/>
	</inputs>

	<!--
		Datasets collected from the job working directory once ITSx finishes.
		Each entry maps one file written with the "ITSx_out" prefix to a
		history item.
	-->
	<outputs>
		<!-- Extracted sequences -->
		<data name="ITS1" from_work_dir="ITSx_out.ITS1.fasta" format="fasta" label="Extracted ITS1 Fasta File"/>
		<data name="ITS2" from_work_dir="ITSx_out.ITS2.fasta" format="fasta" label="Extracted ITS2 Fasta File"/>
		<data name="fullfasta" from_work_dir="ITSx_out.full.fasta" format="fasta" label="Full Fasta of ITS extracted"/>

		<!-- Hit overview and sequences without a detected ITS region -->
		<data name="graph" from_work_dir="ITSx_out.graph" format="txt" label="Graph of ITS regions"/>
		<data name="nodetect" from_work_dir="ITSx_out_no_detections.fasta" format="fasta" label="No ITS region"/>

		<!-- Tables and run reports -->
		<data name="positions" from_work_dir="ITSx_out.positions.txt" format="tabular" label="ITSx feature positions"/>
		<data name="problematic" from_work_dir="ITSx_out.problematic.txt" format="tabular" label="Problematic sequences"/>
		<data name="summary" from_work_dir="ITSx_out.summary.txt" format="txt" label="ITSx summary"/>
		<data name="extractions" from_work_dir="ITSx_out.extraction.results" format="tabular" label="ITSx extraction results"/>
	</outputs>

	<!--
		Functional test: runs the tool with its default settings on a small
		FASTA file and compares the deterministic outputs with stored files.

		Boolean parameters are given as true/false. Galaxy translates them into
		the truevalue/falsevalue flag strings declared in the inputs section;
		passing the flag strings themselves is not a reliable way to set a
		boolean in a test.
	-->
	<tests>
		<test>
			<param name="input" value="testITSsequences.fasta"/>
			<param name="domain_eval" value="0.00001" />
			<param name="domain_score" value="0" />
			<param name="domain_num" value="2" />
			<param name="search" value="eval" />
			<param name="eval_cutoff" value="0.01" />
			<param name="reset" value="true" />
			<param name="allow_reorder" value="false" />
			<param name="complement" value="true"/>
			<param name="heuristics" value="false" />
			<param name="preserve" value="false" />
			<param name="truncate" value="true" />
			<output name="ITS1" file="expectedOutput.ITS1.fasta" />
			<output name="ITS2" file="expectedOutput.ITS2.fasta" />
			<output name="fullfasta" file="expectedOutput.full.fasta" />
			<output name="graph" file="expectedOutput.graph" />
			<!-- This output is commented out because it is empty -->
			<!--<output name="nodetect" file="expectedOutput_no_detections.fasta" />-->
			<output name="positions" file="expectedOutput.positions.txt" />
			<!-- This output is commented out because it causes the test to fail (due to the difference in time that gets printed out in the expected output and the output of the test) -->
			<!--<output name="summary" file="expectedOutput.summary.txt" />-->
			<output name="problematic" file="expectedOutput.problematic.txt" />
			<output name="extractions" file="expectedOutput.extractions.txt" />
		</test>
	</tests>

	<help>
**Description**

Identifies ITS sequences and extracts the ITS regions.

ITSx is an open source software utility to extract the highly variable ITS1 and ITS2 subregions from ITS sequences, which are commonly used as molecular barcodes for e.g. fungi. As the inclusion of parts of the neighbouring, very conserved, ribosomal genes (SSU, 5S and LSU rRNA sequences) in the sequence identification process can lead to severely misleading results, ITSx identifies and extracts only the ITS regions themselves.
For more information regarding the settings of the tool, please visit the ITSx Users Guide on http://microbiology.se/publ/itsx_users_guide.pdf

-----

**Inputs and Outputs**

- Inputs:
	+ Accepts input in the FASTA format. It is possible to input both aligned and unaligned FASTA files, containing both DNA and RNA sequences

- Outputs:
	+ Summary file of the entire run
	+ One or more detailed tables containing the positions in the respective sequences where the ITS subregions were found
	+ Semi-graphical representation of hits
	+ FASTA file of all identified sequences
	+ FASTA file for ITS1 regions
	+ FASTA file for ITS2 regions
	+ A list of sequence IDs representing the entries (if they exist) that did not contain any ITS region
	+ A file containing chimeric sequences (if they are found)
	+ A file for problematic sequences

-----

**Tool Information**

- ITSx tool v1.0.11:
	+ Release Date: December 2014
	+ URL: http://microbiology.se/software/itsx/
	+ Source URL: http://microbiology.se/sw/ITSx_1.0.11.tar.gz
	+ Copyright (C) 2012-2013 Johan Bengtsson-Palme et al.

-----

**Contact Information**

- For more information, contact:
	+ AAFC-MBB Team
	+ E-mail: mbb@agr.gc.ca
	+ Agriculture and Agri-Food Canada, Ottawa, ON, Canada
	</help>

	<!--
		Publication to cite when ITSx results are used.
		NOTE(review): title, journal, volume, pages and DOI were filled in from
		the published article record; confirm against the publisher page.
	-->
	<citations>
		<citation type="bibtex">@ARTICLE{a1,
		author = {Bengtsson-Palme, Johan and others},
		title = {Improved software detection and extraction of ITS1 and ITS2 from ribosomal ITS sequences of fungi and other eukaryotes for analysis of environmental sequencing data},
		journal = {Methods in Ecology and Evolution},
		volume = {4},
		number = {10},
		pages = {914--919},
		year = {2013},
		doi = {10.1111/2041-210X.12073}
		}</citation>
	</citations>
</tool>
<!--
author	aafc-mbb
date	Mon, 14 Mar 2016 16:15:08 -0400
parents
children
-->