diff SMART/galaxy/WrappGetLetterDistribution.xml @ 38:2c0c0a89fad7

Uploaded
author m-zytnicki
date Thu, 02 May 2013 09:56:47 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/SMART/galaxy/WrappGetLetterDistribution.xml	Thu May 02 09:56:47 2013 -0400
@@ -0,0 +1,48 @@
+<tool id="getLetterDistribution1" name="get letter distribution"> <!-- Galaxy wrapper: runs WrappGetLetterDistribution.py on a fasta or fastq dataset -->
+    <description>Calculate distribution for each nucleotide per position for all short reads</description>
+    <requirements>
+        <requirement type="set_environment">PYTHONPATH</requirement>
+    </requirements>
+    <command interpreter="python">
+        WrappGetLetterDistribution.py -i $formatType.inputFileName
+        #if $formatType.FormatInputFileName == 'fasta':
+            -f fasta
+        #else :
+            -f fastq
+        #end if
+        -c $ouputFileNameCSV -a $ouputFileNamePNG1 -b $ouputFileNamePNG2
+    </command>
+    <inputs>
+        <conditional name="formatType"> <!-- the selected format decides which dataset type is offered as input -->
+            <param name="FormatInputFileName" type="select" label="Input File Format">
+                <option value="fasta">fasta</option>
+                <option value="fastq" selected="true">fastq</option>
+            </param>
+            <when value="fasta">
+                <param name="inputFileName" format="fasta" type="data" label="Fasta Input File"/>
+            </when>
+            <when value="fastq">
+                <param name="inputFileName" format="fastq" type="data" label="Fastq Input File"/>
+            </when>
+        </conditional>
+    </inputs>
+    <!-- Output names keep the "ouput" (sic) spelling: <command> and <tests> refer to them by exactly these names. -->
+    <outputs>
+        <data name="ouputFileNameCSV" format="tabular" label="[get letter distribution] CSV file"/>
+        <data name="ouputFileNamePNG1" format="png" label="[get letter distribution] PNG file 1"/>
+        <data name="ouputFileNamePNG2" format="png" label="[get letter distribution] PNG file 2"/>
+    </outputs>
+    <tests> <!-- every <output name> here must match a <data name> declared in <outputs> -->
+        <test>
+            <param name="FormatInputFileName" value="fastq" />
+            <param name="inputFileName" value="short_fastq.fastq" />
+            <output name="ouputFileNameCSV" file="exp_getletterdistribution_short_fastq.csv" />
+        </test>
+    </tests>
+
+    <help>
+The script gets the nucleotide distribution of the input sequence list. It outputs a CSV file and two plots (PNG). The first plot shows the nucleotide distribution of the data. More precisely, a point (*x*, *y*) on the curve **A** shows that *y* sequences have *x* % of **A**.
+
+The second plot shows the average nucleotide distribution for each position of the read. You can use it to detect a bias in the first nucleotides, for instance. A point (*x*, *y*) on the curve **A** shows that at the position *x*, there are *y*% of **A**. A point (*x*, *y*) on the curve **#** tells you that *y* % of the sequences contain not less than *x* nucleotides. By definition, this latter line is a decreasing function. It usually explains why the tail of the other curves are sometimes erratic: there are few sequences.
+    </help>
+</tool>