diff rgFastQC.xml @ 20:ddf5c37952ac draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fastqc commit 89976e27bd11f1da3c95a5df6b67297496a8a89b
author iuc
date Thu, 16 May 2019 07:45:56 -0400
parents 9da02be9c6cc
children e7b2202befea
line wrap: on
line diff
--- a/rgFastQC.xml	Fri May 10 14:23:53 2019 -0400
+++ b/rgFastQC.xml	Thu May 16 07:45:56 2019 -0400
@@ -1,9 +1,17 @@
-<tool id="fastqc" name="FastQC" version="0.72">
+<tool id="fastqc" name="FastQC" version="0.72+galaxy1">
     <description>Read Quality reports</description>
     <requirements>
-        <requirement type="package" version="0.11.7">fastqc</requirement>
+        <requirement type="package" version="0.11.8">fastqc</requirement>
     </requirements>
-    <command detect_errors="exit_code"><![CDATA[
+    <stdio>
+        <exit_code range="1:" level="fatal" description="FastQC returned non zero exit code" />
+        <exit_code range=":-1" level="fatal" description="FastQC returned non zero exit code" />
+    <regex match="There is insufficient memory for the Java Runtime Environment"
+           source="stdout"
+           level="fatal_oom"
+           description="Out of memory error occurred" />
+    </stdio>
+    <command><![CDATA[
         #import re
         #set input_name = re.sub('[^\w\-\s]', '_', str($input_file.element_identifier))
 
@@ -27,17 +35,25 @@
         mkdir -p '${html_file.files_path}' &&
         fastqc
             --outdir '${html_file.files_path}'
-
             #if $contaminants.dataset and str($contaminants) > ''
                 --contaminants '${contaminants}'
             #end if
 
+            #if $adapters.dataset and str($adapters) > ''
+                --adapters '${adapters}'
+            #end if
+
             #if $limits.dataset and str($limits) > ''
                 --limits '${limits}'
             #end if
 
             --quiet
             --extract
+            #if $min_length:
+                --min_length $min_length
+            #end if
+            $nogroup
+            --kmers $kmers
             -f '${format}'
             '${input_file_sl}'
 
@@ -50,8 +66,17 @@
                label="Short read data from your current history" />
         <param name="contaminants" type="data" format="tabular" optional="true" label="Contaminant list"
                help="tab delimited file with 2 columns: name and sequence.  For example: Illumina Small RNA RT Primer CAAGCAGAAGACGGCATACGA" />
+        <param argument="--adapters" type="data" format="tabular" optional="true" label="Adapter list"
+               help="list of adapter sequences which will be explicitly searched against the library. tab delimited file with 2 columns: name and sequence." />
         <param name="limits" type="data" format="txt" optional="true" label="Submodule and Limit specifing file"
                help="a file that specifies which submodules are to be executed (default=all) and also specifies the thresholds for the each submodules warning parameter" />
+        <param argument="--nogroup" type="boolean" truevalue="--nogroup" falsevalue="" checked="False"
+               label="Disable grouping of bases for reads >50bp" help="Using this option will cause fastqc to crash and burn if you use it on really long reads, and your plots may end up a ridiculous size. You have been warned!"/>
+        <param argument="--min_length" type="integer" value="" optional="true"
+               label="Lower limit on the length of the sequence to be shown in the report"
+               help="As long as you set this to a value greater than or equal to your longest read length then this will be the sequence length used to create your read groups.  This can be useful for making directly comparable statistics from datasets with somewhat variable read lengths."/>
+        <param argument="--kmers" type="integer" value="7" min="2" max="10"
+               label="length of Kmer to look for" help="note: the Kmer test is disabled and needs to be enabled using a custom Submodule and limits file"/>
     </inputs>
     <outputs>
         <data format="html" name="html_file" from_work_dir="output.html" label="${tool.name} on ${on_string}: Webpage" />
@@ -59,43 +84,67 @@
     </outputs>
     <tests>
         <test>
-            <param name="input_file" value="1000gsample.fastq" />
+            <param name="input_file" value="1000trimmed.fastq" />
+            <output name="html_file" file="fastqc_report.html" ftype="html" lines_diff="100"/>
+            <output name="text_file" file="fastqc_data.txt" ftype="txt"/>
+        </test>
+        <test>
+            <param name="input_file" value="1000trimmed.fastq" />
             <param name="contaminants" value="fastqc_contaminants.txt" ftype="tabular" />
-            <output name="html_file" file="fastqc_report.html" ftype="html" lines_diff="100"/>
-            <output name="text_file" file="fastqc_data.txt" ftype="txt" lines_diff="4"/>
+            <output name="html_file" file="fastqc_report_contaminants.html" ftype="html" lines_diff="100"/>
+            <output name="text_file" file="fastqc_data_contaminants.txt" ftype="txt"/>
+        </test>
+        <test>
+            <param name="input_file" value="1000trimmed.fastq" />
+            <param name="adapters" value="fastqc_adapters.txt" ftype="tabular" />
+            <output name="html_file" file="fastqc_report_adapters.html" ftype="html" lines_diff="100"/>
+            <output name="text_file" file="fastqc_data_adapters.txt" ftype="txt"/>
+        </test>
+        <test>
+            <param name="input_file" value="1000trimmed.fastq" />
+            <param name="limits" value="fastqc_customlimits.txt" ftype="txt" />
+            <output name="html_file" file="fastqc_report_customlimits.html" ftype="html" lines_diff="100"/>
+            <output name="text_file" file="fastqc_data_customlimits.txt" ftype="txt"/>
         </test>
         <test>
-            <param name="input_file" value="1000gsample.fastq" />
+            <param name="input_file" value="1000trimmed.fastq" ftype="fastq" />
+            <param name="kmers" value="3" />
             <param name="limits" value="fastqc_customlimits.txt" ftype="txt" />
-            <output name="html_file" file="fastqc_report2.html" ftype="html" lines_diff="100"/>
-            <output name="text_file" file="fastqc_data2.txt" ftype="txt" lines_diff="4"/>
+            <output name="html_file" file="fastqc_report_kmer.html" ftype="html" lines_diff="100"/>
+            <output name="text_file" file="fastqc_data_kmer.txt" ftype="txt"/>
+            <assert_command>
+                <has_text text="--kmers 3"/>
+            </assert_command>
         </test>
         <test>
-            <param name="input_file" value="1000gsample.fastq.gz" ftype="fastq.gz" />
-            <param name="contaminants" value="fastqc_contaminants.txt" ftype="tabular" />
-            <output name="html_file" file="fastqc_report.html" ftype="html" lines_diff="100"/>
-            <output name="text_file" file="fastqc_data.txt" ftype="txt" lines_diff="4"/>
+            <param name="input_file" value="1000trimmed.fastq" />
+            <param name="min_length" value="108" />
+            <output name="html_file" file="fastqc_report_min_length.html" ftype="html" lines_diff="100"/>
+            <output name="text_file" file="fastqc_data_min_length.txt" ftype="txt"/>
         </test>
         <test>
-            <param name="input_file" value="1000gsample.fastq.bz2" ftype="fastq.bz2" />
-            <param name="contaminants" value="fastqc_contaminants.txt" ftype="tabular" />
-            <output name="html_file" file="fastqc_report.html" ftype="html" lines_diff="100"/>
-            <output name="text_file" file="fastqc_data.txt" ftype="txt" lines_diff="4"/>
+            <param name="input_file" value="1000trimmed.fastq" ftype="fastq" />
+            <param name="nogroup" value="--nogroup" />
+            <output name="html_file" file="fastqc_report_nogroup.html" ftype="html" lines_diff="100"/>
+            <output name="text_file" file="fastqc_data_nogroup.txt" ftype="txt"/>
+            <assert_command>
+                <has_text text="--nogroup"/>
+            </assert_command>
         </test>
         <test>
             <param name="input_file" value="hisat_output_1.bam" ftype="bam" />
             <output name="html_file" file="fastqc_report_hisat.html" ftype="html" lines_diff="100"/>
-            <output name="text_file" file="fastqc_data_hisat.txt" ftype="txt" lines_diff="4"/>
+            <output name="text_file" file="fastqc_data_hisat.txt" ftype="txt"/>
         </test>
     </tests>
-    <help>
+    <help><![CDATA[
 .. class:: infomark
 
 **Purpose**
 
 FastQC aims to provide a simple way to do some quality control checks on raw
 sequence data coming from high throughput sequencing pipelines.
-It provides a modular set of analyses which you can use to give a quick
+It provides a set of analyses which you can use to get a quick
 impression of whether your data has any problems of
 which you should be aware before doing any further analysis.
 
@@ -153,7 +202,7 @@
 All except Basic Statistics and Overrepresented sequences are plots.
  .. _FastQC: http://www.bioinformatics.babraham.ac.uk/projects/fastqc/
  .. _Picard-tools: https://broadinstitute.github.io/picard/
-    </help>
+    ]]></help>
     <citations>
         <citation type="bibtex">
         @unpublished{andrews_s,