changeset 0:7a821cd57b43 draft default tip

"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/fastq_info commit abe695da7640bbd2b22980452de7a5cf6698638b"
author bgruening
date Sun, 12 Sep 2021 19:28:19 +0000
parents
children
files fastq_info.xml macros.xml test-data/R1.fastq test-data/R1.fastq.gz test-data/R1_duplicated.fastq test-data/R1_truncated.fastq test-data/R1_truncated_entry.fastq test-data/R2.fastq test-data/output.txt
diffstat 9 files changed, 379 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fastq_info.xml	Sun Sep 12 19:28:19 2021 +0000
@@ -0,0 +1,160 @@
+<tool id="fastq_info" name="FASTQ info" version="@TOOL_VERSION@+Galaxy@SUFFIX_VERSION@" profile="20.01">
+    <description>validates single or paired fastq files</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="edam_ontology"/>
+    <expand macro="requirements"/>
+    <command detect_errors="exit_code"><![CDATA[
+        ## Flags precede the FASTQ paths; paths are single-quoted so unusual characters cannot split or alter the arguments.
+        fastq_info
+        $r
+        $q
+        '$input_option.fastq_1'
+        #if $input_option.input_type == 'paired'
+            '$input_option.fastq_2'
+        #end if
+        ## stdout and stderr are piped through tee, so the validation report (including failures) lands in the output dataset.
+        2>&1 | tee ./output.txt
+    ]]></command>
+    <inputs><!-- Both branches of the conditional expose fastq_1; only the paired branch adds fastq_2. -->
+        <conditional name="input_option">
+            <param name="input_type" type="select" label="Single reads or paired reads" help="Select between paired and single end data">
+                <option value="single">Single</option>
+                <option value="paired">Paired</option>
+            </param>
+            <when value="single">
+                <param name="fastq_1"  type="data" format="fastq,fastqsanger,fastq.gz,fastqsanger.gz" label="FASTQ file" help="Input FASTQ file"/>
+            </when>
+            <when value="paired">
+                <param name="fastq_1" type="data" format="fastq,fastqsanger,fastq.gz,fastqsanger.gz" label="FASTQ file: forward reads"/>
+                <param name="fastq_2" type="data" format="fastq,fastqsanger,fastq.gz,fastqsanger.gz" label="FASTQ file: reverse reads"/>
+            </when>
+        </conditional><!-- Note the inverted mapping on the next parameter: leaving "Determine quality scoring" checked emits no flag, unchecking it emits -q. -->
+        <param argument="-q" type="boolean" truevalue="" falsevalue="-q" checked="True" label="Determine quality scoring" help="If this option is disabled, it won't fail if the quality encoding cannot be determined" />
+        <param argument="-r" type="boolean" truevalue="-r" falsevalue="" checked="False" label="Skip check for duplicated readnames" help="By using this option no checks are made to determine if the read names/identifiers are unique (it will run faster and use less memory)" />
+    </inputs>
+    <outputs><!-- Single text dataset collected from ./output.txt, the file the command writes through tee in the job working directory. -->
+        <data name="output" format="txt" from_work_dir="./output.txt" label="${tool.name} on ${on_string}: Validation result">
+        </data>
+    </outputs>
+    <tests><!-- First test: valid single-end reads compared against the stored output.txt; lines_diff presumably tolerates the run-specific input path in the report. -->
+        <test expect_num_outputs="1">
+            <conditional name="input_option">
+                <param name="input_type" value="single"/>
+                <param name="fastq_1" value="R1.fastq"/>
+            </conditional>
+            <output name="output" file="output.txt" ftype="txt" lines_diff="2">
+                <assert_contents>
+                    <has_text text="OK"/>
+                    <has_size value="362" delta="100"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- Test gzip-compressed single-end input -->
+        <test expect_num_outputs="1">
+            <conditional name="input_option">
+                <param name="input_type" value="single"/>
+                <param name="fastq_1" value="R1.fastq.gz"/>
+            </conditional>
+            <output name="output" ftype="txt">
+                <assert_contents>
+                    <has_text text="OK"/>
+                    <has_size value="368" delta="100"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- Test valid paired-end reads -->
+        <test expect_num_outputs="1">
+            <conditional name="input_option">
+                <param name="input_type" value="paired"/>
+                <param name="fastq_1" value="R1.fastq"/>
+                <param name="fastq_2" value="R2.fastq"/>
+            </conditional>
+            <output name="output">
+                <assert_contents>
+                    <has_text text="OK"/>
+                    <has_size value="544" delta="100"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- Test with quality-encoding determination disabled (the command receives -q) -->
+        <test expect_num_outputs="1">
+            <param name="q" value="False"/>
+            <conditional name="input_option">
+                <param name="input_type" value="single"/>
+                <param name="fastq_1" value="R1.fastq"/>
+            </conditional>
+            <output name="output">
+                <assert_contents>
+                    <has_text text="OK"/>
+                    <has_size value="362" delta="100"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- Test that a duplicated read identifier is reported in the output -->
+        <test expect_num_outputs="1">
+            <conditional name="input_option">
+                <param name="input_type" value="single"/>
+                <param name="fastq_1" value="R1_duplicated.fastq"/>
+            </conditional>
+            <output name="output">
+                <assert_contents>
+                    <has_text text="line 36: duplicated sequence SRR1294492.1207358/1"/>
+                    <has_size value="311" delta="100"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- Test that -r skips the duplicate check, so the duplicated file validates as OK -->
+        <test expect_num_outputs="1">
+            <param name="r" value="True"/>
+            <conditional name="input_option">
+                <param name="input_type" value="single"/>
+                <param name="fastq_1" value="R1_duplicated.fastq"/>
+            </conditional>
+            <output name="output">
+                <assert_contents>
+                    <has_text text="OK"/>
+                    <has_size value="193" delta="100"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- Test truncated file 01: R1 ends part-way through its final record (no quality line) -->
+        <test expect_num_outputs="1">
+            <conditional name="input_option">
+                <param name="input_type" value="paired"/>
+                <param name="fastq_1" value="R1_truncated.fastq"/>
+                <param name="fastq_2" value="R2.fastq"/>
+            </conditional>
+            <output name="output">
+                <assert_contents>
+                    <has_text text="line 32: file truncated"/>
+                    <has_size value="285" delta="100"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- Test truncated file 02: R1 lacks the final record, leaving its R2 mate unpaired -->
+        <test expect_num_outputs="1">
+            <conditional name="input_option">
+                <param name="input_type" value="paired"/>
+                <param name="fastq_1" value="R1_truncated_entry.fastq"/>
+                <param name="fastq_2" value="R2.fastq"/>
+            </conditional>
+            <output name="output">
+                <assert_contents>
+                    <has_text text="unpaired read - SRR1294492.2047212/"/>
+                    <has_size value="557" delta="100"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+.. class:: infomark
+
+**Purpose**
+
+FASTQ info validates single-end or paired-end FASTQ files. It is part of `FASTQ utils <https://github.com/nunofonseca/fastq_utils>`_, a set of Linux utilities to validate and manipulate FASTQ files.
+
+    ]]>    </help>
+    <expand macro="citations"/>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Sun Sep 12 19:28:19 2021 +0000
@@ -0,0 +1,31 @@
+<macros>
+    <token name="@TOOL_VERSION@">0.25.1</token><!-- fastq_utils package version; reused for the requirement below and the tool version string -->
+    <token name="@SUFFIX_VERSION@">0</token><!-- wrapper revision appended to the tool version after "+Galaxy" -->
+    <xml name="edam_ontology">
+        <edam_topics>
+            <edam_topic>topic_0091</edam_topic>
+        </edam_topics>
+        <edam_operations>
+            <edam_operation>operation_0336</edam_operation>
+        </edam_operations>
+    </xml>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@TOOL_VERSION@">fastq_utils</requirement>
+        </requirements>
+    </xml>
+    <xml name="citations">
+        <citations>
+            <citation type="bibtex">
+                @misc{githubFastqUtils,
+                author = {Fonseca, Nuno},
+                year = {2021},
+                title = {fastq_utils},
+                publisher = {GitHub},
+                journal = {GitHub repository},
+                url = {https://github.com/nunofonseca/fastq_utils},
+                }
+            </citation>
+        </citations>
+    </xml>
+</macros>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/R1.fastq	Sun Sep 12 19:28:19 2021 +0000
@@ -0,0 +1,36 @@
+@SRR1294492.1768716/1
+AAAAAAAAAAAAGAAGGTAAAAAGC
++
+JJJJJJJJJJJJHHHHHFFFFFCCC
+@SRR1294492.1759773/1
+GATTACAGCTGTGAGCCACTGTGCC
++
+@?@DDA>DHHHDFA4:CGHIICF9E
+@SRR1294492.2642415/1
+AGATCGAGACCATCATGGCTAACAC
++
+JJJJJJJIHJHGA4+HHFFFFFC@@
+@SRR1294492.922584/1
+AGTTCAAGACCAGCCTGACCAACAT
++
+IJJJJJIHHJIIHHHHFDFFFFCC@
+@SRR1294492.2978607/1
+GTTGCCCAGGCTGGAGTGCAGTGGC
++
+BBCFFFFFHHHHHJJJHIIJJHIJJ
+@SRR1294492.123278/1
+GATCACAAGGTCATGAGATCAAGAC
++
+CCCFFFFFHHFHHIJIJIJJJJJIJ
+@SRR1294492.2199602/1
+CTCCCAAGACTAAACCAGGAAGAAG
++
+CCCFFFFFHHHHHJJJJJJJJJJJJ
+@SRR1294492.1207358/1
+GGCCAACATGGTGAAACCCCATGTC
++
+CCCFFFFFHHHFHJJJJJJJJJJIJ
+@SRR1294492.2047212/1
+TCCCAGGTTCAAGCAATTCTCCTGC
++
+BBBDFFFDHHHHHIJJJJJJJJJJJ
\ No newline at end of file
Binary file test-data/R1.fastq.gz has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/R1_duplicated.fastq	Sun Sep 12 19:28:19 2021 +0000
@@ -0,0 +1,36 @@
+@SRR1294492.1768716/1
+AAAAAAAAAAAAGAAGGTAAAAAGC
++
+JJJJJJJJJJJJHHHHHFFFFFCCC
+@SRR1294492.1759773/1
+GATTACAGCTGTGAGCCACTGTGCC
++
+@?@DDA>DHHHDFA4:CGHIICF9E
+@SRR1294492.2642415/1
+AGATCGAGACCATCATGGCTAACAC
++
+JJJJJJJIHJHGA4+HHFFFFFC@@
+@SRR1294492.922584/1
+AGTTCAAGACCAGCCTGACCAACAT
++
+IJJJJJIHHJIIHHHHFDFFFFCC@
+@SRR1294492.2978607/1
+GTTGCCCAGGCTGGAGTGCAGTGGC
++
+BBCFFFFFHHHHHJJJHIIJJHIJJ
+@SRR1294492.123278/1
+GATCACAAGGTCATGAGATCAAGAC
++
+CCCFFFFFHHFHHIJIJIJJJJJIJ
+@SRR1294492.2199602/1
+CTCCCAAGACTAAACCAGGAAGAAG
++
+CCCFFFFFHHHHHJJJJJJJJJJJJ
+@SRR1294492.1207358/1
+GGCCAACATGGTGAAACCCCATGTC
++
+CCCFFFFFHHHFHJJJJJJJJJJIJ
+@SRR1294492.1207358/1
+GGCCAACATGGTGAAACCCCATGTC
++
+CCCFFFFFHHHFHJJJJJJJJJJIJ
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/R1_truncated.fastq	Sun Sep 12 19:28:19 2021 +0000
@@ -0,0 +1,35 @@
+@SRR1294492.1768716/1
+AAAAAAAAAAAAGAAGGTAAAAAGC
++
+JJJJJJJJJJJJHHHHHFFFFFCCC
+@SRR1294492.1759773/1
+GATTACAGCTGTGAGCCACTGTGCC
++
+@?@DDA>DHHHDFA4:CGHIICF9E
+@SRR1294492.2642415/1
+AGATCGAGACCATCATGGCTAACAC
++
+JJJJJJJIHJHGA4+HHFFFFFC@@
+@SRR1294492.922584/1
+AGTTCAAGACCAGCCTGACCAACAT
++
+IJJJJJIHHJIIHHHHFDFFFFCC@
+@SRR1294492.2978607/1
+GTTGCCCAGGCTGGAGTGCAGTGGC
++
+BBCFFFFFHHHHHJJJHIIJJHIJJ
+@SRR1294492.123278/1
+GATCACAAGGTCATGAGATCAAGAC
++
+CCCFFFFFHHFHHIJIJIJJJJJIJ
+@SRR1294492.2199602/1
+CTCCCAAGACTAAACCAGGAAGAAG
++
+CCCFFFFFHHHHHJJJJJJJJJJJJ
+@SRR1294492.1207358/1
+GGCCAACATGGTGAAACCCCATGTC
++
+CCCFFFFFHHHFHJJJJJJJJJJIJ
+@SRR1294492.2047212/1
+TCCCAGGTTCAAGCAATTCTCCTGC
++
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/R1_truncated_entry.fastq	Sun Sep 12 19:28:19 2021 +0000
@@ -0,0 +1,32 @@
+@SRR1294492.1768716/1
+AAAAAAAAAAAAGAAGGTAAAAAGC
++
+JJJJJJJJJJJJHHHHHFFFFFCCC
+@SRR1294492.1759773/1
+GATTACAGCTGTGAGCCACTGTGCC
++
+@?@DDA>DHHHDFA4:CGHIICF9E
+@SRR1294492.2642415/1
+AGATCGAGACCATCATGGCTAACAC
++
+JJJJJJJIHJHGA4+HHFFFFFC@@
+@SRR1294492.922584/1
+AGTTCAAGACCAGCCTGACCAACAT
++
+IJJJJJIHHJIIHHHHFDFFFFCC@
+@SRR1294492.2978607/1
+GTTGCCCAGGCTGGAGTGCAGTGGC
++
+BBCFFFFFHHHHHJJJHIIJJHIJJ
+@SRR1294492.123278/1
+GATCACAAGGTCATGAGATCAAGAC
++
+CCCFFFFFHHFHHIJIJIJJJJJIJ
+@SRR1294492.2199602/1
+CTCCCAAGACTAAACCAGGAAGAAG
++
+CCCFFFFFHHHHHJJJJJJJJJJJJ
+@SRR1294492.1207358/1
+GGCCAACATGGTGAAACCCCATGTC
++
+CCCFFFFFHHHFHJJJJJJJJJJIJ
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/R2.fastq	Sun Sep 12 19:28:19 2021 +0000
@@ -0,0 +1,36 @@
+@SRR1294492.1768716/2
+TCACCCAGGCTGGTGTGTAATGGTG
++
+@2233332++<+02+?2,+7AA=>+
+@SRR1294492.1759773/2
+GTATTAGAGCTTGTATTTTAGATTT
++
+@@8DDD>=DCFFBCGGHICD@FHHE
+@SRR1294492.2642415/2
+GTGAGCCACCGTGCCCAGCCTATCC
++
+IJJIIIHHJJJJHHHHHFFFFF@CC
+@SRR1294492.922584/2
+AACAGTTTGGCTGGATACATAATCC
++
+IJJJJJJIJJIJHHHHHFFFFFCCC
+@SRR1294492.2978607/2
+GGCATATGCCACCATGCCCAGATAA
++
+@C@FFFFFHHHHHJJJJJJJJJJJJ
+@SRR1294492.123278/2
+TCTCCGGCCTCAGCCTCCTGAGCGG
++
+CCCFFDDFHHHHHJJJJJJJJJJJJ
+@SRR1294492.2199602/2
+GTATCAGGATGATGCTGTTCTCATA
++
+CCCFFFFFHHHHHJJJJJJJJJJJJ
+@SRR1294492.1207358/2
+GTGGTGCGATCTTGGCTCACTGCAA
++
+BBBFBDFFFHHHHJJJIJIJJIIHI
+@SRR1294492.2047212/2
+AGACCAGCCTGGCCAAGATGCTGAA
++
+@@CFFFFFHHHHGJJJJJJJJJJJJ
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output.txt	Sun Sep 12 19:28:19 2021 +0000
@@ -0,0 +1,13 @@
+fastq_utils 0.25.1
+DEFAULT_HASHSIZE=39000001
+Scanning and indexing all reads from /tmp/tmp9in9_6me/files/a/e/9/dataset_ae914285-366b-4788-9f38-183f736dc99f.dat
+Scanning complete.
+
+Reads processed: 9
+Memory used in indexing: ~0 MB
+------------------------------------
+Number of reads: 9
+Quality encoding range: 43 74
+Quality encoding: 33
+Read length: 25 25 25
+OK