Mercurial > repos > bgruening > fastq_info
changeset 0:7a821cd57b43 draft default tip
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/fastq_info commit abe695da7640bbd2b22980452de7a5cf6698638b"
author | bgruening |
---|---|
date | Sun, 12 Sep 2021 19:28:19 +0000 |
parents | |
children | |
files | fastq_info.xml macros.xml test-data/R1.fastq test-data/R1.fastq.gz test-data/R1_duplicated.fastq test-data/R1_truncated.fastq test-data/R1_truncated_entry.fastq test-data/R2.fastq test-data/output.txt |
diffstat | 9 files changed, 379 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/fastq_info.xml Sun Sep 12 19:28:19 2021 +0000
@@ -0,0 +1,160 @@
+<tool id="fastq_info" name="FASTQ info" version="@TOOL_VERSION@+Galaxy@SUFFIX_VERSION@" profile="20.01"> <!-- Galaxy wrapper for the fastq_info command; the @TOOL_VERSION@ and @SUFFIX_VERSION@ tokens are defined in macros.xml -->
+    <description>validates single or paired fastq files</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="edam_ontology"/>
+    <expand macro="requirements"/>
+    <command detect_errors="exit_code"><![CDATA[
+        fastq_info
+        #if $input_option.input_type == 'single'
+            '$input_option.fastq_1'
+        #else
+            '$input_option.fastq_1'
+            '$input_option.fastq_2'
+        #end if
+        $r
+        $q
+        2>&1 | tee ./output.txt
+    ]]> </command> <!-- Dataset paths are single-quoted so each one reaches fastq_info as exactly one argument. Standard output and standard error are merged and copied to ./output.txt, which the data output collects through from_work_dir. NOTE(review): with the pipe, the exit code seen by detect_errors is presumably that of tee, not fastq_info; confirm this is intended so that a failed validation still yields a report dataset. -->
+    <inputs>
+        <conditional name="input_option">
+            <param name="input_type" type="select" label="Single reads or paired reads" help="Select between paired and single end data">
+                <option value="single">Single</option>
+                <option value="paired">Paired</option>
+            </param>
+            <when value="single">
+                <param name="fastq_1" type="data" format="fastq,fastqsanger,fastq.gz,fastqsanger.gz" label="FASTQ file" help="Input FASTQ file"/>
+            </when>
+            <when value="paired">
+                <param name="fastq_1" type="data" format="fastq,fastqsanger,fastq.gz,fastqsanger.gz" label="FASTQ file: forward reads"/>
+                <param name="fastq_2" type="data" format="fastq,fastqsanger,fastq.gz,fastqsanger.gz" label="FASTQ file: reverse reads"/>
+            </when>
+        </conditional>
+        <param argument="-q" type="boolean" truevalue="" falsevalue="-q" checked="True" label="Determine quality scoring" help="If this option is disabled, it won't fail if the quality encoding cannot be determined" /> <!-- Inverted relative to the flag: checked (the default) adds nothing to the command line, unchecked adds -q -->
+        <param argument="-r" type="boolean" truevalue="-r" falsevalue="" checked="False" label="Skip check for duplicated readnames" help="By using this option no checks are made to determine if the read names/identifiers are unique (it will run faster and use less memory)" />
+    </inputs>
+    <outputs>
+        <data name="output" format="txt" from_work_dir="./output.txt" label="${tool.name} on ${on_string}: Validation result">
+        </data>
+    </outputs>
+    <tests> <!-- Every case checks the report for an expected text fragment and an approximate size (has_size with delta); the first case also compares against test-data/output.txt while allowing two differing lines -->
+        <test expect_num_outputs="1">
+            <conditional name="input_option">
+                <param name="input_type" value="single"/>
+                <param name="fastq_1" value="R1.fastq"/>
+            </conditional>
+            <output name="output" file="output.txt" ftype="txt" lines_diff="2">
+                <assert_contents>
+                    <has_text text="OK"/>
+                    <has_size value="362" delta="100"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!--Test gzip files-->
+        <test expect_num_outputs="1">
+            <conditional name="input_option">
+                <param name="input_type" value="single"/>
+                <param name="fastq_1" value="R1.fastq.gz"/>
+            </conditional>
+            <output name="output" ftype="txt">
+                <assert_contents>
+                    <has_text text="OK"/>
+                    <has_size value="368" delta="100"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!--Test paired reads-->
+        <test expect_num_outputs="1">
+            <conditional name="input_option">
+                <param name="input_type" value="paired"/>
+                <param name="fastq_1" value="R1.fastq"/>
+                <param name="fastq_2" value="R2.fastq"/>
+            </conditional>
+            <output name="output">
+                <assert_contents>
+                    <has_text text="OK"/>
+                    <has_size value="544" delta="100"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- Test q option-->
+        <test expect_num_outputs="1">
+            <param name="q" value="False"/>
+            <conditional name="input_option">
+                <param name="input_type" value="single"/>
+                <param name="fastq_1" value="R1.fastq"/>
+            </conditional>
+            <output name="output">
+                <assert_contents>
+                    <has_text text="OK"/>
+                    <has_size value="362" delta="100"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- Test duplicated id -->
+        <test expect_num_outputs="1">
+            <conditional name="input_option">
+                <param name="input_type" value="single"/>
+                <param name="fastq_1" value="R1_duplicated.fastq"/>
+            </conditional>
+            <output name="output">
+                <assert_contents>
+                    <has_text text="line 36: duplicated sequence SRR1294492.1207358/1"/>
+                    <has_size value="311" delta="100"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- Test r option-->
+        <test expect_num_outputs="1">
+            <param name="r" value="True"/>
+            <conditional name="input_option">
+                <param name="input_type" value="single"/>
+                <param name="fastq_1" value="R1_duplicated.fastq"/>
+            </conditional>
+            <output name="output">
+                <assert_contents>
+                    <has_text text="OK"/>
+                    <has_size value="193" delta="100"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- Test truncated file 01 -->
+        <test expect_num_outputs="1">
+            <conditional name="input_option">
+                <param name="input_type" value="paired"/>
+                <param name="fastq_1" value="R1_truncated.fastq"/>
+                <param name="fastq_2" value="R2.fastq"/>
+            </conditional>
+            <output name="output">
+                <assert_contents>
+                    <has_text text="line 32: file truncated"/>
+                    <has_size value="285" delta="100"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- Test truncated file 02 -->
+        <test expect_num_outputs="1">
+            <conditional name="input_option">
+                <param name="input_type" value="paired"/>
+                <param name="fastq_1" value="R1_truncated_entry.fastq"/>
+                <param name="fastq_2" value="R2.fastq"/>
+            </conditional>
+            <output name="output">
+                <assert_contents>
+                    <has_text text="unpaired read - SRR1294492.2047212/"/>
+                    <has_size value="557" delta="100"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+.. class:: infomark
+
+**Purpose**
+
+FASTQ info is part of `FASTQ utils <https://github.com/nunofonseca/fastq_utils>`_, a set of Linux utilities to validate and manipulate fastq files.
+
+    ]]> </help>
+    <expand macro="citations"/>
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Sun Sep 12 19:28:19 2021 +0000
@@ -0,0 +1,31 @@
+<macros>
+    <token name="@TOOL_VERSION@">0.25.1</token> <!-- used as the fastq_utils requirement version below and as the first part of the tool version in fastq_info.xml -->
+    <token name="@SUFFIX_VERSION@">0</token> <!-- appended after "+Galaxy" in the version attribute of fastq_info.xml -->
+    <xml name="edam_ontology">
+        <edam_topics>
+            <edam_topic>topic_0091</edam_topic>
+        </edam_topics>
+        <edam_operations>
+            <edam_operation>operation_0336</edam_operation>
+        </edam_operations>
+    </xml> <!-- edam_ontology: one EDAM topic and one EDAM operation identifier for the tool -->
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@TOOL_VERSION@">fastq_utils</requirement>
+        </requirements>
+    </xml> <!-- requirements: the fastq_utils package, pinned to @TOOL_VERSION@ -->
+    <xml name="citations">
+        <citations>
+            <citation type="bibtex">
+                @misc{githubFastqUtils,
+                author = {Fonseca, Nuno },
+                year = {2021},
+                title = {fastq_utils},
+                publisher = {GitHub},
+                journal = {GitHub repository},
+                url = {https://github.com/nunofonseca/fastq_utils},
+                }
+            </citation>
+        </citations>
+    </xml> <!-- citations: BibTeX entry pointing at the fastq_utils GitHub repository -->
+</macros>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/R1.fastq Sun Sep 12 19:28:19 2021 +0000 @@ -0,0 +1,36 @@ +@SRR1294492.1768716/1 +AAAAAAAAAAAAGAAGGTAAAAAGC ++ +JJJJJJJJJJJJHHHHHFFFFFCCC +@SRR1294492.1759773/1 +GATTACAGCTGTGAGCCACTGTGCC ++ +@?@DDA>DHHHDFA4:CGHIICF9E +@SRR1294492.2642415/1 +AGATCGAGACCATCATGGCTAACAC ++ +JJJJJJJIHJHGA4+HHFFFFFC@@ +@SRR1294492.922584/1 +AGTTCAAGACCAGCCTGACCAACAT ++ +IJJJJJIHHJIIHHHHFDFFFFCC@ +@SRR1294492.2978607/1 +GTTGCCCAGGCTGGAGTGCAGTGGC ++ +BBCFFFFFHHHHHJJJHIIJJHIJJ +@SRR1294492.123278/1 +GATCACAAGGTCATGAGATCAAGAC ++ +CCCFFFFFHHFHHIJIJIJJJJJIJ +@SRR1294492.2199602/1 +CTCCCAAGACTAAACCAGGAAGAAG ++ +CCCFFFFFHHHHHJJJJJJJJJJJJ +@SRR1294492.1207358/1 +GGCCAACATGGTGAAACCCCATGTC ++ +CCCFFFFFHHHFHJJJJJJJJJJIJ +@SRR1294492.2047212/1 +TCCCAGGTTCAAGCAATTCTCCTGC ++ +BBBDFFFDHHHHHIJJJJJJJJJJJ \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/R1_duplicated.fastq Sun Sep 12 19:28:19 2021 +0000 @@ -0,0 +1,36 @@ +@SRR1294492.1768716/1 +AAAAAAAAAAAAGAAGGTAAAAAGC ++ +JJJJJJJJJJJJHHHHHFFFFFCCC +@SRR1294492.1759773/1 +GATTACAGCTGTGAGCCACTGTGCC ++ +@?@DDA>DHHHDFA4:CGHIICF9E +@SRR1294492.2642415/1 +AGATCGAGACCATCATGGCTAACAC ++ +JJJJJJJIHJHGA4+HHFFFFFC@@ +@SRR1294492.922584/1 +AGTTCAAGACCAGCCTGACCAACAT ++ +IJJJJJIHHJIIHHHHFDFFFFCC@ +@SRR1294492.2978607/1 +GTTGCCCAGGCTGGAGTGCAGTGGC ++ +BBCFFFFFHHHHHJJJHIIJJHIJJ +@SRR1294492.123278/1 +GATCACAAGGTCATGAGATCAAGAC ++ +CCCFFFFFHHFHHIJIJIJJJJJIJ +@SRR1294492.2199602/1 +CTCCCAAGACTAAACCAGGAAGAAG ++ +CCCFFFFFHHHHHJJJJJJJJJJJJ +@SRR1294492.1207358/1 +GGCCAACATGGTGAAACCCCATGTC ++ +CCCFFFFFHHHFHJJJJJJJJJJIJ +@SRR1294492.1207358/1 +GGCCAACATGGTGAAACCCCATGTC ++ +CCCFFFFFHHHFHJJJJJJJJJJIJ \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/R1_truncated.fastq Sun Sep 12 19:28:19 2021 +0000 @@ -0,0 +1,35 @@ +@SRR1294492.1768716/1 +AAAAAAAAAAAAGAAGGTAAAAAGC ++ +JJJJJJJJJJJJHHHHHFFFFFCCC +@SRR1294492.1759773/1 +GATTACAGCTGTGAGCCACTGTGCC ++ +@?@DDA>DHHHDFA4:CGHIICF9E +@SRR1294492.2642415/1 +AGATCGAGACCATCATGGCTAACAC ++ +JJJJJJJIHJHGA4+HHFFFFFC@@ +@SRR1294492.922584/1 +AGTTCAAGACCAGCCTGACCAACAT ++ +IJJJJJIHHJIIHHHHFDFFFFCC@ +@SRR1294492.2978607/1 +GTTGCCCAGGCTGGAGTGCAGTGGC ++ +BBCFFFFFHHHHHJJJHIIJJHIJJ +@SRR1294492.123278/1 +GATCACAAGGTCATGAGATCAAGAC ++ +CCCFFFFFHHFHHIJIJIJJJJJIJ +@SRR1294492.2199602/1 +CTCCCAAGACTAAACCAGGAAGAAG ++ +CCCFFFFFHHHHHJJJJJJJJJJJJ +@SRR1294492.1207358/1 +GGCCAACATGGTGAAACCCCATGTC ++ +CCCFFFFFHHHFHJJJJJJJJJJIJ +@SRR1294492.2047212/1 +TCCCAGGTTCAAGCAATTCTCCTGC ++ \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/R1_truncated_entry.fastq Sun Sep 12 19:28:19 2021 +0000 @@ -0,0 +1,32 @@ +@SRR1294492.1768716/1 +AAAAAAAAAAAAGAAGGTAAAAAGC ++ +JJJJJJJJJJJJHHHHHFFFFFCCC +@SRR1294492.1759773/1 +GATTACAGCTGTGAGCCACTGTGCC ++ +@?@DDA>DHHHDFA4:CGHIICF9E +@SRR1294492.2642415/1 +AGATCGAGACCATCATGGCTAACAC ++ +JJJJJJJIHJHGA4+HHFFFFFC@@ +@SRR1294492.922584/1 +AGTTCAAGACCAGCCTGACCAACAT ++ +IJJJJJIHHJIIHHHHFDFFFFCC@ +@SRR1294492.2978607/1 +GTTGCCCAGGCTGGAGTGCAGTGGC ++ +BBCFFFFFHHHHHJJJHIIJJHIJJ +@SRR1294492.123278/1 +GATCACAAGGTCATGAGATCAAGAC ++ +CCCFFFFFHHFHHIJIJIJJJJJIJ +@SRR1294492.2199602/1 +CTCCCAAGACTAAACCAGGAAGAAG ++ +CCCFFFFFHHHHHJJJJJJJJJJJJ +@SRR1294492.1207358/1 +GGCCAACATGGTGAAACCCCATGTC ++ +CCCFFFFFHHHFHJJJJJJJJJJIJ \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/R2.fastq Sun Sep 12 19:28:19 2021 +0000 @@ -0,0 +1,36 @@ +@SRR1294492.1768716/2 +TCACCCAGGCTGGTGTGTAATGGTG ++ +@2233332++<+02+?2,+7AA=>+ +@SRR1294492.1759773/2 +GTATTAGAGCTTGTATTTTAGATTT ++ +@@8DDD>=DCFFBCGGHICD@FHHE +@SRR1294492.2642415/2 +GTGAGCCACCGTGCCCAGCCTATCC ++ +IJJIIIHHJJJJHHHHHFFFFF@CC +@SRR1294492.922584/2 +AACAGTTTGGCTGGATACATAATCC ++ +IJJJJJJIJJIJHHHHHFFFFFCCC +@SRR1294492.2978607/2 +GGCATATGCCACCATGCCCAGATAA ++ +@C@FFFFFHHHHHJJJJJJJJJJJJ +@SRR1294492.123278/2 +TCTCCGGCCTCAGCCTCCTGAGCGG ++ +CCCFFDDFHHHHHJJJJJJJJJJJJ +@SRR1294492.2199602/2 +GTATCAGGATGATGCTGTTCTCATA ++ +CCCFFFFFHHHHHJJJJJJJJJJJJ +@SRR1294492.1207358/2 +GTGGTGCGATCTTGGCTCACTGCAA ++ +BBBFBDFFFHHHHJJJIJIJJIIHI +@SRR1294492.2047212/2 +AGACCAGCCTGGCCAAGATGCTGAA ++ +@@CFFFFFHHHHGJJJJJJJJJJJJ \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output.txt Sun Sep 12 19:28:19 2021 +0000 @@ -0,0 +1,13 @@ +fastq_utils 0.25.1 +DEFAULT_HASHSIZE=39000001 +Scanning and indexing all reads from /tmp/tmp9in9_6me/files/a/e/9/dataset_ae914285-366b-4788-9f38-183f736dc99f.dat +Scanning complete. + +Reads processed: 9 +Memory used in indexing: ~0 MB +------------------------------------ +Number of reads: 9 +Quality encoding range: 43 74 +Quality encoding: 33 +Read length: 25 25 25 +OK