Mercurial > repos > iuc > rcorrector
changeset 1:6703b98884a2 draft default tip
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/rcorrector commit 65ada0f9589f3ffebad1db6636ccb50d58082606"
author | iuc |
---|---|
date | Thu, 26 Dec 2019 05:21:50 -0500 |
parents | 9a0b65ad3c84 |
children | |
files | FilterUncorrectabledPEfastq.py rcorrector.xml |
diffstat | 2 files changed, 141 insertions(+), 142 deletions(-) [+] |
line wrap: on
line diff
--- a/FilterUncorrectabledPEfastq.py Thu Sep 13 07:00:00 2018 -0400 +++ b/FilterUncorrectabledPEfastq.py Thu Dec 26 05:21:50 2019 -0500 @@ -16,11 +16,9 @@ or gzipped files on the fly, so long as the gzipped files end with 'gz'. """ -# import sys import argparse import gzip -from itertools import izip_longest -# izip +from itertools import zip_longest from os.path import basename @@ -38,7 +36,7 @@ "Collect data into fixed-length chunks or blocks" # grouper('ABCDEFG', 3, 'x') --> ABC DEF Gxx args = [iter(iterable)] * n - return izip_longest(fillvalue=fillvalue, * args) + return zip_longest(fillvalue=fillvalue, * args) if __name__ == "__main__": @@ -61,9 +59,9 @@ for entry in R1: counter += 1 if counter % 100000 == 0: - print "%s reads processed" % counter + print("%s reads processed" % counter) head1, seq1, placeholder1, qual1 = [i.strip() for i in entry] - head2, seq2, placeholder2, qual2 = [j.strip() for j in R2.next()] + head2, seq2, placeholder2, qual2 = [j.strip() for j in next(R2)] if 'unfixable' in head1 or 'unfixable' in head2: unfix_count += 1 else:
--- a/rcorrector.xml Thu Sep 13 07:00:00 2018 -0400 +++ b/rcorrector.xml Thu Dec 26 05:21:50 2019 -0500 @@ -1,136 +1,137 @@ -<tool id="rcorrector" name="RNA-seq Rcorrector" version="1.0.3"> - <description>a kmer-based error correction method for RNA-seq data</description> - <requirements> - <requirement type="package" version="1.0.3">rcorrector</requirement> - </requirements> - <command detect_errors="exit_code"><![CDATA[ - #if $library.lib == "single": - ln -s '$library.input1' input_1.fq && - #end if - #if $library.lib == "paired": - ln -s '$library.input1' input_1.fq && ln -s '$library.input2' input_2.fq && - #end if - run_rcorrector.pl - #if $library.lib == "single": - -s input_1.fq - #end if - #if $library.lib == "paired": - -1 input_1.fq -2 input_2.fq - #end if - -k '$advanced.kmers' -t \${GALAXY_SLOTS:-4} -maxcorK '$advanced.maxcorK' -wk '$advanced.wk' -ek '$advanced.ek' -od output_file_directory - - #if $library.lib == "paired": - #if $library.filter: - && python '$__tool_directory__/FilterUncorrectabledPEfastq.py' - -1 output_file_directory/input_1.cor.fq -2 output_file_directory/input_2.cor.fq -o fixed 2>&1 > rmunfixable.log && cat rmunfixable.log - #end if - #end if - ]]></command> - <inputs> - <conditional name="library"> - <param name="lib" type="select" label="Is this library paired- or single-end?"> - <option value="single">single</option> - <option value="paired" selected="True">paired</option> - </param> - <when value="single"> - <param type="data" name="input1" label="FastQ file" format="fastq,fastqsanger" /> - </when> - <when value="paired"> - <param type="data" name="input1" label="FastQ file R1 (left)" format="fastq,fastqsanger" /> - <param type="data" name="input2" label="FastQ file R2 (right)" format="fastq,fastqsanger" /> - <param name="filter" type="boolean" checked="false" truevalue="TRUE" falsevalue="FALSE" label="Filter uncorrectable reads" help="This will run FilterUncorrectabledPEfastq and remove uncorrectable reads."/> - </when> - </conditional> - <conditional name="advanced"> - <param name="adv" type="boolean" checked="false" truevalue="TRUE" falsevalue="FALSE" label="Additional options"/> - <when value="TRUE"> - <param name="kmers" label="kmer length" value="23" max="32" type="integer" help="(smaller 33, default: 23)"/> - <param name="maxcorK" label="max correction within k-bp window" value="4" type="integer" help="the maximum number of correction within k-bp window (default: 4)"/> - <param name="wk" label="estimate weak kmer count" value="0.95" type="float" help="the proportion of kmers that are used to estimate weak kmer count threshold, lower for more divergent genome (default: 0.95)"/> - <param name="ek" label="expected number of kmers" value="100000000" type="integer" help="expected_number_of_kmers: does not affect the correctness of program but affect the memory usage (default: 100000000)"/> - </when> - <when value="FALSE"> - <param name="kmers" value="23" type="hidden"/> - <param name="maxcorK" value="4" type="hidden"/> - <param name="wk" value="0.95" type="hidden"/> - <param name="ek" value="100000000" type="hidden"/> - </when> - </conditional> - </inputs> - <outputs> - <data name="output1" format="fastq" label="${tool.name} on ${on_string}" from_work_dir="output_file_directory/input_1.cor.fq"> - <filter>library['lib'] == 'single'</filter> - </data> - <data name="output2" format="fastq" label="${tool.name} on ${on_string}: cor R1" from_work_dir="output_file_directory/input_1.cor.fq"> - <filter>library['lib'] == 'paired' and library['filter'] is False</filter> - </data> - <data name="output3" format="fastq" label="${tool.name} on ${on_string}: cor R2" from_work_dir="output_file_directory/input_2.cor.fq"> - <filter>library['lib'] == 'paired' and library['filter'] is False</filter> - </data> - <data name="output4" format="fastq" label="${tool.name} on ${on_string}: fixed R1" from_work_dir="fixed_input_1.cor.fq"> - <filter>library['lib'] == 'paired' and library['filter']</filter> - </data> - <data name="output5" format="fastq" label="${tool.name} on ${on_string}: fixed R2" from_work_dir="fixed_input_2.cor.fq"> - <filter>library['lib'] == 'paired' and library['filter']</filter> - </data> - </outputs> - <tests> - <test> - <conditional name="library"> - <param name="lib" value="paired"/> - <param name="input1" value="sample_read1.fq" ftype="fastq"/> - <param name="input2" value="sample_read2.fq" ftype="fastq"/> - </conditional> - <conditional name="advanced"> - <param name="kmers" value="23"/> - <param name="maxcorK" value="4"/> - <param name="wk" value="0.95"/> - <param name="ek" value="100000000"/> - </conditional> - <output name="output2" file="sample_read1.cor.fq" ftype="fastq"/> - <output name="output3" file="sample_read2.cor.fq" ftype="fastq"/> - </test> - <test> - <conditional name="library"> - <param name="lib" value="paired"/> - <param name="input1" value="sample_read1.fq" ftype="fastq"/> - <param name="input2" value="sample_read2.fq" ftype="fastq"/> - <param name="filter" value="TRUE"/> - </conditional> - <conditional name="advanced"> - <param name="kmers" value="23"/> - <param name="maxcorK" value="4"/> - <param name="wk" value="0.95"/> - <param name="ek" value="100000000"/> - </conditional> - <output name="output2" file="fixed_sample_read1.cor.fq" compare="sim_size"/> - <output name="output3" file="fixed_sample_read2.cor.fq" compare="sim_size"/> - </test> - </tests> - <help><![CDATA[ - -What is Rcorrector? - -Rcorrector (RNA-seq error CORRECTOR) is a kmer-based error correction method for RNA-seq data. -Rcorrector can also be applied to other type of sequencing data where the read coverage is non-uniform, such as single-cell sequencing. - -Uncorrectable paired-end reads can be removed using FilterUncorrectabledPEfastq. - -More informations, see citations. - - ]]> - </help> - <citations> - <citation type="doi">10.1186/s13742-015-0089-y</citation> - <citation type="bibtex"> - @misc{githubFilterUncorrectabledPEfastq, - author = {Adam H. Freedman}, - year = {2016}, - title = {FilterUncorrectabledPEfastq}, - publisher = {GitHub}, - journal = {GitHub repository}, - url = {https://github.com/harvardinformatics/TranscriptomeAssemblyTools/blob/master/FilterUncorrectabledPEfastq.py} - } - </citation> - </citations> -</tool> +<tool id="rcorrector" name="RNA-seq Rcorrector" version="1.0.3+galaxy1"> + <description>a kmer-based error correction method for RNA-seq data</description> + <requirements> + <requirement type="package" version="1.0.3">rcorrector</requirement> + <requirement type="package" version="3.7">python</requirement> + </requirements> + <command detect_errors="exit_code"><![CDATA[ + #if $library.lib == "single": + ln -s '$library.input1' input_1.fq && + #end if + #if $library.lib == "paired": + ln -s '$library.input1' input_1.fq && ln -s '$library.input2' input_2.fq && + #end if + run_rcorrector.pl + #if $library.lib == "single": + -s input_1.fq + #end if + #if $library.lib == "paired": + -1 input_1.fq -2 input_2.fq + #end if + -k '$advanced.kmers' -t \${GALAXY_SLOTS:-4} -maxcorK '$advanced.maxcorK' -wk '$advanced.wk' -ek '$advanced.ek' -od output_file_directory + + #if $library.lib == "paired": + #if $library.filter: + && python '$__tool_directory__/FilterUncorrectabledPEfastq.py' + -1 output_file_directory/input_1.cor.fq -2 output_file_directory/input_2.cor.fq -o fixed 2>&1 > rmunfixable.log && cat rmunfixable.log + #end if + #end if + ]]></command> + <inputs> + <conditional name="library"> + <param name="lib" type="select" label="Is this library paired- or single-end?"> + <option value="single">single</option> + <option value="paired" selected="True">paired</option> + </param> + <when value="single"> + <param type="data" name="input1" label="FastQ file" format="fastq,fastqsanger" /> + </when> + <when value="paired"> + <param type="data" name="input1" label="FastQ file R1 (left)" format="fastq,fastqsanger" /> + <param type="data" name="input2" label="FastQ file R2 (right)" format="fastq,fastqsanger" /> + <param name="filter" type="boolean" checked="false" truevalue="TRUE" falsevalue="FALSE" label="Filter uncorrectable reads" help="This will run FilterUncorrectabledPEfastq and remove uncorrectable reads."/> + </when> + </conditional> + <conditional name="advanced"> + <param name="adv" type="boolean" checked="false" truevalue="TRUE" falsevalue="FALSE" label="Additional options"/> + <when value="TRUE"> + <param name="kmers" label="kmer length" value="23" max="32" type="integer" help="(smaller 33, default: 23)"/> + <param name="maxcorK" label="max correction within k-bp window" value="4" type="integer" help="the maximum number of correction within k-bp window (default: 4)"/> + <param name="wk" label="estimate weak kmer count" value="0.95" type="float" help="the proportion of kmers that are used to estimate weak kmer count threshold, lower for more divergent genome (default: 0.95)"/> + <param name="ek" label="expected number of kmers" value="100000000" type="integer" help="expected_number_of_kmers: does not affect the correctness of program but affect the memory usage (default: 100000000)"/> + </when> + <when value="FALSE"> + <param name="kmers" value="23" type="hidden"/> + <param name="maxcorK" value="4" type="hidden"/> + <param name="wk" value="0.95" type="hidden"/> + <param name="ek" value="100000000" type="hidden"/> + </when> + </conditional> + </inputs> + <outputs> + <data name="output1" format="fastq" label="${tool.name} on ${on_string}" from_work_dir="output_file_directory/input_1.cor.fq"> + <filter>library['lib'] == 'single'</filter> + </data> + <data name="output2" format="fastq" label="${tool.name} on ${on_string}: cor R1" from_work_dir="output_file_directory/input_1.cor.fq"> + <filter>library['lib'] == 'paired' and library['filter'] is False</filter> + </data> + <data name="output3" format="fastq" label="${tool.name} on ${on_string}: cor R2" from_work_dir="output_file_directory/input_2.cor.fq"> + <filter>library['lib'] == 'paired' and library['filter'] is False</filter> + </data> + <data name="output4" format="fastq" label="${tool.name} on ${on_string}: fixed R1" from_work_dir="fixed_input_1.cor.fq"> + <filter>library['lib'] == 'paired' and library['filter']</filter> + </data> + <data name="output5" format="fastq" label="${tool.name} on ${on_string}: fixed R2" from_work_dir="fixed_input_2.cor.fq"> + <filter>library['lib'] == 'paired' and library['filter']</filter> + </data> + </outputs> + <tests> + <test> + <conditional name="library"> + <param name="lib" value="paired"/> + <param name="input1" value="sample_read1.fq" ftype="fastq"/> + <param name="input2" value="sample_read2.fq" ftype="fastq"/> + </conditional> + <conditional name="advanced"> + <param name="kmers" value="23"/> + <param name="maxcorK" value="4"/> + <param name="wk" value="0.95"/> + <param name="ek" value="100000000"/> + </conditional> + <output name="output2" file="sample_read1.cor.fq" ftype="fastq"/> + <output name="output3" file="sample_read2.cor.fq" ftype="fastq"/> + </test> + <test> + <conditional name="library"> + <param name="lib" value="paired"/> + <param name="input1" value="sample_read1.fq" ftype="fastq"/> + <param name="input2" value="sample_read2.fq" ftype="fastq"/> + <param name="filter" value="TRUE"/> + </conditional> + <conditional name="advanced"> + <param name="kmers" value="23"/> + <param name="maxcorK" value="4"/> + <param name="wk" value="0.95"/> + <param name="ek" value="100000000"/> + </conditional> + <output name="output2" file="fixed_sample_read1.cor.fq" compare="sim_size"/> + <output name="output3" file="fixed_sample_read2.cor.fq" compare="sim_size"/> + </test> + </tests> + <help><![CDATA[ + +What is Rcorrector? + +Rcorrector (RNA-seq error CORRECTOR) is a kmer-based error correction method for RNA-seq data. +Rcorrector can also be applied to other type of sequencing data where the read coverage is non-uniform, such as single-cell sequencing. + +Uncorrectable paired-end reads can be removed using FilterUncorrectabledPEfastq. + +More informations, see citations. + + ]]> + </help> + <citations> + <citation type="doi">10.1186/s13742-015-0089-y</citation> + <citation type="bibtex"> + @misc{githubFilterUncorrectabledPEfastq, + author = {Adam H. Freedman}, + year = {2016}, + title = {FilterUncorrectabledPEfastq}, + publisher = {GitHub}, + journal = {GitHub repository}, + url = {https://github.com/harvardinformatics/TranscriptomeAssemblyTools/blob/master/FilterUncorrectabledPEfastq.py} + } + </citation> + </citations> +</tool>