Mercurial > repos > devteam > fastq_paired_end_splitter
changeset 2:9bbe5b7ffa12 draft
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/galaxy_sequence_utils/fastq_paired_end_splitter commit f2582539542b33240234e8ea6093e25d0aee9b6a
author | devteam |
---|---|
date | Sat, 30 Sep 2017 13:55:17 -0400 |
parents | c80bce242eec |
children | 35e38452bb3f |
files | fastq_paired_end_splitter.py fastq_paired_end_splitter.xml tool_dependencies.xml |
diffstat | 3 files changed, 28 insertions(+), 72 deletions(-) [+] |
line wrap: on
line diff
--- a/fastq_paired_end_splitter.py Wed Nov 11 12:42:17 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,33 +0,0 @@ -#Dan Blankenberg -import sys, os, shutil -from galaxy_utils.sequence.fastq import fastqReader, fastqWriter, fastqSplitter - -def main(): - #Read command line arguments - input_filename = sys.argv[1] - input_type = sys.argv[2] or 'sanger' - output1_filename = sys.argv[3] - output2_filename = sys.argv[4] - - splitter = fastqSplitter() - out1 = fastqWriter( open( output1_filename, 'wb' ), format = input_type ) - out2 = fastqWriter( open( output2_filename, 'wb' ), format = input_type ) - - i = None - skip_count = 0 - for i, fastq_read in enumerate( fastqReader( open( input_filename, 'rb' ), format = input_type ) ): - read1, read2 = splitter.split( fastq_read ) - if read1 and read2: - out1.write( read1 ) - out2.write( read2 ) - else: - skip_count += 1 - out1.close() - out2.close() - if i is None: - print "Your file contains no valid FASTQ reads." - else: - print 'Split %s of %s reads (%.2f%%).' % ( i - skip_count + 1, i + 1, float( i - skip_count + 1 ) / float( i + 1 ) * 100.0 ) - -if __name__ == "__main__": - main()
--- a/fastq_paired_end_splitter.xml Wed Nov 11 12:42:17 2015 -0500 +++ b/fastq_paired_end_splitter.xml Sat Sep 30 13:55:17 2017 -0400 @@ -1,27 +1,29 @@ -<tool id="fastq_paired_end_splitter" name="FASTQ splitter" version="1.0.0"> - <description>on joined paired end reads</description> - <requirements> - <requirement type="package" version="1.0.0">galaxy_sequence_utils</requirement> - </requirements> - <command interpreter="python">fastq_paired_end_splitter.py '$input1_file' '${input1_file.extension[len( 'fastq' ):]}' '$output1_file' '$output2_file'</command> - <inputs> - <param name="input1_file" type="data" format="fastqsanger,fastqcssanger" label="FASTQ reads" /> - </inputs> - <outputs> - <data name="output1_file" format="input" /> - <data name="output2_file" format="input" /> - </outputs> - <tests> - <test> - <param name="input1_file" value="3.fastqsanger" ftype="fastqsanger" /> - <output name="output1_file" file="split_pair_reads_1.fastqsanger" /> - <output name="output2_file" file="split_pair_reads_2.fastqsanger" /> - </test> - </tests> - <help> +<tool id="fastq_paired_end_splitter" name="FASTQ splitter" version="1.1.1"> + <description>on joined paired end reads</description> + <requirements> + <requirement type="package" version="1.1.1">galaxy_sequence_utils</requirement> + </requirements> + <command><![CDATA[ +gx-fastq-paired-end-splitter '$input1_file' '${input1_file.extension[len('fastq'):]}' '$output1_file' '$output2_file' + ]]></command> + <inputs> + <param name="input1_file" type="data" format="fastqsanger,fastqcssanger,fastqsanger.gz,fastqcssanger.gz,fastqsanger.bz2,fastqcssanger.bz2" label="FASTQ reads" /> + </inputs> + <outputs> + <data name="output1_file" format_source="input1_file" /> + <data name="output2_file" format_source="input1_file" /> + </outputs> + <tests> + <test> + <param name="input1_file" value="3.fastqsanger" ftype="fastqsanger" /> + <output name="output1_file" file="split_pair_reads_1.fastqsanger" ftype="fastqsanger" /> + <output name="output2_file" file="split_pair_reads_2.fastqsanger" ftype="fastqsanger" /> + </test> + </tests> + <help><![CDATA[ **What it does** -Splits a single fastq dataset representing paired-end run into two datasets (one for each end). This tool works only for datasets where both ends have **the same** length. +Splits a single fastq dataset representing paired-end run into two datasets (one for each end). This tool works only for datasets where both ends have **the same** length. Sequence identifiers will have /1 or /2 appended for the split left-hand and right-hand reads, respectively. @@ -36,7 +38,6 @@ +HWI-EAS91_1_30788AAXX:7:21:1542:1758 hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh`hfhhVZSWehR - ----- **Outputs** @@ -54,14 +55,8 @@ GCTCCTAGCATCTGGAGTCTCTATCACCTGAGCCCA +HWI-EAS91_1_30788AAXX:7:21:1542:1758/2 hhhhhhhhhhhhhhhhhhhhhhhh`hfhhVZSWehR - ------- - - - </help> - - <citations> - <citation type="doi">10.1093/bioinformatics/btq281</citation> - </citations> - + ]]></help> + <citations> + <citation type="doi">10.1093/bioinformatics/btq281</citation> + </citations> </tool>
--- a/tool_dependencies.xml Wed Nov 11 12:42:17 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,6 +0,0 @@ -<?xml version="1.0"?> -<tool_dependency> - <package name="galaxy_sequence_utils" version="1.0.0"> - <repository changeset_revision="0643676ad5f7" name="package_galaxy_utils_1_0" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" /> - </package> -</tool_dependency>