Mercurial > repos > devteam > fastq_paired_end_splitter
changeset 0:c549e99026db draft
Imported from capsule None
author | devteam |
---|---|
date | Mon, 27 Jan 2014 09:29:20 -0500 |
parents | |
children | c80bce242eec |
files | fastq_paired_end_splitter.py fastq_paired_end_splitter.xml test-data/3.fastqsanger test-data/split_pair_reads_1.fastqsanger test-data/split_pair_reads_2.fastqsanger tool_dependencies.xml |
diffstat | 6 files changed, 165 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
#Dan Blankenberg
"""Split a FASTQ file of joined paired-end reads into two FASTQ files.

Usage:
    fastq_paired_end_splitter.py INPUT FORMAT OUTPUT1 OUTPUT2

FORMAT is the FASTQ variant name handed to the reader and both writers; an
empty string falls back to 'sanger'.
"""
import sys
import os
import shutil
from contextlib import closing

from galaxy_utils.sequence.fastq import fastqReader, fastqWriter, fastqSplitter


def main():
    """Split each read of the input file and write the halves to two outputs.

    Command-line arguments (positional, read from ``sys.argv``):
        1. path of the input FASTQ file
        2. FASTQ format name (empty string means 'sanger')
        3. path of the output file receiving the first half of each read
        4. path of the output file receiving the second half of each read

    Every read is passed to ``fastqSplitter.split``.  When both returned
    halves are truthy they are written to the respective outputs; otherwise
    the read is counted as skipped and nothing is written for it.

    A one-line summary is printed to stdout once the whole input has been
    processed.  All three file handles are released even if reading or
    splitting raises, so no descriptors or buffered output are leaked.
    """
    # Read command line arguments
    input_filename = sys.argv[1]
    input_type = sys.argv[2] or 'sanger'
    output1_filename = sys.argv[3]
    output2_filename = sys.argv[4]

    splitter = fastqSplitter()
    total_count = 0
    skip_count = 0

    # The writers are created before the input is opened, matching the
    # original order of side effects (output files exist even if the input
    # cannot be opened).  closing() guarantees .close() on every exit path.
    with closing( fastqWriter( open( output1_filename, 'wb' ), format = input_type ) ) as out1:
        with closing( fastqWriter( open( output2_filename, 'wb' ), format = input_type ) ) as out2:
            with open( input_filename, 'rb' ) as input_file:
                for fastq_read in fastqReader( input_file, format = input_type ):
                    total_count += 1
                    read1, read2 = splitter.split( fastq_read )
                    if read1 and read2:
                        out1.write( read1 )
                        out2.write( read2 )
                    else:
                        skip_count += 1

    # Single-argument, parenthesised print produces identical output under
    # Python 2 and is also valid syntax under Python 3.
    if total_count == 0:
        print( "Your file contains no valid FASTQ reads." )
    else:
        split_count = total_count - skip_count
        percent_split = float( split_count ) / float( total_count ) * 100.0
        print( 'Split %s of %s reads (%.2f%%).' % ( split_count, total_count, percent_split ) )


if __name__ == "__main__":
    main()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fastq_paired_end_splitter.xml Mon Jan 27 09:29:20 2014 -0500 @@ -0,0 +1,66 @@ +<tool id="fastq_paired_end_splitter" name="FASTQ splitter" version="1.0.0"> + <description>on joined paired end reads</description> + <requirements> + <requirement type="package" version="1.0.0">galaxy_sequence_utils</requirement> + </requirements> + <command interpreter="python">fastq_paired_end_splitter.py '$input1_file' '${input1_file.extension[len( 'fastq' ):]}' '$output1_file' '$output2_file'</command> + <inputs> + <param name="input1_file" type="data" format="fastqsanger,fastqcssanger" label="FASTQ reads" /> + </inputs> + <outputs> + <data name="output1_file" format="input" /> + <data name="output2_file" format="input" /> + </outputs> + <tests> + <test> + <param name="input1_file" value="3.fastqsanger" ftype="fastqsanger" /> + <output name="output1_file" file="split_pair_reads_1.fastqsanger" /> + <output name="output2_file" file="split_pair_reads_2.fastqsanger" /> + </test> + </tests> + <help> +**What it does** + +Splits a single fastq dataset representing paired-end run into two datasets (one for each end). This tool works only for datasets where both ends have **the same** length. + +Sequence identifiers will have /1 or /2 appended for the split left-hand and right-hand reads, respectively. 
+ +----- + +**Input format** + +A multiple-fastq file, for example:: + + @HWI-EAS91_1_30788AAXX:7:21:1542:1758 + GTCAATTGTACTGGTCAATACTAAAAGAATAGGATCGCTCCTAGCATCTGGAGTCTCTATCACCTGAGCCCA + +HWI-EAS91_1_30788AAXX:7:21:1542:1758 + hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh`hfhhVZSWehR + + +----- + +**Outputs** + +Left-hand Read:: + + @HWI-EAS91_1_30788AAXX:7:21:1542:1758/1 + GTCAATTGTACTGGTCAATACTAAAAGAATAGGATC + +HWI-EAS91_1_30788AAXX:7:21:1542:1758/1 + hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh + +Right-hand Read:: + + @HWI-EAS91_1_30788AAXX:7:21:1542:1758/2 + GCTCCTAGCATCTGGAGTCTCTATCACCTGAGCCCA + +HWI-EAS91_1_30788AAXX:7:21:1542:1758/2 + hhhhhhhhhhhhhhhhhhhhhhhh`hfhhVZSWehR + +------ + +**Citation** + +If you use this tool, please cite `Blankenberg D, Gordon A, Von Kuster G, Coraor N, Taylor J, Nekrutenko A; Galaxy Team. Manipulation of FASTQ data with Galaxy. Bioinformatics. 2010 Jul 15;26(14):1783-5. <http://www.ncbi.nlm.nih.gov/pubmed/20562416>`_ + + + </help> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/3.fastqsanger Mon Jan 27 09:29:20 2014 -0500 @@ -0,0 +1,20 @@ +@HWI-EAS91_1_30788AAXX:7:21:1542:1758 +GTCAATTGTACTGGTCAATACTAAAAGAATAGGATCGCTCCTAGCATCTGGAGTCTCTATCACCTGAGCCCA ++HWI-EAS91_1_30788AAXX:7:21:1542:1758 +hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh`hfhhVZSWehR +@HWI-EAS91_1_30788AAXX:7:22:1621:462 +ATAATGGCTATTATTGTGGGGGGGATGATGCTGGAAACTAGCCCCAATATCAATCCTATATCAAATCTCACC ++HWI-EAS91_1_30788AAXX:7:22:1621:462 +hhhhhhhhhhhhQAhh@hhhhNhhhfhMbCIScC?hhJhhhhChhhJhhhRhhKhePhc\KhhV\KhXhJhh +@HWI-EAS91_1_30788AAXX:7:45:408:807 +TACCCGATTTTTTGCTTTCCACTTTATCCTACCCTTATGAGTGCTAGGATCAGGATGGAGAGGATTAGGGCT ++HWI-EAS91_1_30788AAXX:7:45:408:807 +hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh`hhhZh`hhhhhRXhhYh +@HWI-EAS91_1_30788AAXX:7:49:654:1439 +CTAACTCTATTTATTGTATTTCAACTAAAAATCTCATAGGTTTATTGATAGTTGTGTTGTTGGTGTAAATGG ++HWI-EAS91_1_30788AAXX:7:49:654:1439 +hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhdhh_hG\XhU@ +@HWI-EAS91_1_30788AAXX:7:64:947:234 +TATCAAAAAAGAATATAATCTGAATCAACACTACAACCTATTAGTGTGTAGAATAGGAAGTAGAGGCCTGCG ++HWI-EAS91_1_30788AAXX:7:64:947:234 +hhhhhhhhhhhhhhhhhhhhhhhRhhehhahhhhhJhhhhhhhh^hPhWfhhhhThWUhhfhh_hhNIVPUd
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/split_pair_reads_1.fastqsanger Mon Jan 27 09:29:20 2014 -0500 @@ -0,0 +1,20 @@ +@HWI-EAS91_1_30788AAXX:7:21:1542:1758/1 +GTCAATTGTACTGGTCAATACTAAAAGAATAGGATC ++HWI-EAS91_1_30788AAXX:7:21:1542:1758/1 +hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh +@HWI-EAS91_1_30788AAXX:7:22:1621:462/1 +ATAATGGCTATTATTGTGGGGGGGATGATGCTGGAA ++HWI-EAS91_1_30788AAXX:7:22:1621:462/1 +hhhhhhhhhhhhQAhh@hhhhNhhhfhMbCIScC?h +@HWI-EAS91_1_30788AAXX:7:45:408:807/1 +TACCCGATTTTTTGCTTTCCACTTTATCCTACCCTT ++HWI-EAS91_1_30788AAXX:7:45:408:807/1 +hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh +@HWI-EAS91_1_30788AAXX:7:49:654:1439/1 +CTAACTCTATTTATTGTATTTCAACTAAAAATCTCA ++HWI-EAS91_1_30788AAXX:7:49:654:1439/1 +hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh +@HWI-EAS91_1_30788AAXX:7:64:947:234/1 +TATCAAAAAAGAATATAATCTGAATCAACACTACAA ++HWI-EAS91_1_30788AAXX:7:64:947:234/1 +hhhhhhhhhhhhhhhhhhhhhhhRhhehhahhhhhJ
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/split_pair_reads_2.fastqsanger Mon Jan 27 09:29:20 2014 -0500 @@ -0,0 +1,20 @@ +@HWI-EAS91_1_30788AAXX:7:21:1542:1758/2 +GCTCCTAGCATCTGGAGTCTCTATCACCTGAGCCCA ++HWI-EAS91_1_30788AAXX:7:21:1542:1758/2 +hhhhhhhhhhhhhhhhhhhhhhhh`hfhhVZSWehR +@HWI-EAS91_1_30788AAXX:7:22:1621:462/2 +ACTAGCCCCAATATCAATCCTATATCAAATCTCACC ++HWI-EAS91_1_30788AAXX:7:22:1621:462/2 +hJhhhhChhhJhhhRhhKhePhc\KhhV\KhXhJhh +@HWI-EAS91_1_30788AAXX:7:45:408:807/2 +ATGAGTGCTAGGATCAGGATGGAGAGGATTAGGGCT ++HWI-EAS91_1_30788AAXX:7:45:408:807/2 +hhhhhhhhhhhhhhhhhh`hhhZh`hhhhhRXhhYh +@HWI-EAS91_1_30788AAXX:7:49:654:1439/2 +TAGGTTTATTGATAGTTGTGTTGTTGGTGTAAATGG ++HWI-EAS91_1_30788AAXX:7:49:654:1439/2 +hhhhhhhhhhhhhhhhhhhhhhhhhdhh_hG\XhU@ +@HWI-EAS91_1_30788AAXX:7:64:947:234/2 +CCTATTAGTGTGTAGAATAGGAAGTAGAGGCCTGCG ++HWI-EAS91_1_30788AAXX:7:64:947:234/2 +hhhhhhhh^hPhWfhhhhThWUhhfhh_hhNIVPUd
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml Mon Jan 27 09:29:20 2014 -0500 @@ -0,0 +1,6 @@ +<?xml version="1.0"?> +<tool_dependency> + <package name="galaxy_sequence_utils" version="1.0.0"> + <repository changeset_revision="0643676ad5f7" name="package_galaxy_utils_1_0" owner="devteam" prior_installation_required="False" toolshed="http://toolshed.g2.bx.psu.edu" /> + </package> +</tool_dependency>