# HG changeset patch # User peterjc # Date 1494437339 14400 # Node ID f396701fbf325548bd0d67beb799e59ad436e808 # Parent b38bbcbd458d4b3500cd8f22fe0d8221b2e3328d v0.1.3 Depends on Biopython 1.67 via Tool Shed package or bioconda. diff -r b38bbcbd458d -r f396701fbf32 tools/fastq_paired_unpaired/README.rst --- a/tools/fastq_paired_unpaired/README.rst Wed Aug 05 11:17:49 2015 -0400 +++ b/tools/fastq_paired_unpaired/README.rst Wed May 10 13:28:59 2017 -0400 @@ -1,7 +1,7 @@ Galaxy tool to divide FASTQ files into paired and unpaired reads ================================================================ -This tool is copyright 2010-2015 by Peter Cock, The James Hutton Institute +This tool is copyright 2010-2017 by Peter Cock, The James Hutton Institute (formerly SCRI, Scottish Crop Research Institute), UK. All rights reserved. See the licence text below (MIT licence). @@ -71,6 +71,9 @@ - Use ``format_source=...`` tag. - Planemo for Tool Shed upload (``.shed.yml``, internal change only). v0.1.2 - Belatedly declare Biopython dependency via Tool Shed. +v0.1.3 - Minor internal changes to Python script for error reporting & style. + - Updated to point at Biopython 1.67 (latest version in Tool Shed). + - Explicit dependency on ``galaxy_sequence_utils``. ======= ====================================================================== @@ -88,17 +91,17 @@ Planemo commands (which requires you have set your Tool Shed access details in ``~/.planemo.yml`` and that you have access rights on the Tool Shed):: - $ planemo shed_update --shed_target testtoolshed --check_diff ~/repositories/pico_galaxy/tools/fastq_paired_unpaired/ + $ planemo shed_update -t testtoolshed --check_diff tools/fastq_paired_unpaired/ ... or:: - $ planemo shed_update --shed_target toolshed --check_diff ~/repositories/pico_galaxy/tools/fastq_paired_unpaired/ + $ planemo shed_update -t toolshed --check_diff tools/fastq_paired_unpaired/ ... To just build and check the tar ball, use:: - $ planemo shed_upload --tar_only ~/repositories/pico_galaxy/tools/fastq_paired_unpaired/ + $ planemo shed_upload --tar_only tools/fastq_paired_unpaired/ ... $ tar -tzf shed_upload.tar.gz test-data/sanger-pairs-forward.fastq diff -r b38bbcbd458d -r f396701fbf32 tools/fastq_paired_unpaired/fastq_paired_unpaired.py --- a/tools/fastq_paired_unpaired/fastq_paired_unpaired.py Wed Aug 05 11:17:49 2015 -0400 +++ b/tools/fastq_paired_unpaired/fastq_paired_unpaired.py Wed May 10 13:28:59 2017 -0400 @@ -14,22 +14,18 @@ See accompanying text file for licence details (MIT license). """ -import os + +import re import sys -import re if "-v" in sys.argv or "--version" in sys.argv: - print("Version 0.1.0") + print("Version 0.1.3") sys.exit(0) -def sys_exit(msg, err=1): - sys.stderr.write(msg.rstrip() + "\n") - sys.exit(err) - try: from Bio.SeqIO.QualityIO import FastqGeneralIterator except ImportError: - sys_exit("Biopython missing") + sys.exit("Biopython missing") msg = """Expect either 3 or 4 arguments, all FASTQ filenames. @@ -58,7 +54,7 @@ same identifier with the fragment at the start of the description, e.g. @HWI-ST916:79:D04M5ACXX:1:1101:10000:100326 1:N:0:TGNCCA -@HWI-ST916:79:D04M5ACXX:1:1101:10000:100326 2:N:0:TGNCCA +@HWI-ST916:79:D04M5ACXX:1:1101:10000:100326 2:N:0:TGNCCA Note that this does support multiple forward and reverse reads per template (which is quite common with Sanger sequencing), e.g. this which is sorted @@ -83,28 +79,28 @@ """ if len(sys.argv) == 5: - format, input_fastq, pairs_fastq, singles_fastq = sys.argv[1:] + seq_format, input_fastq, pairs_fastq, singles_fastq = sys.argv[1:] elif len(sys.argv) == 6: pairs_fastq = None - format, input_fastq, pairs_f_fastq, pairs_r_fastq, singles_fastq = sys.argv[1:] + seq_format, input_fastq, pairs_f_fastq, pairs_r_fastq, singles_fastq = sys.argv[1:] else: - sys_exit(msg) + sys.exit(msg) -format = format.replace("fastq", "").lower() -if not format: - format="sanger" #safe default -elif format not in ["sanger","solexa","illumina","cssanger"]: - sys_exit("Unrecognised format %s" % format) +seq_format = seq_format.replace("fastq", "").lower() +if not seq_format: + seq_format = "sanger" # safe default +elif seq_format not in ["sanger", "solexa", "illumina", "cssanger"]: + sys.exit("Unrecognised format %s" % seq_format) -#Cope with three widely used suffix naming convensions, -#Illumina: /1 or /2 -#Forward/revered: .f or .r -#Sanger, e.g. .p1k and .q1k -#See http://staden.sourceforge.net/manual/pregap4_unix_50.html +# Cope with three widely used suffix naming convensions, +# Illumina: /1 or /2 +# Forward/revered: .f or .r +# Sanger, e.g. .p1k and .q1k +# See http://staden.sourceforge.net/manual/pregap4_unix_50.html re_f = re.compile(r"(/1|\.f|\.[sfp]\d\w*)$") re_r = re.compile(r"(/2|\.r|\.[rq]\d\w*)$") -#assert re_f.match("demo/1") +# assert re_f.match("demo/1") assert re_f.search("demo.f") assert re_f.search("demo.s1") assert re_f.search("demo.f1k") @@ -144,7 +140,7 @@ for title, seq, qual in FastqGeneralIterator(in_handle): count += 1 - name = title.split(None,1)[0] + name = title.split(None, 1)[0] is_forward = False suffix = re_f.search(name) if suffix: @@ -220,7 +216,7 @@ for old in buffered_reads: singles_handle.write(FASTQ_TEMPLATE % old) singles += 1 -in_handle.close +in_handle.close() singles_handle.close() if pairs_fastq: pairs_f_handle.close() @@ -238,4 +234,4 @@ assert count == pairs + singles == forward + reverse + neither, \ "%i vs %i+%i=%i vs %i+%i+%i=%i" \ - % (count,pairs,singles,pairs+singles,forward,reverse,neither,forward+reverse+neither) + % (count, pairs, singles, pairs + singles, forward, reverse, neither, forward + reverse + neither) diff -r b38bbcbd458d -r f396701fbf32 tools/fastq_paired_unpaired/fastq_paired_unpaired.xml --- a/tools/fastq_paired_unpaired/fastq_paired_unpaired.xml Wed Aug 05 11:17:49 2015 -0400 +++ b/tools/fastq_paired_unpaired/fastq_paired_unpaired.xml Wed May 10 13:28:59 2017 -0400 @@ -1,8 +1,8 @@ - + using the read name suffices - biopython - Bio + galaxy_sequence_utils + biopython diff -r b38bbcbd458d -r f396701fbf32 tools/fastq_paired_unpaired/tool_dependencies.xml --- a/tools/fastq_paired_unpaired/tool_dependencies.xml Wed Aug 05 11:17:49 2015 -0400 +++ b/tools/fastq_paired_unpaired/tool_dependencies.xml Wed May 10 13:28:59 2017 -0400 @@ -1,6 +1,9 @@ - - + + + + +