# HG changeset patch # User peterjc # Date 1494501878 14400 # Node ID 31f5701cd2e95c2f62d82068c6e37f8b56527bed # Parent 6b71ad5d43fba940de4103d9d2c04db69c7b84d4 v0.2.4 Depends on Biopython 1.67 via legacy Tool Shed package or bioconda. diff -r 6b71ad5d43fb -r 31f5701cd2e9 tools/sample_seqs/README.rst --- a/tools/sample_seqs/README.rst Wed Feb 01 09:39:36 2017 -0500 +++ b/tools/sample_seqs/README.rst Thu May 11 07:24:38 2017 -0400 @@ -1,7 +1,7 @@ Galaxy tool to sub-sample sequence files ======================================== -This tool is copyright 2014-2015 by Peter Cock, The James Hutton Institute +This tool is copyright 2014-2017 by Peter Cock, The James Hutton Institute (formerly SCRI, Scottish Crop Research Institute), UK. All rights reserved. See the licence text below (MIT licence). @@ -71,6 +71,8 @@ - Planemo for Tool Shed upload (``.shed.yml``, internal change only). v0.2.3 - Do the Biopython imports at the script start (internal change only). - Clarify paired read example in help text. +v0.2.4 - Depends on Biopython 1.67 via legacy Tool Shed package or bioconda. + - Style changes to Python code (internal change only). ======= ====================================================================== @@ -84,17 +86,17 @@ Planemo commands (which requires you have set your Tool Shed access details in ``~/.planemo.yml`` and that you have access rights on the Tool Shed):: - $ planemo shed_update -t testtoolshed --check_diff ~/repositories/pico_galaxy/tools/sample_seqs/ + $ planemo shed_update -t testtoolshed --check_diff tools/sample_seqs/ ... or:: - $ planemo shed_update -t toolshed --check_diff ~/repositories/pico_galaxy/tools/sample_seqs/ + $ planemo shed_update -t toolshed --check_diff tools/sample_seqs/ ... To just build and check the tar ball, use:: - $ planemo shed_upload --tar_only ~/repositories/pico_galaxy/tools/sample_seqs/ + $ planemo shed_upload --tar_only tools/sample_seqs/ ... $ tar -tzf shed_upload.tar.gz test-data/MID4_GLZRM4E04_rnd30_frclip.pair_sample_N5.sff diff -r 6b71ad5d43fb -r 31f5701cd2e9 tools/sample_seqs/sample_seqs.py --- a/tools/sample_seqs/sample_seqs.py Wed Feb 01 09:39:36 2017 -0500 +++ b/tools/sample_seqs/sample_seqs.py Thu May 11 07:24:38 2017 -0400 @@ -63,7 +63,7 @@ options, args = parser.parse_args() if options.version: - print("v0.2.3") + print("v0.2.4") sys.exit(0) try: @@ -146,6 +146,7 @@ sys.stderr.write("Sampling every %ith sequence\n" % N) def sampler(iterator): + """Sample every Nth sequence.""" global N count = 0 for record in iterator: @@ -157,11 +158,12 @@ percent = float(options.percent) / 100.0 except ValueError: sys.exit("Bad -p percent argument %r" % options.percent) - if percent <= 0.0 or 1.0 <= percent: + if not(0.0 <= percent <= 1.0): sys.exit("Bad -p percent argument %r" % options.percent) sys.stderr.write("Sampling %0.3f%% of sequences\n" % (100.0 * percent)) def sampler(iterator): + """Sample given percentage of sequences.""" global percent count = 0 taken = 0 @@ -215,6 +217,7 @@ assert taken == N, "Picked %i, wanted %i" % (taken, N) else: def sampler(iterator): + """Sample given number of sequences.""" # Mimic the percentage sampler, with double check on final count global N, total # Do we need a floating point fudge factor epsilon? @@ -268,12 +271,11 @@ raise ValueError( "Records in Fasta files should start with '>' character") try: - id = line[1:].split(None, 1)[0] + line[1:].split(None, 1)[0] except IndexError: if not no_id_warned: sys.stderr.write("WARNING - Malformed FASTA entry with no identifier\n") - no_id_warned = True - id = None + no_id_warned = True lines = [line] line = handle.readline() while True: @@ -346,6 +348,7 @@ count = writer.write_file(iterator_filter(SffIterator(in_handle))) return count + if seq_format == "sff": count = sff_filter(in_file, out_file, sampler, interleaved) elif seq_format == "fasta": diff -r 6b71ad5d43fb -r 31f5701cd2e9 tools/sample_seqs/sample_seqs.xml --- a/tools/sample_seqs/sample_seqs.xml Wed Feb 01 09:39:36 2017 -0500 +++ b/tools/sample_seqs/sample_seqs.xml Thu May 11 07:24:38 2017 -0400 @@ -1,8 +1,7 @@ - + e.g. to reduce coverage - biopython - Bio + biopython diff -r 6b71ad5d43fb -r 31f5701cd2e9 tools/sample_seqs/tool_dependencies.xml --- a/tools/sample_seqs/tool_dependencies.xml Wed Feb 01 09:39:36 2017 -0500 +++ b/tools/sample_seqs/tool_dependencies.xml Thu May 11 07:24:38 2017 -0400 @@ -1,6 +1,6 @@ - - + +