Mercurial > repos > peterjc > sample_seqs
changeset 3:02c13ef1a669 draft
Uploaded v0.2.1, fixed missing test file, more tests.
author | peterjc |
---|---|
date | Fri, 27 Mar 2015 09:34:27 -0400 |
parents | da64f6a9e32b |
children | d3aa9f25c24c |
files | tools/sample_seqs/README.rst tools/sample_seqs/sample_seqs.py tools/sample_seqs/sample_seqs.xml |
diffstat | 3 files changed, 38 insertions(+), 8 deletions(-) [+] |
line wrap: on
line diff
--- a/tools/sample_seqs/README.rst Fri Mar 06 11:48:09 2015 -0500 +++ b/tools/sample_seqs/README.rst Fri Mar 27 09:34:27 2015 -0400 @@ -1,7 +1,7 @@ Galaxy tool to sub-sample sequence files ======================================== -This tool is copyright 2014 by Peter Cock, The James Hutton Institute +This tool is copyright 2014-2015 by Peter Cock, The James Hutton Institute (formerly SCRI, Scottish Crop Research Institute), UK. All rights reserved. See the licence text below (MIT licence). @@ -63,6 +63,9 @@ This works by first counting all your sequences, then calculates the percentage required in order to sample them uniformly (evenly). This makes two passes through the input and is therefore slower. +v0.2.1 - Was missing a file for the functional tests. + - Includes testing of stderr messages. + - Includes testing of failure modes. ======= ======================================================================
--- a/tools/sample_seqs/sample_seqs.py Fri Mar 06 11:48:09 2015 -0500 +++ b/tools/sample_seqs/sample_seqs.py Fri Mar 27 09:34:27 2015 -0400 @@ -64,7 +64,7 @@ options, args = parser.parse_args() if options.version: - print("v0.2.0") + print("v0.2.1") sys.exit(0) in_file = options.input @@ -172,7 +172,7 @@ if N < 1: sys_exit("Bad -c count argument %r" % options.count) total = count_sequences(in_file, seq_format) - print("Input file has %i sequences" % total) + sys.stderr.write("Input file has %i sequences\n" % total) if interleaved: # Paired if total % 2:
--- a/tools/sample_seqs/sample_seqs.xml Fri Mar 06 11:48:09 2015 -0500 +++ b/tools/sample_seqs/sample_seqs.xml Fri Mar 27 09:34:27 2015 -0400 @@ -1,4 +1,4 @@ -<tool id="sample_seqs" name="Sub-sample sequences files" version="0.2.0"> +<tool id="sample_seqs" name="Sub-sample sequences files" version="0.2.1"> <description>e.g. to reduce coverage</description> <requirements> <requirement type="package" version="1.65">biopython</requirement> @@ -122,25 +122,52 @@ <param name="type" value="percentage" /> <param name="percent" value="20.0" /> <output name="output_file" file="MID4_GLZRM4E04_rnd30_frclip.sample_N5.sff" ftype="sff"/> + <assert_stderr> + <has_line line="Sampling 20.000% of sequences" /> + <has_line line="Selected 5 records" /> + </assert_stderr> </test> <test> <param name="input_file" value="MID4_GLZRM4E04_rnd30_frclip.sff" ftype="sff" /> <param name="type" value="everyNth" /> - <param name="percent" value="5" /> + <param name="every_n" value="5" /> <param name="interleaved" value="true" /> <output name="output_file" file="MID4_GLZRM4E04_rnd30_frclip.pair_sample_N5.sff" ftype="sff"/> + <assert_stderr> + <has_line line="Sampling every 5th sequence" /> + <has_line line="Selected 3 pairs" /> + </assert_stderr> </test> <test> - <param name="input_file" value="MID4_GLZRM4E04_rnd30.sff" ftype="sff" /> + <param name="input_file" value="MID4_GLZRM4E04_rnd30_frclip.sff" ftype="sff" /> <param name="type" value="desired_count" /> - <param name="count" value="30" /> - <output name="output_file" file="MID4_GLZRM4E04_rnd30.sff" ftype="sff"/> + <param name="count" value="25" /> + <output name="output_file" file="MID4_GLZRM4E04_rnd30_frclip.sff" ftype="sff"/> + <assert_stderr> + <has_line line="Input file has 25 sequences" /> + <has_line line="Taking all the sequences" /> + <has_line line="Selected 25 records" /> + </assert_stderr> </test> <test> <param name="input_file" value="MID4_GLZRM4E04_rnd30_frclip.sff" ftype="sff" /> <param name="type" value="desired_count" /> <param 
name="count" value="1" /> <output name="output_file" file="MID4_GLZRM4E04_rnd30_frclip.sample_C1.sff" ftype="sff"/> + <assert_stderr> + <has_line line="Input file has 25 sequences" /> + <has_line line="Sampling just first sequence!" /> + <has_line line="Selected 1 records" /> + </assert_stderr> + </test> + <test expect_failure="true" expect_exit_code="1"> + <param name="input_file" value="MID4_GLZRM4E04_rnd30_frclip.sff" ftype="sff" /> + <param name="type" value="desired_count" /> + <param name="count" value="30" /> + <assert_stderr> + <has_line line="Input file has 25 sequences" /> + <has_line line="Requested 30 sequences, but file only has 25." /> + </assert_stderr> </test> </tests> <help>