# HG changeset patch
# User peterjc
# Date 1486054357 18000
# Node ID 9b074c1db68e321ab34158a60104b11c0f795e28
# Parent 708ce1e5ac947c294c0ac454c56c9792684444b7
v0.0.14 galaxy_sequence_utils dependency etc
diff -r 708ce1e5ac94 -r 9b074c1db68e test-data/MID4_GLZRM4E04_rnd30.fasta
--- a/test-data/MID4_GLZRM4E04_rnd30.fasta Thu Nov 21 10:47:23 2013 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,60 +0,0 @@
->GLZRM4E04IPVGX
-CCGACTCGAGGGTGGGTTCAGGAGGTGATCACCGGCGTTCGTGCCGCCGGTTTTAATGGCGAGGGAGACTAGGGCGGCGTTAGGGTTTGGGTTCGGT
->GLZRM4E04I9EJK
-CCGACTCGAGTTTTTTAAATGCTTGATTTGGGCCGGAAAACTGATCTAGGGGGGTTTCGGTAGACTCGTGGGGGTAGATAGGCATTTTGTGGAGCTGGGTACTGGGAGAGTGATGTACGACTTTGGGGGTTCTGTGGAGAGAAGTGGCATTTGGAGAACTCGAGTCGG
->GLZRM4E04JHW0A
-TCGACTCGAGCCCCTCAAGTCCAATCCCCAAAAAATTCCGTATGCCCGAAATTCCCAAATATAATGGTACGACCGATCCTAACGAACACGTCACCTCCCACACATGTGCCATCAAAGGCAACGATTTGGAGGATGATGAGATCGAATCCGTGTTGTTGAAAAAGTTCGGAGAGACCCTCGCAAAGGGAGCAATGATCTGGTATCACAACTTACCACCAAATTCCGTTGATTCTTTCGCCATGTTAGCAGATTCGTTCATAAAAGCACATGCTGGTGCCATAAAGGTTGCAACAAGGAAATCGGACCTCTTCAAAGTAAACAAAGGGGTAACGAAATACTGAGGGAATTCGTATCCCGATTTTAAATGGAGCGTATGGACTTCCCCC
->GLZRM4E04IBNFY
-CCGACTCGAGAGGAAGATGTGGGCTATGTGTGGTTGTGGTATTGGGATTGTTGTTTAGGGAATGTTGTGGTATATGGGAACATGTGGTGCGAGTTGTTATATTGCGTTGTTATGTTTTTTGGCACATGTGGGATTGTTTAGAGGAATTGTCATGGTGTTTGCATGGTGTTCGCTATGACTTTGATTTATGAGATGATAATATTATTGTTGTGCACTTGTGAGATTTATGCACTGATCATGTTATTAGCGAGTATCGTTAATAATTATTGTCTCTATTACCTCACGAGGTAATAGAG
->GLZRM4E04IKCQP
-CCGACTCGGAGTCTTTTTTCAGGTATTGACACTAACAGAATAGGGAGTCTCGACCAGCGAGCTTCTCCCCGGAGGTAAGAAAAGAAGGGTTTCGGCACAGTTTATATACAGTTAAGATAATATCAAAAGCGGAAAAAGACAACATTTAGCACTTTTATGCAAAACATGTAATAAAGATCAGATAGTAAAGCCAAATATAACAATTATTCTAAGCTCGAATTCTTGGACCCTGAACCAGTGGTTCCGGGTCACACTTCCCCAGCAGAGTCGCCAGAGCTGTCACACCTCCTTTTTGCGCGCCCACCCCGAAGGGTTAAATGCGCGGGTGGAGTTTTTCCAATTTAAGTGACAATATTCGAAAATGGGATTATTTATTTAATTCAGAGTCGCCACTTGGGAAAGGTTTGGCTTTTGGTGTCCCAAGTCACCGGTTTATCCTGAACCCCA
->GLZRM4E04ITXFU
-CCGACTCGAGAGAAAATTCAGGCAACTAGTGCCTCAACCAAGTTTAGATAGAACACTTACCTCAAACAAGCCAAAACAAAATACCGAGAAAGCCAAACGACACTCTAGAAGTGCCATCCTACGCGTAACAACCTCTGAACGGCTCGAAACTAGCTAAAAGCAACCCAAATACATCAAATAATGCCTAAGGGAAACAACCCCAAATGATAAAGGTCGAATCTTTAATCAAAAACTCAAAGTCAACCAAAAAGTCAAACCTGAACCCCACCTCGAGT
->GLZRM4E04JAY4Z
-CCGACTCGAGGTTTGGTCTGGTATGAAGCTGGGGTTGTTTGGTGTAGATCGAGTTTTGACTCGAATCTTCAAATGAAGATTCGAGGACCTGGGGGTGGATTCGAAGTGTGTGGTTGGTAGATTTAGGTTCAGGTGGCATGGGGTTC
->GLZRM4E04H9M21
-CCGACTCGAGGAAGGGAAGTCCCTAACTTGATTGGACATGTTTTGTCGAAAGCTAGTAACTTGAAGGTATTAATATTTCTTAGGTTTGACCGTAGGTTGACGTTATAGCTACCGGGCGATTTTAGTTTCGGGACTTTGGAGGCTCGAGTCGG
->GLZRM4E04IQDP5
-CCGACTCGAGGGGAAGTTTACGTTTTTCCAGCGAGCTGGTGAGATCGGGTGAAAGGGAAAGGTTGTTGCTGCTGTTGAGTGCTATCGTGAGGAGGAAGACGGGTGGAGGTTGTTTTTGTTGGTGAAGGTTTAGGGTCTCTGGTTATTGTGTCCTACGTGAGGGGTGGTCGTTTTCCGACGATGGTGAGCTGAAGGGTGGTCGTCGATGGAGCTCGTGGATGTTGGTCCGGCATGCATGGGTCGTGTATGGCTGCTCAGGGTGTAGGCTGGCGTTTGGACAGCACAGCATCGCCGGCAGCGTAGGGTCGTTTGGACAGGAACAACGATGCGTAAAGGCAGTCTCGAGTCGG
->GLZRM4E04I0CYE
-TTCAGGAGGTGATCACCGGCGTTCGTGCCGCCGGCTTTAATGGCGAGGGAGACTAGGGAAGCGTTAGGGTTTGGGTTCGGT
->GLZRM4E04I2NA7
-CCGACTCGAGTTAAAACGCTTACGTATCTAGTCGGTAACCTGCATTAACCGGTAGTATCAATTTGGGAACATCGACTAAGTTTGCGATCTTCGAATA
->GLZRM4E04I5BWJ
-CCGACTCGAGTGTGGGAAATGCTTGCGAAACTGGAGGCATTGATTGGGGATGTTCCTGAAGGTGAGGAATTTTAGACTCTTGTGACAAGGCTTGCCTACCTTGAGGTAGAATTGGCAAGGTTAAGTCAAGAGAATGCATATCTGAACAGCGAGATAGTGGTGCTGCGATGTGCTGTGGGCGAGGATGCTCCTCAACGTGGTGCTGATCGTCTCAAGGTAAAATTCCTGAACCTAAGGCATTTGGGGGC
->GLZRM4E04IK8WX
-GCGCAAGCTTCGCGCCCAGGGTAGAATCCATAGCTGGTACACTCCGCGCGGGATCCGCG
->GLZRM4E04I22QH
-CCGACTCGAGCCCTTCTCTGGTTTTTCTCATCTCACCTTGGCAGTAATTTTTTTGAGAATGGTAACATTATATAAATATAAAACAGCGACAAAGTTTGTGCTGCTAAAAGTGATCCTTGTAAGTTTTTGTAGTGGCGGGATTCTTTTCTTTTTGGGGATGTATTCTATGATATCAAGTATGGAGTTACTATCATTTACAGGGCATTCTTTACACCAAAAAAGAAAAAGCAACTAAACAGTTCATTTTGATTTTCTGAATGGAGTTGCTAAACATCCTCAAAAACATCTAGAGTTCCTCTCGTTCCAGACTGACCACCAAATGCAGGCTGGGATCTTCTGCCGCCATTTCTTCTGTCTGACTCCACCTGTCACACCAGACCCCC
->GLZRM4E04IVVFA
-GGATGGGGTAATATGAAGGGTCGAGATCTTTATGCTGACGGGGAACGGTGTCGTTTCAACCTGAAAAGAGGCTCGAGTCGG
->GLZRM4E04ILU3V
-CCGACTCGAGCTTCCAAGTCCAAAGATGGAGAAATGAATCAAGCCCTCG
->GLZRM4E04IDVJT
-CCGACTCGAGGTGTGGTCTGGGGCGGTAGTTATTCTGCGGGGGTCGTGGGTTATAGTGGTTGCGGTAAGGAGGCTGATTTCTGGGAGGTTGAGCTGGGCCTCGGTTGGCTTGTTGTGGTGGATGGTTATAAGGTTGGGCATTCATGACCATATAAGGTTGATGAGCATATGCCAAATTTGAGTGGGGGTAGTAGTGTTGTGGGGCTCTTTTCGGAAAATGGGGCCTGGGATGACGATACTCCCTTGCTTCAGAGGCTGCCATAGCCGTTTCTTCCTTCTTCTTTCCCCTTGTCGTTCCTCCAGACCCCACCTCGAGTCGG
->GLZRM4E04I3ZJ6
-CCGACTCGAGGGGAGTAAATGCATTAAGCAGCATGACAACCCGTGTTATCAAAGGCGATAAGCGCAAAAAACTCTAAGGTCTGTTGAGGCTTTAAGCGCAAAACGCAAATAAAGCGTGGGCTTTAATGAAGAAAGGCGCAAATGCAGAAAAACAATCAAGGATTCGAGGTATTGGGCATTTGATGGGCTCGAGTCGG
->GLZRM4E04I3UBT
-CCGACTCGAGAAAACCTTTACGAATCCCTAACTCTTATCCCTCGAATCCCAGCCGTGTGCTTCAAATCTTAGATCGAAAATCAGGTCGGAACCTTATCTCGTCCAATAGCCTCCAATTTCACACCACAGCTTCCCCAAGACTTCCCCGCTCCAGTCCCACGCTCAGTTTCCTTCGAATCACCCCCGAGCTCCTCAAATCTTAGGTCAAAAGAATCGACCCAAACCCTAATCCATTCATATACCACTAAACTCACACCCTGCCACCCCCGACTCTCCCTCGTTCGTAAA
->GLZRM4E04H59S1
-CCGACTCGAGGGAAGGTTTACGTAGTAGACAAAACTAAACATACAAACTAGGAAACATAATAAATCAAACACAAGTAAGGAAAGAGGAGCATGTAGAACCGACTAAGAACCAAATTAGGAGAAAAATCAGGCAAACCCTAATTTTGCCTGGAATCACAAATTGGCCGAAGAATCTTGGTGAATTTGAACCTCATAGCCTTGCTATGAACGTATAACGATGAAATATCGTCCAAATCTTGGAGATTAAAGCCGATCTCCTTTGATTCAAAGATTGATATTTGCCAAAAGTGAGACAAACACTAAGTAACACCACAATCTTGTAATTAGCAATGAGATAACCCCGTAAAGGGGGCTCGAGTCGG
->GLZRM4E04JFA38
-CCGACTCGACGCCTCTCAAGTCCAAAAATGAAGGAAAGTGGAATCAAACCCACGATTTGGCTCATTTTCTGCCAGTTAAACCGCACCTACGGTCCATTAACCGCTTCTGCGGCTAAAGCTCCGCACCTTCACACTTATATCCGCATCTGCGGATGCGCTTTTGCGCTCAATCGTCCGCTTCTGCAGAACCTTGGGTCCTTCTCATCTCCGTATCTGCGACTGCCCTTCCACAGAAGCGGCTCCACTCCTGCGGCTCACTCGTCGCATCTGCGACCACTGACTCCCCGGTCTAAAGGCGCACCTGCGATCCCAGCCTCGCTTCTGCGGAGCCGCACCTGCGGACTT
->GLZRM4E04J4EK3
-CCGACTCGAGGTGGGGTCTGGTGCTACCTTTGATGAGGTTGTCGACATTGCTCGACAGATTGAGATGGTTCGTGGTCAGGAAAGGGTTGAGAGGGAGGCCAAGAGGCCTCGTGGTCAGGGTGGATTTAGCAGTGCTCATTTTGGGGGTCAGTTTCAGCACGGTAGAGGTCGTCATTTCAGACAGGCTCAGACAGCTTGGCCATTTCACCGTGGTGCATCATCTGGCCATGGTTCTCATAGTCATCAACATGGTCAGTCATCATTTAGTGCACTACCAGCGCAGAGTTCTCATCATGTCCCACCACTCAGGTATCCTTGGGTAGTTCTTTGGGCATCAGGAGCACTAATTTCGCCAGAGGGGGCTCGAGTCGG
->GLZRM4E04IK96G
-CCGACTCGAGCCACTCTTCAGGTCCAAACGCCATTAACAAACAGCTCGTCACACTCATACACACATGCACCAACACACCCAGCTGCGTCTTCATCGAGTTTCATCGAGTCTTCATCTTCTTCGTCGAAGGCAACACCTCACCACCCTCGAACCCGA
->GLZRM4E04JVL8Q
-CCGACCGAGGGATGGGTTTACGGTAGTTTAATAGCTCCCATAGGGTTGATTTTGGTTTTTAAAGAGCGTATCCCGGACGTTGATTTTGGAGGTCTGTAGGTCAGTTTCGATGTATTTTGGCGAAAGTTAGAAATTGGTGGAATTTTGAAAAATTTGACCGGGAGTGGACTTTTTGATGTCGGGGCCGGATCCCGATTTTGAAAGTTGAAATATGTCCATAATGTCAATTATGATTTGTGTGCAAAGTTTGGTATCAATCGGAGTTGTTTTGGTTTGAATCGGCGTCGGTTTCAGAATTTGAAAGTTCATAAGTTCATTAGGCATGAATTGGGGTGCGATTCGTATTTTTGATGTTGTTTGATGCGATTTATGGCCTCGAGTCACTCCGTTAGGTGTTTTGAAACTTTTTGGTATATTCGGAAGGGGTCCCGAGTGTGATTCGGATCGATAACAGAACAAAATTT
->GLZRM4E04IOQ36
-CCGACTCGAGTTTTTAAATGCATAATAATACAAATTTGTTGCTTAATAGTATTAGAGAGAGGCATAAAATGATGTTCCCTGTATATGAGTTGATTTACGCAAATGTCCCTCTTTTAGGCTATTATTCAAATTTTGTTAATATTTTTAAAAAAATTATTTAGTCCTAAAGAAATTACCCCTTCAGGGTAACGTTAGACCTTTTGTTCATATAGTTTTCAGTTTGCTCAATCGTACATATAAAATATTAGAATGTGATCTAACGTTATAATAAAGTATCAAATTTAAACCATACTCTATTTTCACGTAAGATTTTCAGTTTGCTCAAGAGTGGAAGAGAAAATAAAAGGGTACATTTACTAAGCAGCTATGCCAAACTGGGAAAAATTTCT
->GLZRM4E04JBJJ1
-CCGACTCGAGGGGTAGAAATGCAAAGGAGGTAAGGGACATAAGGGTGCCTATAGGTTGAGAATTGGGTCATGGAACATAGTTACATTGATGGGTAAGTCTATAGAGCTGGCGAAGATCCTACAAAAGAGGAAAGTCAATATAATGTATGTCCAAGAGACTCGGTGGATAGGGTCGAGGGCGAAGAATACAGACGAGTATAAGCTATGGTACTTTGGAGTCCTGAAGGGTAAGAATGGTGTAGGCATTTAGTCGACTCGAGTCGG
->GLZRM4E04IEFNO
-CCGACTCGAGAGGAGATTCAGGGACAAAGAAGAGGAGCTCGAGGTGAGCAGTGGGGTCGAGGCACAATGTGCCAACCCAAGCCCAAGTGGTTTCATTGTGTGCCGAGCTCGGGGAGTGTCAACCCAAAATGGTCGCTCTGAGTGGCGAGGTCACTGAGAAGGCAGTGGGTTTAGAGAAGGCAGAGTTGGCCCAGCTATCAACTGCAAGGAGAACATGGGCTTTAGAGATCATGATCCGCGTTCTCAGTTCTGAGTGGGAGGGTGATTTGGACACGGCCAAGCTCAGAGAAGAGCAGCTTGATGAGCGGATAGGGGAGCTGGAAAAAGAGACTTTGGGCCTTAGTGATCGAGTTGCTGCTCTCGAGGCCGAAAAGGCACAGTTATTGGCTCAACCGTCCTCCTCCCGCGCTTCTGCTTTCCTGATGTTGCACGATATTGTATGAGGAATGGATTCACGCTGAAGCTCAGCTAGA
->GLZRM4E04JOT5I
-CCGACTCGAGGGGAGTTTACGGGAGTACTTCTCATGAGGTATTGTGTATAAGTGATATATTAGTCATTTGAGGGGGAGAGTTTGAGATCAGATAAGAGATTCCTGTATTATTGAGGTTTAGAGACTGGATGTTCTCAAAGGCAAACTATTCCTTGGTTGCGGATCGTAAAGGGAGGCTCGAGTCGG
->GLZRM4E04J4HNG
-CCGACTCGAGCCGCCCACTGTGGTAGTCAAACAATCGCGAAT
->GLZRM4E04JC544
-CCGACTCGAGTTTTTTAAATGCATATTCGGGGTACAACCAAATTAAAATGGACCCCAGTGACGAAGAAAAACTTACTTTCATCACAGACAGGGGGACGTTACAGTTATAAAGTAATGCCCTTTGGTTCTCAAAAATAGCTGGAGCAACCTATCAAAGGTTGGTCACCAAAATATTCCAAGAATATTTAGAAAAATACATAGAGGTATATATAGACGATATGCTCGTCAAAACCAAACATTCTCATGATCATATTTCTCATTTATCTGTTACATTTGAAATTTTACGAAATTTAATATGAAACTCAATCCAGAAAATGTGCATTTAGGGGGC
diff -r 708ce1e5ac94 -r 9b074c1db68e test-data/MID4_GLZRM4E04_rnd30.sff
Binary file test-data/MID4_GLZRM4E04_rnd30.sff has changed
diff -r 708ce1e5ac94 -r 9b074c1db68e tools/seq_primer_clip/README.rst
--- a/tools/seq_primer_clip/README.rst Thu Nov 21 10:47:23 2013 -0500
+++ b/tools/seq_primer_clip/README.rst Thu Feb 02 11:52:37 2017 -0500
@@ -1,7 +1,7 @@
Galaxy tool to primer clip (trim) FASTA, FASTQ or SFF reads
===========================================================
-This tool is copyright 2011-2013 by Peter Cock, The James Hutton Institute
+This tool is copyright 2011-2017 by Peter Cock, The James Hutton Institute
(formerly SCRI, Scottish Crop Research Institute), UK. All rights reserved.
See the licence text below (MIT licence).
@@ -23,18 +23,19 @@
There are just two files to install:
-* seq_primer_clip.py (the Python script)
-* seq_primer_clip.xml (the Galaxy tool definition)
+* ``seq_primer_clip.py`` (the Python script)
+* ``seq_primer_clip.xml`` (the Galaxy tool definition)
-The suggested location is a new tools/seq_primer_clip folder. You will also
-need to modify the tools_conf.xml file to tell Galaxy to offer the tool::
+The suggested location is a new ``tools/seq_primer_clip/`` folder. You will
+also need to modify the ``tools_conf.xml`` file to tell Galaxy to offer the
+tool::
-If you wish to run the unit tests, also add this to tools_conf.xml.sample
-and move/copy the test-data files under Galaxy's test-data folder. Then::
+If you wish to run the unit tests, also move/copy the ``test-data/`` files
+under Galaxy's ``test-data/`` folder. Then::
- $ ./run_functional_tests.sh -id seq_primer_clip
+ $ ./run_tests.sh -id seq_primer_clip
You will also need to install Biopython 1.54 or later. That's it.
@@ -62,6 +63,14 @@
- Development moved to GitHub, https://github.com/peterjc/pico_galaxy
- Renamed folder and adopted README.rst naming.
v0.0.11 - Correct automated dependency definition.
+v0.0.12 - Simplified XML to apply input format to output data.
+ - Tool definition now embeds citation information.
+v0.0.13 - Use ``format_source=...`` tag.
+ - Reorder XML elements (internal change only).
+ - Planemo for Tool Shed upload (``.shed.yml``, internal change only).
+ - Fixed input file help text.
+v0.0.14 - Updated to point at Biopython 1.67 (latest version in Tool Shed).
+ - Explicit dependency on ``galaxy_sequence_utils``.
======= ======================================================================
@@ -75,19 +84,24 @@
Development has now moved to a dedicated GitHub repository:
https://github.com/peterjc/pico_galaxy
-For making the "Galaxy Tool Shed" http://toolshed.g2.bx.psu.edu/ tarball use
-the following command from the Galaxy root folder::
- $ tar -czf seq_primer_clip.tar.gz tools/seq_primer_clip/README.rst tools/seq_primer_clip/seq_primer_clip.* tools/seq_primer_clip/tool_dependencies.xml test-data/dop_primers.fasta test-data/MID4_GLZRM4E04_rnd30*
+For pushing a release to the test or main "Galaxy Tool Shed", use the following
+Planemo commands (which requires you have set your Tool Shed access details in
+``~/.planemo.yml`` and that you have access rights on the Tool Shed)::
-Check this worked::
+ $ planemo shed_update -t testtoolshed --check_diff ~/repositories/pico_galaxy/tools/seq_primer_clip/
+ ...
- $ tar -tzf seq_primer_clip.tar.gz
- tools/seq_primer_clip/README.rst
- tools/seq_primer_clip/seq_primer_clip.xml
- tools/seq_primer_clip/seq_primer_clip.py
- tools/seq_primer_clip/tool_dependencies.xml
- test-data/dop_primers.fasta
+or::
+
+ $ planemo shed_update -t toolshed --check_diff ~/repositories/pico_galaxy/tools/seq_primer_clip/
+ ...
+
+To just build and check the tar ball, use::
+
+ $ planemo shed_upload --tar_only ~/repositories/pico_galaxy/tools/seq_primer_clip/
+ ...
+ $ tar -tzf shed_upload.tar.gz
test-data/MID4_GLZRM4E04_rnd30.fasta
test-data/MID4_GLZRM4E04_rnd30.fastqsanger
test-data/MID4_GLZRM4E04_rnd30_fclip.fasta
@@ -97,6 +111,11 @@
test-data/MID4_GLZRM4E04_rnd30_frclip.fastqsanger
test-data/MID4_GLZRM4E04_rnd30_frclip.sff
test-data/MID4_GLZRM4E04_rnd30.sff
+ test-data/dop_primers.fasta
+ tools/seq_primer_clip/README.rst
+ tools/seq_primer_clip/seq_primer_clip.py
+ tools/seq_primer_clip/seq_primer_clip.xml
+ tools/seq_primer_clip/tool_dependencies.xml
Licence (MIT)
diff -r 708ce1e5ac94 -r 9b074c1db68e tools/seq_primer_clip/seq_primer_clip.py
--- a/tools/seq_primer_clip/seq_primer_clip.py Thu Nov 21 10:47:23 2013 -0500
+++ b/tools/seq_primer_clip/seq_primer_clip.py Thu Feb 02 11:52:37 2017 -0500
@@ -26,8 +26,8 @@
(formerly the Scottish Crop Research Institute, SCRI), UK. All rights reserved.
See accompanying text file for licence details (MIT/BSD style).
-This is version 0.0.8 of the script. Currently it uses Python's regular
-expression engine for finding the primers, which for my needs is fast enough.
+NOTE: Currently it uses Python's regular expression engine for finding the
+primers, which for my needs is fast enough.
"""
import sys
import re
@@ -35,52 +35,48 @@
from galaxy_utils.sequence.fastq import fastqReader, fastqWriter
if "-v" in sys.argv or "--version" in sys.argv:
- print "v0.0.5"
+ print "v0.0.12"
sys.exit(0)
-def stop_err(msg, err=1):
- sys.stderr.write(msg)
- sys.exit(err)
-
try:
from Bio.Seq import reverse_complement
from Bio.SeqIO.SffIO import SffIterator, SffWriter
except ImportError:
- stop_err("Requires Biopython 1.54 or later")
+ sys.exit("Requires Biopython 1.54 or later")
try:
from Bio.SeqIO.SffIO import ReadRocheXmlManifest
except ImportError:
- #Prior to Biopython 1.56 this was a private function
+ # Prior to Biopython 1.56 this was a private function
from Bio.SeqIO.SffIO import _sff_read_roche_index_xml as ReadRocheXmlManifest
-#Parse Command Line
+# Parse Command Line
try:
in_file, seq_format, primer_fasta, primer_type, mm, min_len, keep_negatives, out_file = sys.argv[1:]
except ValueError:
- stop_err("Expected 8 arguments, got %i:\n%s" % (len(sys.argv)-1, " ".join(sys.argv)))
+ sys.exit("Expected 8 arguments, got %i:\n%s" % (len(sys.argv) - 1, " ".join(sys.argv)))
if in_file == primer_fasta:
- stop_err("Same file given as both primer sequences and sequences to clip!")
+ sys.exit("Same file given as both primer sequences and sequences to clip!")
if in_file == out_file:
- stop_err("Same file given as both sequences to clip and output!")
+ sys.exit("Same file given as both sequences to clip and output!")
if primer_fasta == out_file:
- stop_err("Same file given as both primer sequences and output!")
+ sys.exit("Same file given as both primer sequences and output!")
try:
mm = int(mm)
except ValueError:
- stop_err("Expected non-negative integer number of mismatches (e.g. 0 or 1), not %r" % mm)
+ sys.exit("Expected non-negative integer number of mismatches (e.g. 0 or 1), not %r" % mm)
if mm < 0:
- stop_err("Expected non-negtive integer number of mismatches (e.g. 0 or 1), not %r" % mm)
-if mm not in [0,1,2]:
+ sys.exit("Expected non-negtive integer number of mismatches (e.g. 0 or 1), not %r" % mm)
+if mm not in [0, 1, 2]:
raise NotImplementedError
try:
min_len = int(min_len)
except ValueError:
- stop_err("Expected non-negative integer min_len (e.g. 0 or 1), not %r" % min_len)
+ sys.exit("Expected non-negative integer min_len (e.g. 0 or 1), not %r" % min_len)
if min_len < 0:
- stop_err("Expected non-negtive integer min_len (e.g. 0 or 1), not %r" % min_len)
+ sys.exit("Expected non-negtive integer min_len (e.g. 0 or 1), not %r" % min_len)
if keep_negatives.lower() in ["true", "yes", "on"]:
@@ -88,7 +84,7 @@
elif keep_negatives.lower() in ["false", "no", "off"]:
keep_negatives = False
else:
- stop_err("Expected boolean for keep_negatives (e.g. true or false), not %r" % keep_negatives)
+ sys.exit("Expected boolean for keep_negatives (e.g. true or false), not %r" % keep_negatives)
if primer_type.lower() == "forward":
@@ -101,7 +97,7 @@
forward = False
rc = True
else:
- stop_err("Expected foward, reverse or reverse-complement not %r" % primer_type)
+ sys.exit("Expected foward, reverse or reverse-complement not %r" % primer_type)
ambiguous_dna_values = {
@@ -119,9 +115,9 @@
"H": "ACTMWYH",
"D": "AGTRWKD",
"B": "CGTSYKB",
- "X": ".", #faster than [GATCMRWSYKVVHDBXN] or even [GATC]
+ "X": ".", # faster than [GATCMRWSYKVVHDBXN] or even [GATC]
"N": ".",
- }
+}
ambiguous_dna_re = {}
for letter, values in ambiguous_dna_values.iteritems():
@@ -134,39 +130,41 @@
def make_reg_ex(seq):
return "".join(ambiguous_dna_re[letter] for letter in seq)
+
def make_reg_ex_mm(seq, mm):
if mm > 2:
raise NotImplementedError("At most 2 mismatches allowed!")
seq = seq.upper()
yield make_reg_ex(seq)
- for i in range(1,mm+1):
- #Missing first/last i bases at very start/end of sequence
- for reg in make_reg_ex_mm(seq[i:], mm-i):
+ for i in range(1, mm + 1):
+ # Missing first/last i bases at very start/end of sequence
+ for reg in make_reg_ex_mm(seq[i:], mm - i):
yield "^" + reg
- for reg in make_reg_ex_mm(seq[:-i], mm-i):
+ for reg in make_reg_ex_mm(seq[:-i], mm - i):
yield "$" + reg
if mm >= 1:
- for i,letter in enumerate(seq):
- #We'll use a set to remove any duplicate patterns
- #if letter not in "NX":
- pattern = seq[:i] + "N" + seq[i+1:]
+ for i, letter in enumerate(seq):
+ # We'll use a set to remove any duplicate patterns
+ # if letter not in "NX":
+ pattern = seq[:i] + "N" + seq[i + 1:]
assert len(pattern) == len(seq), "Len %s is %i, len %s is %i" \
% (pattern, len(pattern), seq, len(seq))
yield make_reg_ex(pattern)
- if mm >=2:
- for i,letter in enumerate(seq):
- #We'll use a set to remove any duplicate patterns
- #if letter not in "NX":
- for k,letter in enumerate(seq[i+1:]):
- #We'll use a set to remove any duplicate patterns
- #if letter not in "NX":
- pattern = seq[:i] + "N" + seq[i+1:i+1+k] + "N" + seq[i+k+2:]
+ if mm >= 2:
+ for i, letter in enumerate(seq):
+ # We'll use a set to remove any duplicate patterns
+ # if letter not in "NX":
+ for k, letter in enumerate(seq[i + 1:]):
+ # We'll use a set to remove any duplicate patterns
+ # if letter not in "NX":
+ pattern = seq[:i] + "N" + seq[i + 1:i + 1 + k] + "N" + seq[i + k + 2:]
assert len(pattern) == len(seq), "Len %s is %i, len %s is %i" \
% (pattern, len(pattern), seq, len(seq))
yield make_reg_ex(pattern)
+
def load_primers_as_re(primer_fasta, mm, rc=False):
- #Read primer file and record all specified sequences
+ # Read primer file and record all specified sequences
primers = set()
in_handle = open(primer_fasta, "rU")
reader = fastaReader(in_handle)
@@ -176,19 +174,18 @@
seq = reverse_complement(record.sequence)
else:
seq = record.sequence
- #primers.add(re.compile(make_reg_ex(seq)))
+ # primers.add(re.compile(make_reg_ex(seq)))
count += 1
for pattern in make_reg_ex_mm(seq, mm):
primers.add(pattern)
in_handle.close()
- #Use set to avoid duplicates, sort to have longest first
- #(so more specific primers found before less specific ones)
+ # Use set to avoid duplicates, sort to have longest first
+ # (so more specific primers found before less specific ones)
primers = sorted(set(primers), key=lambda p: -len(p))
- return count, re.compile("|".join(primers)) #make one monster re!
+ return count, re.compile("|".join(primers)) # make one monster re!
-
-#Read primer file and record all specified sequences
+# Read primer file and record all specified sequences
count, primer = load_primers_as_re(primer_fasta, mm, rc)
print "%i primer sequences" % count
@@ -197,8 +194,8 @@
clipped = 0
negs = 0
-if seq_format.lower()=="sff":
- #SFF is different because we just change the trim points
+if seq_format.lower() == "sff":
+ # SFF is different because we just change the trim points
if forward:
def process(records):
global short_clipped, short_neg, clipped, negs
@@ -208,8 +205,8 @@
seq = str(record.seq)[left_clip:right_clip].upper()
result = primer.search(seq)
if result:
- #Forward primer, take everything after it
- #so move the left clip along
+ # Forward primer, take everything after it
+ # so move the left clip along
if len(seq) - result.end() >= min_len:
record.annotations["clip_qual_left"] = left_clip + result.end()
clipped += 1
@@ -231,8 +228,8 @@
seq = str(record.seq)[left_clip:right_clip].upper()
result = primer.search(seq)
if result:
- #Reverse primer, take everything before it
- #so move the right clip back
+ # Reverse primer, take everything before it
+ # so move the right clip back
new_len = result.start()
if new_len >= min_len:
record.annotations["clip_qual_right"] = left_clip + new_len
@@ -246,7 +243,7 @@
yield record
else:
short_neg += 1
-
+
in_handle = open(in_file, "rb")
try:
manifest = ReadRocheXmlManifest(in_handle)
@@ -256,7 +253,7 @@
out_handle = open(out_file, "wb")
writer = SffWriter(out_handle, xml=manifest)
writer.write_file(process(SffIterator(in_handle)))
- #End of SFF code
+ # End of SFF code
elif seq_format.lower().startswith("fastq"):
in_handle = open(in_file, "rU")
out_handle = open(out_file, "w")
@@ -267,7 +264,7 @@
seq = record.sequence.upper()
result = primer.search(seq)
if result:
- #Forward primer, take everything after it
+ # Forward primer, take everything after it
cut = result.end()
record.sequence = seq[cut:]
if len(record.sequence) >= min_len:
@@ -281,13 +278,13 @@
negs += 1
writer.write(record)
else:
- short_negs += 1
+ short_neg += 1
else:
for record in reader:
seq = record.sequence.upper()
result = primer.search(seq)
if result:
- #Reverse primer, take everything before it
+ # Reverse primer, take everything before it
cut = result.start()
record.sequence = seq[:cut]
if len(record.sequence) >= min_len:
@@ -301,19 +298,19 @@
negs += 1
writer.write(record)
else:
- short_negs += 1
-elif seq_format.lower()=="fasta":
+ short_neg += 1
+elif seq_format.lower() == "fasta":
in_handle = open(in_file, "rU")
out_handle = open(out_file, "w")
reader = fastaReader(in_handle)
writer = fastaWriter(out_handle)
- #Following code is identical to that for FASTQ but without editing qualities
+ # Following code is identical to that for FASTQ but without editing qualities
if forward:
for record in reader:
seq = record.sequence.upper()
result = primer.search(seq)
if result:
- #Forward primer, take everything after it
+ # Forward primer, take everything after it
cut = result.end()
record.sequence = seq[cut:]
if len(record.sequence) >= min_len:
@@ -326,13 +323,13 @@
negs += 1
writer.write(record)
else:
- short_negs += 1
+ short_neg += 1
else:
for record in reader:
seq = record.sequence.upper()
result = primer.search(seq)
if result:
- #Reverse primer, take everything before it
+ # Reverse primer, take everything before it
cut = result.start()
record.sequence = seq[:cut]
if len(record.sequence) >= min_len:
@@ -345,9 +342,9 @@
negs += 1
writer.write(record)
else:
- short_negs += 1
+ short_neg += 1
else:
- stop_err("Unsupported file type %r" % seq_format)
+ sys.exit("Unsupported file type %r" % seq_format)
in_handle.close()
out_handle.close()
diff -r 708ce1e5ac94 -r 9b074c1db68e tools/seq_primer_clip/seq_primer_clip.xml
--- a/tools/seq_primer_clip/seq_primer_clip.xml Thu Nov 21 10:47:23 2013 -0500
+++ b/tools/seq_primer_clip/seq_primer_clip.xml Thu Feb 02 11:52:37 2017 -0500
@@ -1,20 +1,21 @@
-
+Trim off 5' or 3' primers
- biopython
+ galaxy_sequence_utils
+ biopythonBio
- seq_primer_clip.py --version
-
-seq_primer_clip.py $input_file $input_file.ext $primer_fasta $primer_type $mm $min_len $keep_negatives $output_file
-
+ seq_primer_clip.py --version
+
+seq_primer_clip.py $input_file $input_file.ext $primer_fasta $primer_type $mm $min_len $keep_negatives $output_file
+
-
+
@@ -28,18 +29,7 @@
-
-
-
-
-
-
-
-
-
-
-
-
+
@@ -97,9 +87,6 @@
-
- Bio
-
**What it does**
@@ -140,4 +127,7 @@
This tool is available to install into other Galaxy Instances via the Galaxy
Tool Shed at http://toolshed.g2.bx.psu.edu/view/peterjc/seq_primer_clip
+
+ 10.1093/bioinformatics/btp163
+
diff -r 708ce1e5ac94 -r 9b074c1db68e tools/seq_primer_clip/tool_dependencies.xml
--- a/tools/seq_primer_clip/tool_dependencies.xml Thu Nov 21 10:47:23 2013 -0500
+++ b/tools/seq_primer_clip/tool_dependencies.xml Thu Feb 02 11:52:37 2017 -0500
@@ -1,6 +1,9 @@
-
-
+
+
+
+
+