Repository 'mdust'
hg clone https://toolshed.g2.bx.psu.edu/repos/urgi-team/mdust

Changeset 0:cc6db1ee2d48 (2015-10-05)
Next changeset 1:034d5b66e97a (2015-11-18)
Commit message:
Uploaded
added:
mdust.xml
mdust_wrapper.py
test-data/masked-seq-mdust.fsa
test-data/seq-mdust.fsa
tool_dependencies.xml
b
diff -r 000000000000 -r cc6db1ee2d48 mdust.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mdust.xml Mon Oct 05 11:05:21 2015 -0400
[
@@ -0,0 +1,87 @@
+<?xml version="1.0"?>
+<tool id="mdust" name="mdust" version="1.0">
+ <description>fast and symmetric DUST implementation to mask low-complexity DNA sequences</description>
+  <requirements>
+  <requirement type="package" version="1.0">mdust</requirement>
+  </requirements>
+ <command interpreter="python">
+ mdust_wrapper.py -i ${input_file} -v ${cutoff} -w ${wsize} -m ${masking_letter.masking_letter_selector} -f ${output_type.output_type_selector}
+
+ #if str( $output_type.output_type_selector) == "default"
+ -o ${output_masked_fasta}
+ #end if
+
+ #if str( $output_type.output_type_selector) == "tab"
+ -o ${output_tab}
+ #end if
+
+ #if str( $output_type.output_type_selector) == "bed"
+ -o ${output_bed}
+ #end if
+ </command>
+ <inputs>
+ <param name="input_file" type="data" format="fasta" label="Input fasta file" help=""/>
+ <param name="cutoff" type="integer" value="28" label="cut-off" help="cut-off" />
+ <param name="wsize" type="integer" value="3" label="set maximum word size" help="word-size" />
+ <conditional name="masking_letter">
+ <param name="masking_letter_selector" type="select" label="Choose your masking type" help="Select the masking letter (N,X or lowercase)" >
+ <option value="N" selected="True">N (default)</option>
+ <option value="X">X</option>
+ <option value="L">lowercase</option>
+ </param>
+ </conditional>
+ <conditional name="output_type">
+ <param name="output_type_selector" type="select" label="Choose your output type" help="select your output">
+ <option value="default" selected="True">masked file (default)</option>
+ <option value="tab">output masking coordinates only (tab delimited)</option>
+ <option value="bed">output masking coordinates only (bed format)</option>
+ </param>
+ </conditional>
+ </inputs>
+ <outputs>
+ <data format="fasta" name="output_masked_fasta" label="masked.${input_file.name}">
+ <filter>( output_type['output_type_selector'] == 'default' )</filter>
+ </data>
+ <data format="tabular" name="output_tab" label="${input_file.name}.tab">
+ <filter>( output_type['output_type_selector'] == 'tab' )</filter>
+ </data>
+ <data format="bed" name="output_bed" label="${input_file.name}.bed">
+ <filter>( output_type['output_type_selector'] == 'bed' )</filter>
+ </data>
+ </outputs>
+ <tests>
+ <test>
+ <param name="input_file" value="seq-mdust.fsa" />
+ <output name="output_masked_fasta" file="masked-seq-mdust.fsa" ftype="fasta" />
+ </test>
+ </tests>
+  <help><![CDATA[
+mdust is a new implementation of the DUST module. It uses the same function to assign a complexity score to a sequence, but applies a different rule for masking high-scoring sequences.
+
+-----
+
+**Usage:** ::
+
+ mdust [<fasta-file>] [-w <wsize>] [-v <cut-off>] [-m N|X|L] [-c] 
+   if no <fasta-file> is given, a multi-fasta stream is expected at stdin
+   -v default <cut-off> value is 28 (lower values might mask more, 
+   but possibly still useful sequence; > 64 will rarely mask poly-triplets)
+   -w set maximum word size to <wsize> (default 3)
+   -m if fasta output is not disabled by -c, set the masking letter type:
+      N ('N', default), X ('X'), L (make lowercase)
+   -c output masking coordinates only: 
+      seq_name, seqlength, mask_start, mask_end  (tab delimited)
+
+-----
+
+**Reference:** ::
+
+  A fast and symmetric DUST implementation to mask low-complexity DNA sequences.
+  Morgulis A, Gertz EM, Schäffer AA, Agarwala R.
+  J Comput Biol. 2006 Jun;13(5):1028-40.
+  PMID:16796549
+
+
+]]>
+ </help>  
+</tool>
b
diff -r 000000000000 -r cc6db1ee2d48 mdust_wrapper.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mdust_wrapper.py Mon Oct 05 11:05:21 2015 -0400
[
@@ -0,0 +1,113 @@
+#!/usr/bin/env python
+
+import subprocess
+import tempfile
+import sys
+import os
+import re
+from optparse import OptionParser
+
+
+class MdustWrapper(object):
+
+    def __init__(self):
+        self._options = None
+
+    def stop_err(self, msg):
+        sys.stderr.write("%s\n" % msg)
+        sys.exit()
+
+    def setAttributesFromCmdLine(self):
+        description = "mdust_wrapper"
+        description += "\nWrapper for mdust\n"
+        description += "example: mdust_wrapper.py -i seq.fasta -v 27\n"
+        parser = OptionParser(description = description, version = "0.1")
+        parser.add_option("-i", "--input",    dest = "FastaFile",   action = "store", type = "string", help = "Input Fasta File name [compulsory] [format: Fasta]", default = "")
+        parser.add_option("-o", "--output", dest = "outFile",   action = "store", type = "string", help = "output File name [compulsory] [format: fasta,tab or bed]", default = "")
+        parser.add_option("-v", "--cutoff",  dest = "cutoff",  action = "store", type = "int", help = "cutoff", default = 28)
+        parser.add_option("-w", "--wsize",  dest = "wsize",  action = "store", type = "int", help = "window size", default = 3)
+        parser.add_option("-m", "--maskingletter",  dest = "maskingletter",  action = "store", type = "string", help = "masking letter", default = "N")
+        parser.add_option("-f", "--format",    dest = "format",  action = "store", type = "string", help = "format", default = "default")
+        options = parser.parse_args()[0]
+        self._setAttributesFromOptions(options)
+
+
+    def _setAttributesFromOptions(self, options):
+        self._options = options
+
+        if self._options.FastaFile == "":
+            raise Exception("Missing input file, please provide fasta file with -i file !")
+        if self._options.outFile == "":
+            raise Exception("Missing output file, please provide output file with -o file !")
+
+
+    def run(self):
+
+ prg = "mdust"
+ args = ""
+ args += " %s" % self._options.FastaFile
+                args += " -v %d" % self._options.cutoff
+                args += " -w %d" % self._options.wsize
+                args += " -m %s" % self._options.maskingletter
+                if self._options.format == "tab" or self._options.format == "bed":
+                    args += " -c "
+ cmd = "%s %s" %(prg, args)
+
+ try:
+ tmp_err = tempfile.NamedTemporaryFile().name
+                        tmp_out = "outfile"
+ tmp_stderr = open( tmp_err, 'wb' )
+ tmp_stdout = open( tmp_out, 'wb' )
+ proc = subprocess.Popen( args=cmd, shell=True, cwd=".", stdout=tmp_stdout, stderr=tmp_stderr )
+ returncode = proc.wait()
+ tmp_stderr.close()
+ # get stderr, allowing for case where it's very large
+ tmp_stderr = open( tmp_err, 'rb' )
+ tmp_stdout = open( tmp_out, 'rb' )
+
+ stderr = ''
+                        stdout = ''
+ buffsize = 1048576
+ try:
+ while True:
+ stdout += tmp_stdout.read( buffsize )
+ if not stdout or len( stdout ) % buffsize != 0:
+ break
+ except OverflowError:
+ pass
+ tmp_stdout.close()
+
+                        try:
+ while True:
+ stderr += tmp_stderr.read( buffsize )
+ if not stderr or len( stderr ) % buffsize != 0:
+ break
+ except OverflowError:
+ pass
+ tmp_stderr.close()
+ if stderr:
+ raise Exception, stderr
+ except Exception, e:
+ self.stop_err( 'Error with mdust :\n' + str( e ) )
+
+                if self._options.format == 'bed':
+                    with open(tmp_out,"r") as fin:
+                        with open(self._options.outFile, "w") as fout:
+                            lineNumber = 0
+                            for line in fin:
+                                lineNumber += 1
+                                m = re.search(r"^(\S+)\t(\d+)\t(\d+)\t(\d+)$", line)
+                                if m is not None:
+                                    fout.write("%s\t%d\t%d\n" % (m.group(1), int(m.group(3))-1, int(m.group(4))))
+                                if m is None:
+                                    raise Exception("\nLine %d '%s' does not has a mdust format." % (lineNumber, line))
+                else:
+                    os.rename(tmp_out,self._options.outFile)
+
+
+
+
+if __name__ == "__main__":
+ iWrapper = MdustWrapper()
+ iWrapper.setAttributesFromCmdLine()
+ iWrapper.run()
b
diff -r 000000000000 -r cc6db1ee2d48 test-data/masked-seq-mdust.fsa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/masked-seq-mdust.fsa Mon Oct 05 11:05:21 2015 -0400
b
b'@@ -0,0 +1,5000 @@\n+>seqmdust\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+NNNNcaccctcaccctCGgtcgtacggcgtgacaatgtctccagaatcgatttggtcttg\n+acgaaagcagcgcacatagcacttccggatgattttgtcgcatgctttgtcaaacacgac\n+gtccgtaagtactagctcgttcggcctagctttcgcgcccgtctgctgctcgctcttccg\n+caagaattgacgcagaacgacgcgccgcttctgggtatcgaactcgataatttgggctgg\n+ctggagccggtcctcgtcaaggtcgtcattccattcgcggacgaggacggtgtcaccaat\n+tgcatatttgctccggcactcgtcgaatataggaacgtcatgcttgcactggcacctaaa\n+gtcgcttgcttgaattgcaacgaagtcgtatgtgtcttgctcgtaaactgtgcggaactt\n+ctggcgcacaaagtgtcctccctgctaatatggatccttggcatactaacatacgtctgc\n+cttaaagcgcgccttcggatacgcgagaagtccaacacgaggactcgagagagatacttc\n+cgcaaaaacgacagagaacacagatacaactactctgttatcgactccgacaatacaagc\n+atttcgtagaacacgagagggctaactacgcaaaagacgatctctttagcgtctggaaaa\n+cccgatacgaaatagcaagacaacgggagatcaataaggtacctaggaatacaaatagct\n+accttagtcgatttccatggagctcgccaggaaaacacagaataggaggaagacgcgctt\n+taactagtgccttctatactatcctattaggacacggatacctaaagtcctatctctatc\n+gctttggacacgtaacgacgccagactgcccttgcggacgacgagaaacagcagaacacc\n+tcgtatgtagctgccctatctacgatgtatttaggccactaccgctacgacaagcacaat\n+ctctactagaaatagtagaagataaggaactccggaaacacctcactacctttctctcgg\n+acacaaagattgcttctaggtcctggcacctagcaagaggcgaagacgagaccgtctagc\n+ctacactccttttacttttccttatttgtctttctttaaagtccttcgatactagctggc\n+tactatcctaggcacgggtctttccctaaggtcaagagatacgagagaacaacgattgta\n+catagatagactgctgtcgcgagacagccgtcctacatagtcatagactcctacaggacg\n+taaatGGaacaaacaaacaaacaaacaaatacgtctgccttatctaagacgcaatctagg\n+tcgagagccgatgtgccgcagctgcagttgtcggacaagaagagctcattcgcaaaaggg\n+taataggctttgccaagtgtagtgtcctttggctcgtacagccagagaacatcgagcgaa\n+gtacaatctttgcgagtgtgcacgttctaaacataggcataccatgcgtgcgctttgatc\n+ccctatttggtactgacatccggcgcgacgcatacgacatcccctgtgttcaccgtgact\n+gctttgttcccctcttgagattcgcttcggctcatactcctaggcgttaaggtttgcatt\n+ggcggtgcactcggagatgctcgtcgcggcgtcagtctgagccttacttttagctcatcc\n+gacttcttcagcacatttcggtccgtgatcgtccgctcttccagctggttaccgaagtac\n+atatgtcgaaag
cactggtagacaaaaggcgtgacgatcgttcgcaactccttgtttggc\n+tgcgctgggatagctgtaagattatagggatcgacttcaccctagatcggttgtcggcac\n+aatccagactctttgtcatcgatatcccagcattctttccaaaggaagccgacatttgcg\n+gcaatggtcgtgcggaagtctttgagtttgcattcagctagccattagttaaagctagta\n+ttatcgccgtagagtctagttagctattggtaaaatcgtctgcggaaatgtccaagggta\n+acctgctggttgctgaggaggtagtcgacgaagtgcttcccgaactgtgctagctaaagg\n+aacggctcgtagaatcgacgatattccggtgcaggcgttcccagctcgtaccaatgacta\n+aacagagcggacgctttcgactacagcttcgattaaatgcagatgctactggcaaaatct\n+gtaaagtccaggtcgccgtacctactgatagccaagatctcgaactgcacgccctacatg\n+tagcgcttttcgttgcctgccgatagaatgccatcgaaacagaactctgcatagtttgat\n+tgccgcagtttctcaagagtgacgaggtccgatgcgcgtcgtccgtccgctatgtatacc\n+gagaattgatcgagtttgaaagtagcacattccggagtttcggtgctagcagacttcgca\n+gctgcgaagaagcctgcactcaagtctaggcttgcttcgacctcgtttacaggttgcaaa\n+ggaggtttgtatccgtagtaagcgcttctcggaaatgctggtcctacaatggagatgtgg\n+cttaagtagcgaagctgccgtcgtttcccggaacgagctgtggccccaggctctgctcta\n+gtagcaggctcgtcccctagttctgtttcggtcgctggaggagtgagtgcagtcctgagc\n+cttcgttgtctgggcgtgcgtagtgggccctagtccttaacttcccgatctccgactgtt\n+tggcttgtctgctcctcgcccaattctatcttatactaaaccgctctctgcactccatct\n+tcttcttgttgtttcgactcgcctcggtcctgctcagaagccgcaggactgattacaatc\n+gcattgcttagggaagctctagtgcgcaaaatgtctatctacaattgtccattctgaagc\n+cgcggcttcttggaaggcctcgaagtaggtcctaccacaatgttgtcctcgtcatcctga\n+atgacaatcgtgtcgccgctgatccgtctcttcatggttgccgtgctacttaatgccgtc\n+gtgtgcgaggtagtaatagggttaagaatgccgctgtcttgctacaaaggcgccatgttc\n+ttttcgttgctcgaagcagtccgtccgcggtaagtgtgccaaatcgcacaatgcaagtgc\n+tagagctgcaagtgaagtccaagagtgaatgcactaactatttaccttgcaagccgtgca\n+cgcagtgtaatgatctgccgaagcaaagaagaaatagcagaccaagatatccgcaaaaca\n+aatatccgtagaggtaaaaaggaatgcgcgcaacttgccagtaaaacgttgcttcgtgag\n+gtagttaatgaagtgtagtctggaagctttgaagtgcgaccaaggatgttctagccattg\n+acttttgaccgcgcctgcacctcaatgcccgtaatggccgccatcgcgaatggcatatgt\n+tagcgctgttaaattcaaatagtagaagtgagatggattatattcgagattacttatgta\n+cgcctacttgagggcggtggcatgtaacccgagtcttcgcttaagtgttgttacgaacgc\n+ttgaggtcctcctcgactgttccttgtccggctacggcatcgcgctgggcccaggcctc
t\n+cgtattcggggtttggcgtccctcgctcttttgtatgctcggagagggcggctcaaaagt\n+'..b'CAGCCAAGCAGCAGCCAAAGTACGACGCAC\n+AAACCAGGATGATTCACCATCAAAGCAAACAAATATGAGGTTCAACGAGAGGTGTGAGGT\n+ATAATGACTTCGAGCATGAGCCGGTGCTTGGCCCTTCTCGAGTATGCTCGTGCCATCTGC\n+TGTCCTTTTCGAAGATATTGCATGTGCATGAACATTTCTGGAGAAGGGCCTCGCTGAGAT\n+CACACGGCTCGAACTTGAACCAGATAATGCTGGCGATAAAGAATCATGCTCTTCCCCTAC\n+AGTCCAATCCCTACGAGCGAGGACTAACCCATCTAGGTATACACAACGGCAAAAGCGAAG\n+CGACCATGCCAGATCTCGACTCGAGAGATGTCGAGAAGAATGCCCTGGATTCACCTGAGA\n+TGCACGGTCCGGAGTCGGCCACATCCTCCACCGCCTCAGAAGGCCGCTCAAGGAACTTCC\n+TCAGCAGCGTCAAGCAGTCCGTCGTACACGACAACTTCGAGGCTCGCGGCGTCCAACGAG\n+TACTCCCAGCCGAGCGCAATCCGACATCCACATTTGGCTTTCTCCAAATCATGCTTATGT\n+GGATGAGCATCAACATGACCGCCGTCGTGATCGTCCTGGGCTTCCTCGGGCCCATCACTT\n+TCTCCTTATCCTTCAAAGATGCCTCTCTCCTTGCAGTCTTCGGCGCAATAGTTGGAGCGA\n+CGCCGGTCGCCTACATCGCCACTTTCGGCCCTCGCTCCGGCAACCGCACCATGATCCTCA\n+CCCGATACATCACCGGTTGGTGGCCATCCAAAGTCATCGTCATTCTCACACTCATCATCC\n+TCATGGGCTACGTCCTCCTCGACGCCGTTATTGGCGGTCAGATCCTCTCAGCCGTCAGTC\n+CCAACGCTTCCCTCAGCGTCATAGTCGGCATCGTCATCGTCTGCATCCTCACCTGGATCG\n+TGACCGCATTCGGCTATTGTGTCTTCCATCACTTCGAGCGCTACGCCTGGATCCCCTCCC\n+TCATCGTCATTTGTCTCCTCATCGGCGTCTCCGCTCCAAAATGGAATCTCGAACCAGGTC\n+CCGCTCTCCCACCTCGCACCCTCGCCGGCAACCGCCTCTCCTTCTTCTCTCTTTGTCTCG\n+CCGCCGAAATCACCTACGCCGAATCGGGAGCCGACATGTTTGTCTACTATCCATCCACCA\n+CTTCCCGCACAAAAGTCTTTCTCTCCACCCTCGCTGGTCTCACCCTCTCCTCCGCCATCG\n+CCCTCATCACCGGTATTGGCCTCGGCTCCGGCACTCTCACCGATCCAGCCTGGTCCTCCG\n+CCTACTCCGTCTCCGCAGGTGCCCTCATCGTTGAAGCATTCCGTCCACTCGGTGGATTCG\n+GCTCATTCTGCAGCGTTCTCGTTGCACTGGGTATAGTCGCAAATATGGTCCTGCCGACCT\n+ACGCTTCAGGAGTGGACTTCCAAGCGTTCGGCCGATGGTTCGAAAGGGTCCCGCGGATCG\n+TGTGGAATACCGTTGCGCTGGTCGTACCGATGATTGGAGCAATCGCAGGACGAGAACACC\n+TGGCGGAGATCTTCTCCAACTTCCTGGCATTGATGGGTTATTGGGTGTCTATCTGGATTG\n+CGATCGTGGTGGAAGAGCACATCTTGTTCCGAAAGATGGGAGGCAAGGGTTGGAAGTGGG\n+AGGACTGGAATGACAGGACGAAGCTCCCTATCGGTATTGCGGCGGGCGTTGCGTTCCTTG\n+TGGGATGGGTTGGAGCGATTATGGGTATGTCGCAGGTCTGGTATGTGGGACCGTTGGCGA\n+AGCTGGTCAGTGAGCAGGGAGCGGATATCGGGAATTATGTTGGGTTTTCGTGGGCGTTGG\n+TGGTG
TATCCGCCTTTGAGGTGGGTGGAGCTGAGGTGGGTTGGGAGATAGGAGGGCAGAT\n+GGAGGATGGAGGAAGTCGGTGGACAGATCAAGGCGGAGAATCGGAGGTTGTGAAGCCCTC\n+GAGTTGTGGCTCGGGATGTGCCTGCGCTGTTTATCTTTGCGGCACTCAACTCCGCCAACA\n+GCATGCATTCGTGCGAGCAACGCAGACAAATTCAGCGTATCCTCTCGAGCATTGATATGG\n+CAAGCCCGAATACGCTCAACTCTGCGCCCTTCTCCGTTCATTCAACAGCATATACTCATA\n+CCGACATCAATATTCGTTATCATTATTTGCACAACTAGCGGTGAATCGCGATGAAGCCCG\n+ACTTACCTTAGTCATGCACTGCATCCGACATCGGGACGCGTATCGCCCGCATTCTTGGCA\n+CCTCCCCGGGCGCAAAATCCTGTTATCTTCCCGGCTGGCTCCCTCCTTCTTTTCATATGC\n+ACGCGTAATCCTCACCGTCTCTCCCTTTCACATTCACGCACCAGTTCACAATAACAATAT\n+TCACGCTTGACTTCACCTCACTTCACTGCCACCCCTCCCTACCTCACTCTTTCCCACTGC\n+TGCGCAGTCATGCCTCGCCCTCCAGACCACTACACCCACGCCCAATTCGAGCCCGTCGAG\n+TCGGACCACGCCGGCGAAACGGTCCAATGTCGCCACTGCCGCAACTGGACGGGTTCCATC\n+AAGACGCTTAATCGGAAAAAAGCTCATCTTCTCACGTGCACGCAGTATGCGCAGTGGCGG\n+GCGGCGGGGAATGGGCAGGATTTGGCGCCGCCGAATAAGTATCATAAGAGGGATAGTTCG\n+GTTATGGGTGGTGGGTGGGAGGGGCAGGGGGATGGGTCAGTGATCTTCTTTTGATGTTGT\n+GGTGTTTTGTGGTGGCTTGAAGATGGAATTGCGATGATTTGGGTTCTTTGTGATGTTGGT\n+GGAATGCTAACGGTGTTATTGCAGGAATACCTCTGGCTTCAATATGTCGCCGTTCAATGA\n+TACCCCGACGGTGGCTCGTGGCCGCAATCTTGATCTGACCAAGGTAGACATTGACAAGAT\n+GTTCCAATTCGCTGCAGTATACTGATCATGTACAGTACTTCTCCGAATTCTGGGACGACA\n+CCGCTTCGAACAAGGTCAGTGACTATCTACAGGATTTTACAGGCAATCTGTCTATTGTCA\n+CTTTCGTGATCCTCTGCATCTTCAGATCCAGCGCCACAAGTACTTACAAGTATCTACCAG\n+TGCATGCGAGTACGCTGCCTCTCCTGCGGCTTCGTCCGGGCAAAGAACACCACCCGCCAA\n+GTCGAACATCTTGCCAGCTGCGCCTCGTTCCTCAACAGCACCGAAGGTCAAGCCGCTGTC\n+GCGAACGGCGAACTCGAAATGACACCTGCCGCACCTCGCCAGTCTTTCGGAGGCGGGAAT\n+GACATCTGGCGTGGCGGTGCACCAAACCCGAATCTGCAAGTCAGCCAGACGCCAACTTCG\n+ACCAGCCGTGGTGGAGGAAGAGCCTATCCGATGCCGCCTCCTCCCAAAGCACCCTCCCTC\n+GTATCTCACCTCCTCAACAAGTTCCAGGAAAAGTTCAATGTTGCGACTCAACAGTCTTTC\n+CTCTCTCACGCCGGCTGCGGTACTCTGTCCCACGCAGCTCTCTGCTCCTGGCTCACACAG\n+CATGGCCACATTTCTCGAGCTATGATTGCCGCCATTGGATCACTCATCGCCAAAGTGTTC\n+CTGCCAGATGCCGCCAATACTCGCATTGCTACGCCATACCGTGCTCTGGACCTGCTCATC\n+TCCACGATCAGCAATCTTCGTAAAGAGATCGACTTCATCGAGAATACTAAGCGGAAGTAT\n+CGCTTGGACGCTGCGAGTGAGCCGCCGTCGCCGATGACGAAGGCATACGTCG
ATCTCTTG\n+GCGAGTGCTTCTGAGCCGAGAGCAGATCTGCTGGAGGGAATGGTAGCGCTGTGGGCGACC\n+GAGCATGTATGTTGACCACCTTTTCCGTTTCTGTGTTCGATGTCACTAACTCTATACCAG\n'
b
diff -r 000000000000 -r cc6db1ee2d48 test-data/seq-mdust.fsa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/seq-mdust.fsa Mon Oct 05 11:05:21 2015 -0400
b
b'@@ -0,0 +1,5000 @@\n+>seqmdust\n+ccctaaccctaaccctaaccctaaccctaaccctaaccctaaccctaaccctaaccctaa\n+ccctaaccctaaccctaaccctaaccctaaccctaaccctaaccctaaccctaaccctaa\n+ccctcaccctcaccctCGgtcgtacggcgtgacaatgtctccagaatcgatttggtcttg\n+acgaaagcagcgcacatagcacttccggatgattttgtcgcatgctttgtcaaacacgac\n+gtccgtaagtactagctcgttcggcctagctttcgcgcccgtctgctgctcgctcttccg\n+caagaattgacgcagaacgacgcgccgcttctgggtatcgaactcgataatttgggctgg\n+ctggagccggtcctcgtcaaggtcgtcattccattcgcggacgaggacggtgtcaccaat\n+tgcatatttgctccggcactcgtcgaatataggaacgtcatgcttgcactggcacctaaa\n+gtcgcttgcttgaattgcaacgaagtcgtatgtgtcttgctcgtaaactgtgcggaactt\n+ctggcgcacaaagtgtcctccctgctaatatggatccttggcatactaacatacgtctgc\n+cttaaagcgcgccttcggatacgcgagaagtccaacacgaggactcgagagagatacttc\n+cgcaaaaacgacagagaacacagatacaactactctgttatcgactccgacaatacaagc\n+atttcgtagaacacgagagggctaactacgcaaaagacgatctctttagcgtctggaaaa\n+cccgatacgaaatagcaagacaacgggagatcaataaggtacctaggaatacaaatagct\n+accttagtcgatttccatggagctcgccaggaaaacacagaataggaggaagacgcgctt\n+taactagtgccttctatactatcctattaggacacggatacctaaagtcctatctctatc\n+gctttggacacgtaacgacgccagactgcccttgcggacgacgagaaacagcagaacacc\n+tcgtatgtagctgccctatctacgatgtatttaggccactaccgctacgacaagcacaat\n+ctctactagaaatagtagaagataaggaactccggaaacacctcactacctttctctcgg\n+acacaaagattgcttctaggtcctggcacctagcaagaggcgaagacgagaccgtctagc\n+ctacactccttttacttttccttatttgtctttctttaaagtccttcgatactagctggc\n+tactatcctaggcacgggtctttccctaaggtcaagagatacgagagaacaacgattgta\n+catagatagactgctgtcgcgagacagccgtcctacatagtcatagactcctacaggacg\n+taaatGGaacaaacaaacaaacaaacaaatacgtctgccttatctaagacgcaatctagg\n+tcgagagccgatgtgccgcagctgcagttgtcggacaagaagagctcattcgcaaaaggg\n+taataggctttgccaagtgtagtgtcctttggctcgtacagccagagaacatcgagcgaa\n+gtacaatctttgcgagtgtgcacgttctaaacataggcataccatgcgtgcgctttgatc\n+ccctatttggtactgacatccggcgcgacgcatacgacatcccctgtgttcaccgtgact\n+gctttgttcccctcttgagattcgcttcggctcatactcctaggcgttaaggtttgcatt\n+ggcggtgcactcggagatgctcgtcgcggcgtcagtctgagccttacttttagctcatcc\n+gacttcttcagcacatttcggtccgtgatcgtccgctcttccagctggttaccgaagtac\n+atatgtcgaaag
cactggtagacaaaaggcgtgacgatcgttcgcaactccttgtttggc\n+tgcgctgggatagctgtaagattatagggatcgacttcaccctagatcggttgtcggcac\n+aatccagactctttgtcatcgatatcccagcattctttccaaaggaagccgacatttgcg\n+gcaatggtcgtgcggaagtctttgagtttgcattcagctagccattagttaaagctagta\n+ttatcgccgtagagtctagttagctattggtaaaatcgtctgcggaaatgtccaagggta\n+acctgctggttgctgaggaggtagtcgacgaagtgcttcccgaactgtgctagctaaagg\n+aacggctcgtagaatcgacgatattccggtgcaggcgttcccagctcgtaccaatgacta\n+aacagagcggacgctttcgactacagcttcgattaaatgcagatgctactggcaaaatct\n+gtaaagtccaggtcgccgtacctactgatagccaagatctcgaactgcacgccctacatg\n+tagcgcttttcgttgcctgccgatagaatgccatcgaaacagaactctgcatagtttgat\n+tgccgcagtttctcaagagtgacgaggtccgatgcgcgtcgtccgtccgctatgtatacc\n+gagaattgatcgagtttgaaagtagcacattccggagtttcggtgctagcagacttcgca\n+gctgcgaagaagcctgcactcaagtctaggcttgcttcgacctcgtttacaggttgcaaa\n+ggaggtttgtatccgtagtaagcgcttctcggaaatgctggtcctacaatggagatgtgg\n+cttaagtagcgaagctgccgtcgtttcccggaacgagctgtggccccaggctctgctcta\n+gtagcaggctcgtcccctagttctgtttcggtcgctggaggagtgagtgcagtcctgagc\n+cttcgttgtctgggcgtgcgtagtgggccctagtccttaacttcccgatctccgactgtt\n+tggcttgtctgctcctcgcccaattctatcttatactaaaccgctctctgcactccatct\n+tcttcttgttgtttcgactcgcctcggtcctgctcagaagccgcaggactgattacaatc\n+gcattgcttagggaagctctagtgcgcaaaatgtctatctacaattgtccattctgaagc\n+cgcggcttcttggaaggcctcgaagtaggtcctaccacaatgttgtcctcgtcatcctga\n+atgacaatcgtgtcgccgctgatccgtctcttcatggttgccgtgctacttaatgccgtc\n+gtgtgcgaggtagtaatagggttaagaatgccgctgtcttgctacaaaggcgccatgttc\n+ttttcgttgctcgaagcagtccgtccgcggtaagtgtgccaaatcgcacaatgcaagtgc\n+tagagctgcaagtgaagtccaagagtgaatgcactaactatttaccttgcaagccgtgca\n+cgcagtgtaatgatctgccgaagcaaagaagaaatagcagaccaagatatccgcaaaaca\n+aatatccgtagaggtaaaaaggaatgcgcgcaacttgccagtaaaacgttgcttcgtgag\n+gtagttaatgaagtgtagtctggaagctttgaagtgcgaccaaggatgttctagccattg\n+acttttgaccgcgcctgcacctcaatgcccgtaatggccgccatcgcgaatggcatatgt\n+tagcgctgttaaattcaaatagtagaagtgagatggattatattcgagattacttatgta\n+cgcctacttgagggcggtggcatgtaacccgagtcttcgcttaagtgttgttacgaacgc\n+ttgaggtcctcctcgactgttccttgtccggctacggcatcgcgctgggcccaggcctc
t\n+cgtattcggggtttggcgtccctcgctcttttgtatgctcggagagggcggctcaaaagt\n+'..b'CAGCCAAGCAGCAGCCAAAGTACGACGCAC\n+AAACCAGGATGATTCACCATCAAAGCAAACAAATATGAGGTTCAACGAGAGGTGTGAGGT\n+ATAATGACTTCGAGCATGAGCCGGTGCTTGGCCCTTCTCGAGTATGCTCGTGCCATCTGC\n+TGTCCTTTTCGAAGATATTGCATGTGCATGAACATTTCTGGAGAAGGGCCTCGCTGAGAT\n+CACACGGCTCGAACTTGAACCAGATAATGCTGGCGATAAAGAATCATGCTCTTCCCCTAC\n+AGTCCAATCCCTACGAGCGAGGACTAACCCATCTAGGTATACACAACGGCAAAAGCGAAG\n+CGACCATGCCAGATCTCGACTCGAGAGATGTCGAGAAGAATGCCCTGGATTCACCTGAGA\n+TGCACGGTCCGGAGTCGGCCACATCCTCCACCGCCTCAGAAGGCCGCTCAAGGAACTTCC\n+TCAGCAGCGTCAAGCAGTCCGTCGTACACGACAACTTCGAGGCTCGCGGCGTCCAACGAG\n+TACTCCCAGCCGAGCGCAATCCGACATCCACATTTGGCTTTCTCCAAATCATGCTTATGT\n+GGATGAGCATCAACATGACCGCCGTCGTGATCGTCCTGGGCTTCCTCGGGCCCATCACTT\n+TCTCCTTATCCTTCAAAGATGCCTCTCTCCTTGCAGTCTTCGGCGCAATAGTTGGAGCGA\n+CGCCGGTCGCCTACATCGCCACTTTCGGCCCTCGCTCCGGCAACCGCACCATGATCCTCA\n+CCCGATACATCACCGGTTGGTGGCCATCCAAAGTCATCGTCATTCTCACACTCATCATCC\n+TCATGGGCTACGTCCTCCTCGACGCCGTTATTGGCGGTCAGATCCTCTCAGCCGTCAGTC\n+CCAACGCTTCCCTCAGCGTCATAGTCGGCATCGTCATCGTCTGCATCCTCACCTGGATCG\n+TGACCGCATTCGGCTATTGTGTCTTCCATCACTTCGAGCGCTACGCCTGGATCCCCTCCC\n+TCATCGTCATTTGTCTCCTCATCGGCGTCTCCGCTCCAAAATGGAATCTCGAACCAGGTC\n+CCGCTCTCCCACCTCGCACCCTCGCCGGCAACCGCCTCTCCTTCTTCTCTCTTTGTCTCG\n+CCGCCGAAATCACCTACGCCGAATCGGGAGCCGACATGTTTGTCTACTATCCATCCACCA\n+CTTCCCGCACAAAAGTCTTTCTCTCCACCCTCGCTGGTCTCACCCTCTCCTCCGCCATCG\n+CCCTCATCACCGGTATTGGCCTCGGCTCCGGCACTCTCACCGATCCAGCCTGGTCCTCCG\n+CCTACTCCGTCTCCGCAGGTGCCCTCATCGTTGAAGCATTCCGTCCACTCGGTGGATTCG\n+GCTCATTCTGCAGCGTTCTCGTTGCACTGGGTATAGTCGCAAATATGGTCCTGCCGACCT\n+ACGCTTCAGGAGTGGACTTCCAAGCGTTCGGCCGATGGTTCGAAAGGGTCCCGCGGATCG\n+TGTGGAATACCGTTGCGCTGGTCGTACCGATGATTGGAGCAATCGCAGGACGAGAACACC\n+TGGCGGAGATCTTCTCCAACTTCCTGGCATTGATGGGTTATTGGGTGTCTATCTGGATTG\n+CGATCGTGGTGGAAGAGCACATCTTGTTCCGAAAGATGGGAGGCAAGGGTTGGAAGTGGG\n+AGGACTGGAATGACAGGACGAAGCTCCCTATCGGTATTGCGGCGGGCGTTGCGTTCCTTG\n+TGGGATGGGTTGGAGCGATTATGGGTATGTCGCAGGTCTGGTATGTGGGACCGTTGGCGA\n+AGCTGGTCAGTGAGCAGGGAGCGGATATCGGGAATTATGTTGGGTTTTCGTGGGCGTTGG\n+TGGTG
TATCCGCCTTTGAGGTGGGTGGAGCTGAGGTGGGTTGGGAGATAGGAGGGCAGAT\n+GGAGGATGGAGGAAGTCGGTGGACAGATCAAGGCGGAGAATCGGAGGTTGTGAAGCCCTC\n+GAGTTGTGGCTCGGGATGTGCCTGCGCTGTTTATCTTTGCGGCACTCAACTCCGCCAACA\n+GCATGCATTCGTGCGAGCAACGCAGACAAATTCAGCGTATCCTCTCGAGCATTGATATGG\n+CAAGCCCGAATACGCTCAACTCTGCGCCCTTCTCCGTTCATTCAACAGCATATACTCATA\n+CCGACATCAATATTCGTTATCATTATTTGCACAACTAGCGGTGAATCGCGATGAAGCCCG\n+ACTTACCTTAGTCATGCACTGCATCCGACATCGGGACGCGTATCGCCCGCATTCTTGGCA\n+CCTCCCCGGGCGCAAAATCCTGTTATCTTCCCGGCTGGCTCCCTCCTTCTTTTCATATGC\n+ACGCGTAATCCTCACCGTCTCTCCCTTTCACATTCACGCACCAGTTCACAATAACAATAT\n+TCACGCTTGACTTCACCTCACTTCACTGCCACCCCTCCCTACCTCACTCTTTCCCACTGC\n+TGCGCAGTCATGCCTCGCCCTCCAGACCACTACACCCACGCCCAATTCGAGCCCGTCGAG\n+TCGGACCACGCCGGCGAAACGGTCCAATGTCGCCACTGCCGCAACTGGACGGGTTCCATC\n+AAGACGCTTAATCGGAAAAAAGCTCATCTTCTCACGTGCACGCAGTATGCGCAGTGGCGG\n+GCGGCGGGGAATGGGCAGGATTTGGCGCCGCCGAATAAGTATCATAAGAGGGATAGTTCG\n+GTTATGGGTGGTGGGTGGGAGGGGCAGGGGGATGGGTCAGTGATCTTCTTTTGATGTTGT\n+GGTGTTTTGTGGTGGCTTGAAGATGGAATTGCGATGATTTGGGTTCTTTGTGATGTTGGT\n+GGAATGCTAACGGTGTTATTGCAGGAATACCTCTGGCTTCAATATGTCGCCGTTCAATGA\n+TACCCCGACGGTGGCTCGTGGCCGCAATCTTGATCTGACCAAGGTAGACATTGACAAGAT\n+GTTCCAATTCGCTGCAGTATACTGATCATGTACAGTACTTCTCCGAATTCTGGGACGACA\n+CCGCTTCGAACAAGGTCAGTGACTATCTACAGGATTTTACAGGCAATCTGTCTATTGTCA\n+CTTTCGTGATCCTCTGCATCTTCAGATCCAGCGCCACAAGTACTTACAAGTATCTACCAG\n+TGCATGCGAGTACGCTGCCTCTCCTGCGGCTTCGTCCGGGCAAAGAACACCACCCGCCAA\n+GTCGAACATCTTGCCAGCTGCGCCTCGTTCCTCAACAGCACCGAAGGTCAAGCCGCTGTC\n+GCGAACGGCGAACTCGAAATGACACCTGCCGCACCTCGCCAGTCTTTCGGAGGCGGGAAT\n+GACATCTGGCGTGGCGGTGCACCAAACCCGAATCTGCAAGTCAGCCAGACGCCAACTTCG\n+ACCAGCCGTGGTGGAGGAAGAGCCTATCCGATGCCGCCTCCTCCCAAAGCACCCTCCCTC\n+GTATCTCACCTCCTCAACAAGTTCCAGGAAAAGTTCAATGTTGCGACTCAACAGTCTTTC\n+CTCTCTCACGCCGGCTGCGGTACTCTGTCCCACGCAGCTCTCTGCTCCTGGCTCACACAG\n+CATGGCCACATTTCTCGAGCTATGATTGCCGCCATTGGATCACTCATCGCCAAAGTGTTC\n+CTGCCAGATGCCGCCAATACTCGCATTGCTACGCCATACCGTGCTCTGGACCTGCTCATC\n+TCCACGATCAGCAATCTTCGTAAAGAGATCGACTTCATCGAGAATACTAAGCGGAAGTAT\n+CGCTTGGACGCTGCGAGTGAGCCGCCGTCGCCGATGACGAAGGCATACGTCG
ATCTCTTG\n+GCGAGTGCTTCTGAGCCGAGAGCAGATCTGCTGGAGGGAATGGTAGCGCTGTGGGCGACC\n+GAGCATGTATGTTGACCACCTTTTCCGTTTCTGTGTTCGATGTCACTAACTCTATACCAG\n'
b
diff -r 000000000000 -r cc6db1ee2d48 tool_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml Mon Oct 05 11:05:21 2015 -0400
b
@@ -0,0 +1,18 @@
+<?xml version="1.0"?>
+<tool_dependency>
+ <package name="mdust" version="1.0">
+ <install version="1.0">
+ <actions>
+ <action type="download_by_url">ftp://occams.dfci.harvard.edu/pub/bio/tgi/software/seqclean/mdust.tar.gz</action>
+ <action type="shell_command">make</action>
+ <action type="move_file">
+ <source>mdust</source>
+ <destination>$INSTALL_DIR</destination>
+ </action>
+ <action type="set_environment">
+ <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR</environment_variable>
+ </action>
+ </actions>
+ </install>
+ </package>
+</tool_dependency>