Mercurial > repos > malex > tandem_repeats_finder

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/trf.xml	Wed Nov 30 12:18:10 2011 -0500
@@ -0,0 +1,119 @@
+<tool id="trf" name="Tandem Repeats Finder" version="4.04">
+  <description>Detect tandem repeats that have undergone extensive mutational change</description>
+  <command interpreter="python">trf_wrapper.py
+      $input1 $match $mismatch $delta $mprobability $iprobability $minscore
+      $maxperiod $masked $flanking $noredundancy -o $outputdat -k $outputmask -t
+      $report -i $indices
+  </command>
+  <inputs>
+    <param format="fasta" name="input1" type="data">
+      <label>Sequence</label>
+    </param>
+    <param name="match" size="4" type="text" value="2">
+      <label>Match</label>
+    </param>
+    <param name="mismatch" size="4" type="text" value="7">
+      <label>Mismatch</label>
+    </param>
+    <param name="delta" size="4" type="text" value="7">
+      <label>Indels</label>
+    </param>
+    <param name="mprobability" size="4" type="integer" value="80" label="Matching probability, %">
+        <validator type="in_range" message="(10-100)" min="10" max="100"/>
+    </param>
+    <param name="iprobability" size="4" type="integer" value="10" label="Indel probability, %">
+        <validator type="in_range" message="(10-100)" min="10" max="100"/>
+    </param>
+    <param name="minscore" size="3" type="integer" value="50">
+        <label>Minimum alignment score for repeat reporting</label>
+        <validator type="in_range" message="(30-150)" min="30" max="150"/>
+    </param>
+    <param name="maxperiod" size="4" type="integer" value="500">
+        <label>Maximum period size for repeat reporting</label>
+        <validator type="in_range" message="(1-2000)" min="1" max="2000"/>
+    </param>
+    <param name="flanking" type="boolean" checked="no" truevalue="-f" falsevalue="" display="checkboxes">
+        <label>Flanking sequence</label>
+    </param>
+    <param name="masked" type="boolean" checked="no" truevalue="-m" falsevalue="" display="checkboxes">
+        <label>Masked Sequence File</label>
+    </param>
+    <param name="noredundancy" type="boolean" checked="no" truevalue="-r" falsevalue="" display="checkboxes">
+        <label>No redundancy elimination</label>
+    </param>
+  </inputs>
+  <outputs>
+    <data format="html" name="report" label="${tool.name} on ${on_string}: Report"/>
+    <data format="html" name="indices" label="${tool.name} on ${on_string}: Indices"/>
+    <data format="tabular" name="outputdat" label="${tool.name} on ${on_string}: Data"/>
+    <data format="txt" name="outputmask" label="${tool.name} on ${on_string}: Masked Sequence">
+        <filter>masked is True</filter>
+    </data>
+  </outputs>
+  <tests>
+      <test>
+          <param name="input1" value="trf_input.fasta"/>
+          <param name="match" value="2"/>
+          <param name="mismatch" value="7"/>
+          <param name="delta" value="7"/>
+          <param name="mprobability" value="80"/>
+          <param name="iprobability" value="10"/>
+          <param name="minscore" value="50"/>
+          <param name="maxperiod" value="500"/>
+          <param name="masked" value="-m"/>
+          <param name="flanking" value=""/>
+          <param name="noredundancy" value=""/>
+          <output name="outputdat" file="trf_out.dat"/>
+          <output name="outputmask" file="trf_out.mask"/>
+      </test>
+  </tests>
+  <help>
+.. class:: warningmark
+
+The input dataset needs to be in FASTA format.
+
+-----
+
+    Tandem Repeats Finder, Version 4.04
+
+    Copyright (C) Dr. Gary Benson 1999-2004. All rights reserved.
+
+
+    Please cite:
+
+G. Benson, "Tandem repeats finder: a program to analyze DNA sequences" Nucleic Acids Research (1999) Vol. 27, No. 2, pp. 573-580.
+
+
+A tandem repeat in DNA is two or more adjacent, approximate copies of a pattern
+of nucleotides. Tandem Repeats Finder is a program to locate and display tandem
+repeats in DNA sequences. In order to use the program, the user submits a
+sequence in FASTA format. There is no need to specify the pattern, the size of
+the pattern or any other parameter. The output consists of two files: a repeat
+table file and an alignment file. The repeat table contains information about
+each repeat, including its location, size, number of copies and nucleotide
+content. Clicking on the location indices for one of the table entries opens a
+second web browser that shows an alignment of the copies against a consensus
+pattern. The program is very fast, analyzing sequences on the order of .5Mb in
+just a few seconds. Submitted sequences may be of arbitrary length. Repeats
+with pattern size in the range from 1 to 2000 bases are detected.
+  </help>
+</tool>
+<!--
+Please use: trf File Match Mismatch Delta PM PI Minscore MaxPeriod [options]
+Where: (all weights, penalties, and scores are positive)
+  File = sequences input file
+  Match  = matching weight
+  Mismatch  = mismatching penalty
+  Delta = indel penalty
+  PM = match probability (whole number)
+  PI = indel probability (whole number)
+  Minscore = minimum alignment score to report
+  MaxPeriod = maximum period size to report
+  [options] = one or more of the following :
+               -m    masked sequence file
+               -f    flanking sequence
+               -d    data file
+               -h    suppress html output
+               -r    no redundancy elimination
+Note the sequence file should be in FASTA format:
+-->