Repository 'meme_meme'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/meme_meme

Changeset 0:e416c7c26977 (2015-12-21)
Next changeset 1:d20e391eb22f (2015-12-22)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/meme commit 71ac7e12419b8541746ebf8d4ba704cbbd603db1
added:
meme.xml
test-data/meme_input_1.fasta
test-data/meme_output_html_1.html
test-data/meme_output_html_2.html
test-data/meme_output_txt_1.txt
test-data/meme_output_txt_2.txt
test-data/meme_output_xml_1.xml
test-data/meme_output_xml_2.xml
test-data/prior30.plib
tool_dependencies.xml
b
diff -r 000000000000 -r e416c7c26977 meme.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/meme.xml Mon Dec 21 06:03:33 2015 -0500
[
b'@@ -0,0 +1,333 @@\n+<tool id="meme_meme" name="MEME" version="4.11.0.0">\n+    <description>- Multiple Em for Motif Elicitation</description>\n+    <requirements>\n+        <requirement type="package" version="4.11.0">meme</requirement>\n+    </requirements>\n+    <command>\n+        <![CDATA[\n+            meme "$input1"\n+            -o "${html_outfile.files_path}"\n+            -nostatus\n+            #if str( $options_type.options_type_selector ) == \'advanced\':\n+                -sf "${ str( $options_type.sf ).replace( \' \', \'_\' ) }"\n+                -${options_type.alphabet_type.alphabet_type_selector}\n+                -mod "${options_type.mod_type.mod_type_selector}"\n+                -nmotifs "${options_type.nmotifs}"\n+                -wnsites "${options_type.wnsites}"\n+                #if $options_type.evt < float(\'inf\'):\n+                    -evt "${options_type.evt}"\n+                #end if\n+                #if str( $options_type.mod_type.mod_type_selector ) != \'oops\':\n+                    #if str( $options_type.mod_type.motif_occurrence_type.motif_occurrence_type_selector ) == \'nsites\':\n+                        -nsites "${options_type.mod_type.motif_occurrence_type.nsites}"\n+                    #elif str( $options_type.mod_type.motif_occurrence_type.motif_occurrence_type_selector ) == \'min_max_sites\':\n+                        -minsites "${options_type.mod_type.motif_occurrence_type.minsites}"\n+                        -maxsites "${options_type.mod_type.motif_occurrence_type.maxsites}"\n+                    #end if\n+                #end if\n+                #if str( $options_type.motif_width_type.motif_width_type_selector ) == \'exact\':\n+                    -w "${options_type.motif_width_type.width}"\n+                #else\n+                    -minw "${options_type.motif_width_type.minw}"\n+                    -maxw "${options_type.motif_width_type.maxw}"\n+                #end if\n+                #if str( $options_type.motif_trim_type.motif_trim_type_selector ) == \'nomatrim\':\n+                    -nomatrim\n+                #else\n+                    -wg "${options_type.motif_trim_type.wg}"\n+                    -ws "${options_type.motif_trim_type.ws}"\n+                    ${options_type.motif_trim_type.noendgaps}\n+                #end if\n+                #if str( $options_type.bfile ) != \'None\':\n+                    -bfile "${options_type.bfile}"\n+                #end if\n+                #if str( $options_type.pspfile ) != \'None\':\n+                    -psp "${options_type.pspfile}"\n+                #end if\n+                #if str( $options_type.alphabet_type.alphabet_type_selector ) == "dna":\n+                    ${options_type.alphabet_type.revcomp} ${options_type.alphabet_type.pal}\n+                #end if\n+                -maxiter "${options_type.maxiter}"\n+                -distance "${options_type.distance}"\n+                -prior "${options_type.alphabet_type.prior_type.prior_type_selector}"\n+                #if str( $options_type.alphabet_type.prior_type.prior_type_selector ) != \'addone\':\n+                    -b "${options_type.alphabet_type.prior_type.prior_b}"\n+                    #if str( $options_type.alphabet_type.prior_type.plib ) != \'None\':\n+                        -plib "${options_type.alphabet_type.prior_type.plib}"\n+                    #end if\n+                #end if\n+                #if str( $options_type.alphabet_type.spmap_type.spmap_type_selector ) == \'cons\':\n+                    -cons "${options_type.alphabet_type.spmap_type.cons}"\n+                #else\n+                    -spmap "${options_type.alphabet_type.spmap_type.spmap_type_selector}"\n+                    -spfuzz "${options_type.alphabet_type.spmap_type.spfuzz}"\n+                #end if\n+                #if str( $options_type.branching_type.branching_type_selector ) == \'x_branch\':\n+                    -x_branch\n+                    -bfactor "${options_type.branching_type.bfactor}"\n+                    -heapsize "${options_type.branching_type.heapsize}"'..b'            <param name="heapsize" type="integer" value="64" label="Maximum number of heaps to use" argument="-heapsize"/>\n+                    </when>\n+                </conditional>\n+            </when>\n+        </conditional>\n+        <param name="non_commercial_use" label="I certify that I am not using this tool for commercial purposes." type="boolean" truevalue="NON_COMMERCIAL_USE" falsevalue="COMMERCIAL_USE" checked="False">\n+            <validator type="expression" message="This tool is only available for non-commercial use.">value == True</validator>\n+        </param>\n+    </inputs>\n+    <outputs>\n+        <data format="html" name="html_outfile" label="${tool.name} on ${on_string} (html)"/>\n+        <data format="txt" name="txt_outfile" label="${tool.name} on ${on_string} (text)"/>\n+        <data format="memexml" name="xml_outfile" label="${tool.name} on ${on_string} (xml)"/>\n+    </outputs>\n+    <tests>\n+        <test>\n+            <param name="input1" value="meme_input_1.fasta" ftype="fasta" dbkey="hg19"/>\n+            <param name="options_type_selector" value="basic"/>\n+            <param name="non_commercial_use" value="True"/>\n+            <output name="html_outfile" file="meme_output_html_1.html" compare="contains"/>\n+            <output name="txt_outfile" file="meme_output_txt_1.txt" lines_diff="12"/>\n+            <output name="xml_outfile" file="meme_output_xml_1.xml" lines_diff="8"/>\n+        </test>\n+        <test>\n+            <param name="input1" value="meme_input_1.fasta" ftype="fasta" dbkey="hg19"/>\n+            <param name="options_type_selector" value="advanced"/>\n+            <param name="plib" value="prior30.plib" ftype="txt"/>\n+            <param name="non_commercial_use" value="True"/>\n+            <output name="html_outfile" file="meme_output_html_2.html" compare="contains"/>\n+            <output name="txt_outfile" file="meme_output_txt_2.txt" lines_diff="12"/>\n+            <output name="xml_outfile" file="meme_output_xml_2.xml" lines_diff="8"/>\n+        </test>\n+    </tests>\n+    <help>\n+\n+.. class:: warningmark\n+\n+**WARNING: This tool is only available for non-commercial use. Use for educational, research and non-profit purposes is permitted.\n+Before using, be sure to review, agree, and comply with the license.**\n+\n+If you want to specify sequence weights, you must include them at the top of your input FASTA file.\n+\n+MEME discovers novel, ungapped motifs (recurring, fixed-length patterns) in your sequences (sample output from sequences).\n+MEME splits variable-length patterns into two or more separate motifs.  A motif is a sequence pattern that occurs repeatedly\n+in a group of related sequences.  MEME represents motifs as position-dependent letter-probability matrices which describe the\n+probability of each possible letter at each position in the pattern.  Individual MEME motifs do not contain gaps.  Patterns\n+with variable-length gaps are split by MEME into two or more separate motifs.  MEME takes as input a group of sequences and\n+outputs as many motifs as requested.  MEME uses statistical modeling techniques to automatically choose the best width, number\n+of occurrences, and description for each motif.\n+\n+.. class:: infomark\n+\n+For detailed information on MEME, click here_, or view the license_.\n+\n+.. _here: http://meme-suite.org/doc/meme.html?man_type=web\n+.. _license: http://meme-suite.org/doc/copyright.html?man_type=web\n+\n+    </help>\n+    <citations>\n+        <citation type="bibtex">\n+            @published{Proceedings of the Second International Conference on Intelligent Systems for Molecular Biology, pp. 28-36, AAAI Press, Menlo Park, California,\n+            author = {Bailey,Timothy L. and Elkan, Charles},\n+            title = {Fitting a mixture model by expectation maximization to discover motifs in biopolymers},\n+            year = {1994},\n+            eprint = {None},\n+            url = {http://www.sdsc.edu/~tbailey/papers/ismb94.pdf}\n+        }</citation>\n+    </citations>\n+</tool>\n'
b
diff -r 000000000000 -r e416c7c26977 test-data/meme_input_1.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/meme_input_1.fasta Mon Dec 21 06:03:33 2015 -0500
b
@@ -0,0 +1,66 @@
+>chr21_19617074_19617124_+
+AAAAATTATTACTAGGGAGGGGGCCGGAACCTCGGGACGTGGGTATATAA
+>chr21_26934381_26934431_+
+GCGCCTGGTCGGTTATGAGTCACAAGTGAGTTATAAAAGGGTCGCACGTT
+>chr21_28217753_28217803_-
+CAAAGGGGAGGAGTGGGGTGGGGGTGGGGGTTTCACTGGTCCACTATAAA
+>chr21_31710037_31710087_-
+AACACCCAGGTTTCTGAGTATATAATCGCCGCACCAAAGAATTTAATTTT
+>chr21_31744582_31744632_-
+CCCAGGTCTAAGAGCATATATAACTTGGAGTCCAGACTATGACATTCAAA
+>chr21_31768316_31768366_+
+AACGTATATAAATGGTCCTGTCCAGATGTGGCATGCAAACTCAGAATCTT
+>chr21_31914206_31914256_-
+TGACACCCACTACTTAGAGTATAAAATCATTCTGAGAAGTTAGAGACACC
+>chr21_31933633_31933683_-
+TCAGAGTATATATAAATGTTCCTGTCCAGTCACAGTCACCAAACTGACCT
+>chr21_31962741_31962791_-
+ACATATAACTCAGGTTGGATAAAATAATTTGTACAAATCAGGAGAGTCAA
+>chr21_31964683_31964733_+
+TCTGATTCACTGAGGCATATAAAAGGCCCTCTGCGGAGAAGTGTCCATAC
+>chr21_31973364_31973414_+
+aaacttaaaactctataaacttaaaactCTAGAATCTGATCCTGCTATAC
+>chr21_31992870_31992920_+
+CTCATACACTATTGAAGATGTATAAAATTTCATTTGCAGATGGTGACATT
+>chr21_32185595_32185645_-
+TCACCACCCACCAGAGCTGGGATATATAAAGAAGGTTCTGAGACTAGGAA
+>chr21_32202076_32202126_-
+TGCCCACCAGCTTGAGGTATAAAAAGCCCTGTACGGGAAGAGACCTTCAT
+>chr21_32253899_32253949_-
+AGCCCCACCCACCAGCAAGGATATATAAAAGCTCAGGAGTCTGGAGTGAC
+>chr21_32410820_32410870_-
+TCTACCCCACTAATCACTGAGGATGTATAAAAGTCCCAGGGAAGCTGGTG
+>chr21_36411748_36411798_-
+ATAGTTCTGTATAGTTTCAGTTGGCATCtaaaaattatataactttattt
+>chr21_37838750_37838800_-
+gatggttttataaggggcctcaccctcggctcagccctcattcttctcct
+>chr21_45705687_45705737_+
+CCGGGGCGGAGCGGCCTTTGCTCTTTGCGTGGTCGCGGGGGTATAACAGC
+>chr21_45971413_45971463_-
+CAGGCCCTGGGCATATAAAAGCCCCAGCAGCCAACAGGctcacacacaca
+>chr21_45978668_45978718_-
+CAGAGGGGTATAAAGGTTCCGACCACTCAGAGGCCTGGCACGAtcactca
+>chr21_45993530_45993580_+
+CCAAGGAGGAGTATAAAAGCCCCACAAACCCGAGCACCTCACTCACTCGC
+>chr21_46020421_46020471_+
+GAGACATATAAAAGCCAACATCCCTGAGCACCTAACACACGGactcactc
+>chr21_46031920_46031970_+
+GGAAAATACCCAGGGAGGGTATAAAACCTCAGCAGCCAGGGCACACAAAC
+>chr21_46046964_46047014_+
+ACAAGGCCAGGAGGGGTATAAAAGCCTGAGAGCCCCAAGAACctcacaca
+>chr21_46057197_46057247_+
+ATTGCTGAGTCTCCTGCTGGGAAAACACAGGCCCTGGGCATATAAAAGCC
+>chr21_46086869_46086919_-
+GACAGGTGTGCTTCTGTGCTGTGGGGATGCCTGGGCCCAGGTATAAAGGC
+>chr21_46102103_46102153_-
+AGGTGTGTGCTTCTGTGCTGTGGGGATGCCTGGGTCCAGGTATAAAGGCT
+>chr21_47517957_47518007_+
+CCTGGCGGCGGGGCGGGTCAGGCCGGCGGGGCGGGGTATAAAGGGGGCGG
+>chr21_47517957_47518007_+
+CCTGGCGGCGGGGCGGGTCAGGCCGGCGGGGCGGGGTATAAAGGGGGCGG
+>chr21_47517957_47518007_+
+CCTGGCGGCGGGGCGGGTCAGGCCGGCGGGGCGGGGTATAAAGGGGGCGG
+>chr21_47575506_47575556_-
+TGAGAAGCCGGTGGGGAGGTGCTGCCGGTGAGCGTATAAAGGCCCTGGCG
+>chr21_47575506_47575556_-
+TGAGAAGCCGGTGGGGAGGTGCTGCCGGTGAGCGTATAAAGGCCCTGGCG
b
diff -r 000000000000 -r e416c7c26977 test-data/meme_output_html_1.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/meme_output_html_1.html Mon Dec 21 06:03:33 2015 -0500
[
@@ -0,0 +1,100 @@
+<!DOCTYPE HTML>
+<html>
+  <head>
+    <meta charset="UTF-8">
+    <title>MEME</title>
+    <script>
+      // @JSON_VAR data
+      var data = {
+        "program": "MEME",
+        "version": "4.11.0",
+        "release": "Thu Nov 26 17:48:49 2015 +1000",
+        "stop_reason": "Stopped because requested number of motifs (1) found.",
+        "cmd": [
+          "meme",
+          "/Users/gvk/work/git_workspace/galaxy/database/files/002/dataset_2490.dat",
+          "-o",
+          "/Users/gvk/work/git_workspace/galaxy/database/job_working_directory/001/1912/dataset_2530_files",
+          "-nostatus"
+        ],
+        "options": {
+          "mod": "zoops",
+          "revcomp": false,
+          "nmotifs": 1,
+          "minw": 8,
+          "maxw": 50,
+          "minsites": 2,
+          "maxsites": 30,
+          "wnsites": 0.8,
+          "spmap": "pam",
+          "spfuzz": 120,
+          "maxwords": -1,
+          "prior": "megap",
+          "b": 7500,
+          "maxiter": 50,
+          "distance": 1e-05,
+          "wg": 11,
+          "ws": 1,
+          "noendgaps": false,
+          "substring": true
+        },
+        "alphabet": {
+          "name": "Protein",
+          "like": "protein",
+          "ncore": 20,
+          "symbols": [
+            {
+              "symbol": "A",
+              "name": "Alanine",
+              "colour": "0000CC"
+            }, {
+              "symbol": "C",
+              "name": "Cysteine",
+              "colour": "0000CC"
+            }, {
+              "symbol": "D",
+              "name": "Aspartic acid",
+              "colour": "FF00FF"
+            }, {
+              "symbol": "E",
+              "name": "Glutamic acid",
+              "colour": "FF00FF"
+            }, {
+              "symbol": "F",
+              "name": "Phenylalanine",
+              "colour": "0000CC"
+            }, {
+              "symbol": "G",
+              "name": "Glycine",
+              "colour": "FFB300"
+            }, {
+              "symbol": "H",
+              "name": "Histidine",
+              "colour": "FFCCCC"
+            }, {
+              "symbol": "I",
+              "name": "Isoleucine",
+              "colour": "0000CC"
+            }, {
+              "symbol": "K",
+              "name": "Lysine",
+              "colour": "CC0000"
+            }, {
+              "symbol": "L",
+              "name": "Leucine",
+              "colour": "0000CC"
+            }, {
+              "symbol": "M",
+              "name": "Methionine",
+              "colour": "0000CC"
+            }, {
+              "symbol": "N",
+              "name": "Asparagine",
+              "colour": "008000"
+            }, {
+              "symbol": "P",
+              "name": "Proline",
+              "colour": "FFFF00"
+            }, {
+              "symbol": "Q",
+              "name": "Glutamine",
b
diff -r 000000000000 -r e416c7c26977 test-data/meme_output_html_2.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/meme_output_html_2.html Mon Dec 21 06:03:33 2015 -0500
[
@@ -0,0 +1,100 @@
+<!DOCTYPE HTML>
+<html>
+  <head>
+    <meta charset="UTF-8">
+    <title>MEME</title>
+    <script>
+      // @JSON_VAR data
+      var data = {
+        "program": "MEME",
+        "version": "4.11.0",
+        "release": "Thu Nov 26 17:48:49 2015 +1000",
+        "stop_reason": "Stopped because requested number of motifs (1) found.",
+        "cmd": [
+          "meme",
+          "/Users/gvk/work/git_workspace/galaxy/database/files/002/dataset_2490.dat",
+          "-o",
+          "/Users/gvk/work/git_workspace/galaxy/database/job_working_directory/001/1929/dataset_2578_files",
+          "-nostatus", "-sf", "Galaxy_FASTA_Input", "-dna", "-mod", "zoops",
+          "-nmotifs", "1", "-wnsites", "0.8", "-minw", "8", "-maxw", "50",
+          "-wg", "11", "-ws", "1", "-maxiter", "50", "-distance", "0.001",
+          "-prior", "dirichlet", "-b", "0.01", "-plib",
+          "/Users/gvk/work/git_workspace/galaxy/database/files/002/dataset_2577.dat",
+          "-spmap", "uni", "-spfuzz", "0.5"
+        ],
+        "options": {
+          "mod": "zoops",
+          "revcomp": false,
+          "nmotifs": 1,
+          "minw": 8,
+          "maxw": 50,
+          "minsites": 2,
+          "maxsites": 30,
+          "wnsites": 0.8,
+          "spmap": "uni",
+          "spfuzz": 0.5,
+          "maxwords": -1,
+          "prior": "dirichlet",
+          "b": 0.01,
+          "maxiter": 50,
+          "distance": 0.001,
+          "wg": 11,
+          "ws": 1,
+          "noendgaps": false,
+          "substring": true
+        },
+        "alphabet": {
+          "name": "DNA",
+          "like": "dna",
+          "ncore": 4,
+          "symbols": [
+            {
+              "symbol": "A",
+              "name": "Adenine",
+              "colour": "CC0000",
+              "complement": "T"
+            }, {
+              "symbol": "C",
+              "name": "Cytosine",
+              "colour": "0000CC",
+              "complement": "G"
+            }, {
+              "symbol": "G",
+              "name": "Guanine",
+              "colour": "FFB300",
+              "complement": "C"
+            }, {
+              "symbol": "T",
+              "aliases": "U",
+              "name": "Thymine",
+              "colour": "008000",
+              "complement": "A"
+            }, {
+              "symbol": "N",
+              "aliases": "X.",
+              "name": "Any base",
+              "equals": "ACGT"
+            }, {
+              "symbol": "V",
+              "name": "Not T",
+              "equals": "ACG"
+            }, {
+              "symbol": "H",
+              "name": "Not G",
+              "equals": "ACT"
+            }, {
+              "symbol": "D",
+              "name": "Not C",
+              "equals": "AGT"
+            }, {
+              "symbol": "B",
+              "name": "Not A",
+              "equals": "CGT"
+            }, {
+              "symbol": "M",
+              "name": "Amino",
+              "equals": "AC"
+            }, {
+              "symbol": "R",
+              "name": "Purine",
+              "equals": "AG"
b
diff -r 000000000000 -r e416c7c26977 test-data/meme_output_txt_1.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/meme_output_txt_1.txt Mon Dec 21 06:03:33 2015 -0500
[
b'@@ -0,0 +1,325 @@\n+********************************************************************************\n+MEME - Motif discovery tool\n+********************************************************************************\n+MEME version 4.11.0 (Release date: Thu Nov 26 17:48:49 2015 +1000)\n+\n+For further information on how to interpret these results or to get\n+a copy of the MEME software please access http://meme-suite.org .\n+\n+This file may be used as input to the MAST algorithm for searching\n+sequence databases for matches to groups of motifs.  MAST is available\n+for interactive use and downloading at http://meme-suite.org .\n+********************************************************************************\n+\n+\n+********************************************************************************\n+REFERENCE\n+********************************************************************************\n+If you use this program in your research, please cite:\n+\n+Timothy L. Bailey and Charles Elkan,\n+"Fitting a mixture model by expectation maximization to discover\n+motifs in biopolymers", Proceedings of the Second International\n+Conference on Intelligent Systems for Molecular Biology, pp. 28-36,\n+AAAI Press, Menlo Park, California, 1994.\n+********************************************************************************\n+\n+\n+********************************************************************************\n+TRAINING SET\n+********************************************************************************\n+DATAFILE= /Users/gvk/work/git_workspace/galaxy/database/files/002/dataset_2490.dat\n+ALPHABET= ACDEFGHIKLMNPQRSTVWY\n+Sequence name            Weight Length  Sequence name            Weight Length  \n+-------------            ------ ------  -------------            ------ ------  \n+chr21_19617074_19617124_ 1.0000     50  chr21_26934381_26934431_ 1.0000     50  \n+chr21_28217753_28217803_ 1.0000     50  chr21_31710037_31710087_ 1.0000     50  \n+chr21_31744582_31744632_ 1.0000     50  chr21_31768316_31768366_ 1.0000     50  \n+chr21_31914206_31914256_ 1.0000     50  chr21_31933633_31933683_ 1.0000     50  \n+chr21_31962741_31962791_ 1.0000     50  chr21_31964683_31964733_ 1.0000     50  \n+chr21_31973364_31973414_ 1.0000     50  chr21_31992870_31992920_ 1.0000     50  \n+chr21_32185595_32185645_ 1.0000     50  chr21_32202076_32202126_ 1.0000     50  \n+chr21_32253899_32253949_ 1.0000     50  chr21_32410820_32410870_ 1.0000     50  \n+chr21_36411748_36411798_ 1.0000     50  chr21_37838750_37838800_ 1.0000     50  \n+chr21_45705687_45705737_ 1.0000     50  chr21_45971413_45971463_ 1.0000     50  \n+chr21_45978668_45978718_ 1.0000     50  chr21_45993530_45993580_ 1.0000     50  \n+chr21_46020421_46020471_ 1.0000     50  chr21_46031920_46031970_ 1.0000     50  \n+chr21_46046964_46047014_ 1.0000     50  chr21_46057197_46057247_ 1.0000     50  \n+chr21_46086869_46086919_ 1.0000     50  chr21_46102103_46102153_ 1.0000     50  \n+chr21_47517957_47518007_ 1.0000     50  chr21_47575506_47575556_ 1.0000     50  \n+********************************************************************************\n+\n+********************************************************************************\n+COMMAND LINE SUMMARY\n+********************************************************************************\n+This information can also be useful in the event you wish to report a\n+problem with the MEME software.\n+\n+command: meme /Users/gvk/work/git_workspace/galaxy/database/files/002/dataset_2490.dat -o /Users/gvk/work/git_workspace/galaxy/database/job_working_directory/001/1912/dataset_2530_files -nostatus \n+\n+model:  mod=         zoops    nmotifs=         1    evt=           inf\n+object function=  E-value of product of p-values\n+width:  minw=            8    maxw=           50\n+width:  wg=             11    ws=              1    endgaps=       yes\n+nsites: minsites=        2    maxsites=       30    wnsites=       0.8\n+theta:  spmap=         pam    spfuzz=        120\n+global: substring=     yes    branching=      no    wbranch'..b'  0.000000  0.000000  0.000000  0.000000  0.000000 \n+ 0.760000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.240000  0.000000  0.000000  0.000000 \n+ 0.960000  0.040000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000 \n+ 0.840000  0.000000  0.000000  0.000000  0.000000  0.120000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.000000  0.040000  0.000000  0.000000  0.000000 \n+--------------------------------------------------------------------------------\n+\n+--------------------------------------------------------------------------------\n+\tMotif 1 regular expression\n+--------------------------------------------------------------------------------\n+[GA][GA][GC][GA]TATA[AT]AA\n+--------------------------------------------------------------------------------\n+\n+\n+\n+\n+Time  0.53 secs.\n+\n+********************************************************************************\n+\n+\n+********************************************************************************\n+SUMMARY OF MOTIFS\n+********************************************************************************\n+\n+--------------------------------------------------------------------------------\n+\tCombined block diagrams: non-overlapping sites with p-value < 0.0001\n+--------------------------------------------------------------------------------\n+SEQUENCE NAME            COMBINED P-VALUE  MOTIF DIAGRAM\n+-------------            ----------------  -------------\n+chr21_19617074_19617124_         1.22e-03  39_[1(3.06e-05)]\n+chr21_26934381_26934431_         2.21e-03  27_[1(5.52e-05)]_12\n+chr21_28217753_28217803_         7.29e-01  50\n+chr21_31710037_31710087_         2.37e-03  14_[1(5.94e-05)]_25\n+chr21_31744582_31744632_         1.22e-03  12_[1(3.06e-05)]_27\n+chr21_31768316_31768366_         1.53e-03  [1(3.82e-05)]_39\n+chr21_31914206_31914256_         6.70e-04  15_[1(1.68e-05)]_24\n+chr21_31933633_31933683_         1.81e-03  4_[1(4.54e-05)]_35\n+chr21_31962741_31962791_         1.61e-02  50\n+chr21_31964683_31964733_         1.36e-04  13_[1(3.41e-06)]_26\n+chr21_31973364_31973414_         1.99e-01  50\n+chr21_31992870_31992920_         3.47e-04  16_[1(8.67e-06)]_23\n+chr21_32185595_32185645_         3.47e-04  18_[1(8.67e-06)]_21\n+chr21_32202076_32202126_         2.01e-04  13_[1(5.01e-06)]_26\n+chr21_32253899_32253949_         8.11e-04  19_[1(2.03e-05)]_20\n+chr21_32410820_32410870_         3.47e-04  21_[1(8.67e-06)]_18\n+chr21_36411748_36411798_         2.71e-03  22_[1(6.78e-05)]_17\n+chr21_37838750_37838800_         8.23e-02  50\n+chr21_45705687_45705737_         1.53e-03  37_[1(3.82e-05)]_2\n+chr21_45971413_45971463_         1.36e-04  9_[1(3.41e-06)]_30\n+chr21_45978668_45978718_         6.37e-04  4_[1(1.59e-05)]_35\n+chr21_45993530_45993580_         1.60e-04  7_[1(4.00e-06)]_32\n+chr21_46020421_46020471_         4.83e-04  2_[1(1.21e-05)]_37\n+chr21_46031920_46031970_         2.43e-04  15_[1(6.06e-06)]_24\n+chr21_46046964_46047014_         4.26e-05  12_[1(1.06e-06)]_27\n+chr21_46057197_46057247_         1.36e-04  36_[1(3.41e-06)]_3\n+chr21_46086869_46086919_         4.30e-02  50\n+chr21_46102103_46102153_         4.30e-02  50\n+chr21_47517957_47518007_         6.37e-04  32_[1(1.59e-05)]_7\n+chr21_47575506_47575556_         1.61e-03  30_[1(4.02e-05)]_9\n+--------------------------------------------------------------------------------\n+\n+********************************************************************************\n+\n+\n+********************************************************************************\n+Stopped because requested number of motifs (1) found.\n+********************************************************************************\n+\n+CPU: MacBook-Pro-2.local\n+\n+********************************************************************************\n'
b
diff -r 000000000000 -r e416c7c26977 test-data/meme_output_txt_2.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/meme_output_txt_2.txt Mon Dec 21 06:03:33 2015 -0500
[
b'@@ -0,0 +1,319 @@\n+********************************************************************************\n+MEME - Motif discovery tool\n+********************************************************************************\n+MEME version 4.11.0 (Release date: Thu Nov 26 17:48:49 2015 +1000)\n+\n+For further information on how to interpret these results or to get\n+a copy of the MEME software please access http://meme-suite.org .\n+\n+This file may be used as input to the MAST algorithm for searching\n+sequence databases for matches to groups of motifs.  MAST is available\n+for interactive use and downloading at http://meme-suite.org .\n+********************************************************************************\n+\n+\n+********************************************************************************\n+REFERENCE\n+********************************************************************************\n+If you use this program in your research, please cite:\n+\n+Timothy L. Bailey and Charles Elkan,\n+"Fitting a mixture model by expectation maximization to discover\n+motifs in biopolymers", Proceedings of the Second International\n+Conference on Intelligent Systems for Molecular Biology, pp. 28-36,\n+AAAI Press, Menlo Park, California, 1994.\n+********************************************************************************\n+\n+\n+********************************************************************************\n+TRAINING SET\n+********************************************************************************\n+DATAFILE= Galaxy_FASTA_Input\n+ALPHABET= ACGT\n+Sequence name            Weight Length  Sequence name            Weight Length  \n+-------------            ------ ------  -------------            ------ ------  \n+chr21_19617074_19617124_ 1.0000     50  chr21_26934381_26934431_ 1.0000     50  \n+chr21_28217753_28217803_ 1.0000     50  chr21_31710037_31710087_ 1.0000     50  \n+chr21_31744582_31744632_ 1.0000     50  chr21_31768316_31768366_ 1.0000     50  \n+chr21_31914206_31914256_ 1.0000     50  chr21_31933633_31933683_ 1.0000     50  \n+chr21_31962741_31962791_ 1.0000     50  chr21_31964683_31964733_ 1.0000     50  \n+chr21_31973364_31973414_ 1.0000     50  chr21_31992870_31992920_ 1.0000     50  \n+chr21_32185595_32185645_ 1.0000     50  chr21_32202076_32202126_ 1.0000     50  \n+chr21_32253899_32253949_ 1.0000     50  chr21_32410820_32410870_ 1.0000     50  \n+chr21_36411748_36411798_ 1.0000     50  chr21_37838750_37838800_ 1.0000     50  \n+chr21_45705687_45705737_ 1.0000     50  chr21_45971413_45971463_ 1.0000     50  \n+chr21_45978668_45978718_ 1.0000     50  chr21_45993530_45993580_ 1.0000     50  \n+chr21_46020421_46020471_ 1.0000     50  chr21_46031920_46031970_ 1.0000     50  \n+chr21_46046964_46047014_ 1.0000     50  chr21_46057197_46057247_ 1.0000     50  \n+chr21_46086869_46086919_ 1.0000     50  chr21_46102103_46102153_ 1.0000     50  \n+chr21_47517957_47518007_ 1.0000     50  chr21_47575506_47575556_ 1.0000     50  \n+********************************************************************************\n+\n+********************************************************************************\n+COMMAND LINE SUMMARY\n+********************************************************************************\n+This information can also be useful in the event you wish to report a\n+problem with the MEME software.\n+\n+command: meme /Users/gvk/work/git_workspace/galaxy/database/files/002/dataset_2490.dat -o /Users/gvk/work/git_workspace/galaxy/database/job_working_directory/001/1929/dataset_2578_files -nostatus -sf Galaxy_FASTA_Input -dna -mod zoops -nmotifs 1 -wnsites 0.8 -minw 8 -maxw 50 -wg 11 -ws 1 -maxiter 50 -distance 0.001 -prior dirichlet -b 0.01 -plib /Users/gvk/work/git_workspace/galaxy/database/files/002/dataset_2577.dat -spmap uni -spfuzz 0.5 \n+\n+model:  mod=         zoops    nmotifs=         1    evt=           inf\n+object function=  E-value of product of p-values\n+width:  minw=            8    maxw=           50\n+width:  wg=             11    ws=              1    endgaps=       yes\n+nsi'..b'bability matrix\n+--------------------------------------------------------------------------------\n+letter-probability matrix: alength= 4 w= 11 nsites= 30 E= 5.1e-040 \n+ 0.266667  0.066667  0.566667  0.100000 \n+ 0.300000  0.000000  0.666667  0.033333 \n+ 0.133333  0.266667  0.466667  0.133333 \n+ 0.300000  0.033333  0.600000  0.066667 \n+ 0.000000  0.000000  0.033333  0.966667 \n+ 0.866667  0.066667  0.000000  0.066667 \n+ 0.000000  0.000000  0.000000  1.000000 \n+ 0.966667  0.033333  0.000000  0.000000 \n+ 0.700000  0.000000  0.000000  0.300000 \n+ 0.933333  0.066667  0.000000  0.000000 \n+ 0.800000  0.000000  0.166667  0.033333 \n+--------------------------------------------------------------------------------\n+\n+--------------------------------------------------------------------------------\n+\tMotif 1 regular expression\n+--------------------------------------------------------------------------------\n+[GA][GA][GC][GA]TATA[AT]AA\n+--------------------------------------------------------------------------------\n+\n+\n+\n+\n+Time  0.17 secs.\n+\n+********************************************************************************\n+\n+\n+********************************************************************************\n+SUMMARY OF MOTIFS\n+********************************************************************************\n+\n+--------------------------------------------------------------------------------\n+\tCombined block diagrams: non-overlapping sites with p-value < 0.0001\n+--------------------------------------------------------------------------------\n+SEQUENCE NAME            COMBINED P-VALUE  MOTIF DIAGRAM\n+-------------            ----------------  -------------\n+chr21_19617074_19617124_         5.63e-04  39_[+1(1.41e-05)]\n+chr21_26934381_26934431_         1.57e-03  27_[+1(3.93e-05)]_12\n+chr21_28217753_28217803_         1.00e-01  50\n+chr21_31710037_31710087_         2.49e-03  14_[+1(6.24e-05)]_25\n+chr21_31744582_31744632_         1.22e-03  12_[+1(3.04e-05)]_27\n+chr21_31768316_31768366_         1.47e-03  [+1(3.67e-05)]_39\n+chr21_31914206_31914256_         6.45e-04  15_[+1(1.61e-05)]_24\n+chr21_31933633_31933683_         2.26e-03  4_[+1(5.65e-05)]_35\n+chr21_31962741_31962791_         3.37e-02  50\n+chr21_31964683_31964733_         1.95e-04  13_[+1(4.86e-06)]_26\n+chr21_31973364_31973414_         5.73e-02  50\n+chr21_31992870_31992920_         5.52e-04  16_[+1(1.38e-05)]_23\n+chr21_32185595_32185645_         2.59e-04  18_[+1(6.48e-06)]_21\n+chr21_32202076_32202126_         1.10e-04  13_[+1(2.74e-06)]_26\n+chr21_32253899_32253949_         7.78e-04  17_[+1(1.95e-05)]_22\n+chr21_32410820_32410870_         5.52e-04  21_[+1(1.38e-05)]_18\n+chr21_36411748_36411798_         2.85e-03  22_[+1(7.15e-05)]_17\n+chr21_37838750_37838800_         1.90e-02  50\n+chr21_45705687_45705737_         8.63e-04  37_[+1(2.16e-05)]_2\n+chr21_45971413_45971463_         1.95e-04  9_[+1(4.86e-06)]_30\n+chr21_45978668_45978718_         2.59e-04  4_[+1(6.48e-06)]_35\n+chr21_45993530_45993580_         1.95e-04  7_[+1(4.86e-06)]_32\n+chr21_46020421_46020471_         7.78e-04  2_[+1(1.95e-05)]_37\n+chr21_46031920_46031970_         8.89e-05  15_[+1(2.22e-06)]_24\n+chr21_46046964_46047014_         1.80e-05  12_[+1(4.51e-07)]_27\n+chr21_46057197_46057247_         1.95e-04  36_[+1(4.86e-06)]_3\n+chr21_46086869_46086919_         5.54e-03  50\n+chr21_46102103_46102153_         5.54e-03  50\n+chr21_47517957_47518007_         2.59e-04  32_[+1(6.48e-06)]_7\n+chr21_47575506_47575556_         1.22e-03  30_[+1(3.04e-05)]_9\n+--------------------------------------------------------------------------------\n+\n+********************************************************************************\n+\n+\n+********************************************************************************\n+Stopped because requested number of motifs (1) found.\n+********************************************************************************\n+\n+CPU: dot1x-cb-51.aset.psu.edu\n+\n+********************************************************************************\n'
b
diff -r 000000000000 -r e416c7c26977 test-data/meme_output_xml_1.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/meme_output_xml_1.xml Mon Dec 21 06:03:33 2015 -0500
[
b'@@ -0,0 +1,1285 @@\n+<?xml version=\'1.0\' encoding=\'UTF-8\' standalone=\'yes\'?>\n+<!-- Document definition -->\n+<!DOCTYPE MEME[\n+<!ELEMENT MEME (\n+  training_set,\n+  model, \n+  motifs, \n+  scanned_sites_summary?\n+)>\n+<!ATTLIST MEME \n+  version CDATA #REQUIRED\n+  release CDATA #REQUIRED\n+>\n+<!-- Training-set elements -->\n+<!ELEMENT training_set (alphabet, ambigs, sequence*, letter_frequencies)>\n+<!ATTLIST training_set datafile CDATA #REQUIRED length CDATA #REQUIRED>\n+<!ELEMENT alphabet (letter*)>\n+<!ATTLIST alphabet name CDATA #REQUIRED>\n+<!ELEMENT ambigs (letter*)>\n+<!ELEMENT letter EMPTY>\n+<!ATTLIST letter id ID #REQUIRED>\n+<!ATTLIST letter symbol CDATA #REQUIRED>\n+<!ATTLIST letter equals CDATA #IMPLIED>\n+<!ATTLIST letter aliases CDATA #IMPLIED>\n+<!ATTLIST letter complement CDATA #IMPLIED>\n+<!ATTLIST letter name CDATA #IMPLIED>\n+<!ATTLIST letter colour CDATA #IMPLIED>\n+<!ELEMENT sequence EMPTY>\n+<!ATTLIST sequence id ID #REQUIRED\n+                   name CDATA #REQUIRED\n+                   length CDATA #REQUIRED\n+                   weight CDATA #REQUIRED\n+>\n+<!ELEMENT letter_frequencies (alphabet_array)>\n+\n+<!-- Model elements -->\n+<!ELEMENT model (\n+  command_line,\n+  host,\n+  type,\n+  nmotifs,\n+  evalue_threshold,\n+  object_function,\n+  min_width,\n+  max_width,\n+  minic,\n+  wg,\n+  ws,\n+  endgaps,\n+  minsites,\n+  maxsites,\n+  wnsites,\n+  prob,\n+  spmap,\n+  spfuzz,\n+  prior,\n+  beta,\n+  maxiter,\n+  distance,\n+  num_sequences,\n+  num_positions,\n+  seed,\n+  seqfrac,\n+  strands,\n+  priors_file,\n+  reason_for_stopping,\n+  background_frequencies\n+)>\n+<!ELEMENT command_line (#PCDATA)*>\n+<!ELEMENT host (#PCDATA)*>\n+<!ELEMENT type (#PCDATA)*>\n+<!ELEMENT nmotifs (#PCDATA)*>\n+<!ELEMENT evalue_threshold (#PCDATA)*>\n+<!ELEMENT object_function (#PCDATA)*>\n+<!ELEMENT min_width (#PCDATA)*>\n+<!ELEMENT max_width (#PCDATA)*>\n+<!ELEMENT minic (#PCDATA)*>\n+<!ELEMENT wg (#PCDATA)*>\n+<!ELEMENT ws (#PCDATA)*>\n+<!ELEMENT endgaps (#PCDATA)*>\n+<!ELEMENT minsites (#PCDATA)*>\n+<!ELEMENT maxsites (#PCDATA)*>\n+<!ELEMENT wnsites (#PCDATA)*>\n+<!ELEMENT prob (#PCDATA)*>\n+<!ELEMENT spmap (#PCDATA)*>\n+<!ELEMENT spfuzz (#PCDATA)*>\n+<!ELEMENT prior (#PCDATA)*>\n+<!ELEMENT beta (#PCDATA)*>\n+<!ELEMENT maxiter (#PCDATA)*>\n+<!ELEMENT distance (#PCDATA)*>\n+<!ELEMENT num_sequences (#PCDATA)*>\n+<!ELEMENT num_positions (#PCDATA)*>\n+<!ELEMENT seed (#PCDATA)*>\n+<!ELEMENT seqfrac (#PCDATA)*>\n+<!ELEMENT strands (#PCDATA)*>\n+<!ELEMENT priors_file (#PCDATA)*>\n+<!ELEMENT reason_for_stopping (#PCDATA)*>\n+<!ELEMENT background_frequencies (alphabet_array)>\n+<!ATTLIST background_frequencies source CDATA #REQUIRED>\n+\n+<!-- Motif elements -->\n+<!ELEMENT motifs (motif*)>\n+<!ELEMENT motif (scores, probabilities, regular_expression?, contributing_sites)>\n+<!ATTLIST motif id ID #REQUIRED\n+                name CDATA #REQUIRED\n+                width CDATA #REQUIRED\n+                sites CDATA #REQUIRED\n+                llr CDATA #REQUIRED\n+                ic CDATA #REQUIRED\n+                re CDATA #REQUIRED\n+                bayes_threshold CDATA #REQUIRED\n+                e_value CDATA #REQUIRED\n+                elapsed_time CDATA #REQUIRED\n+                url CDATA ""\n+>\n+<!ELEMENT scores (alphabet_matrix)>\n+<!ELEMENT probabilities (alphabet_matrix)>\n+<!ELEMENT regular_expression (#PCDATA)*>\n+\n+<!-- Contributing site elements -->\n+<!-- Contributing sites are motif occurences found during the motif discovery phase -->\n+<!ELEMENT contributing_sites (contributing_site*)>\n+<!ELEMENT contributing_site (left_flank, site, right_flank)>\n+<!ATTLIST contributing_site sequence_id IDREF #REQUIRED\n+                          position CDATA #REQUIRED\n+                          strand (plus|minus|none) \'none\'\n+                          pvalue CDATA #REQUIRED\n+>\n+<!-- The left_flank contains the sequence for 10 bases to the left of the motif start -->\n+<!ELEMENT left_flank (#PCDATA)>\n+<!-- The site contains the sequence for the motif instance -->\n+<!ELEMENT site (letter_ref*)>\n+<!-- The right_flank contai'..b'="none" position="12" pvalue="3.06e-05"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_5" pvalue="1.53e-03" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="0" pvalue="3.82e-05"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_6" pvalue="6.70e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="15" pvalue="1.68e-05"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_7" pvalue="1.81e-03" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="4" pvalue="4.54e-05"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_8" pvalue="1.61e-02" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_9" pvalue="1.36e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="13" pvalue="3.41e-06"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_10" pvalue="1.99e-01" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_11" pvalue="3.47e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="16" pvalue="8.67e-06"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_12" pvalue="3.47e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="18" pvalue="8.67e-06"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_13" pvalue="2.01e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="13" pvalue="5.01e-06"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_14" pvalue="8.11e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="19" pvalue="2.03e-05"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_15" pvalue="3.47e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="21" pvalue="8.67e-06"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_16" pvalue="2.71e-03" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="22" pvalue="6.78e-05"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_17" pvalue="8.23e-02" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_18" pvalue="1.53e-03" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="37" pvalue="3.82e-05"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_19" pvalue="1.36e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="9" pvalue="3.41e-06"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_20" pvalue="6.37e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="4" pvalue="1.59e-05"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_21" pvalue="1.60e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="7" pvalue="4.00e-06"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_22" pvalue="4.83e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="2" pvalue="1.21e-05"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_23" pvalue="2.43e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="15" pvalue="6.06e-06"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_24" pvalue="4.26e-05" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="12" pvalue="1.06e-06"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_25" pvalue="1.36e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="36" pvalue="3.41e-06"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_26" pvalue="4.30e-02" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_27" pvalue="4.30e-02" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_28" pvalue="6.37e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="32" pvalue="1.59e-05"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_29" pvalue="1.61e-03" num_sites="1"><scanned_site motif_id="motif_1" strand="none" position="30" pvalue="4.02e-05"/>\n+</scanned_sites>\n+</scanned_sites_summary>\n+</MEME>\n'
b
diff -r 000000000000 -r e416c7c26977 test-data/meme_output_xml_2.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/meme_output_xml_2.xml Mon Dec 21 06:03:33 2015 -0500
[
b'@@ -0,0 +1,977 @@\n+<?xml version=\'1.0\' encoding=\'UTF-8\' standalone=\'yes\'?>\n+<!-- Document definition -->\n+<!DOCTYPE MEME[\n+<!ELEMENT MEME (\n+  training_set,\n+  model, \n+  motifs, \n+  scanned_sites_summary?\n+)>\n+<!ATTLIST MEME \n+  version CDATA #REQUIRED\n+  release CDATA #REQUIRED\n+>\n+<!-- Training-set elements -->\n+<!ELEMENT training_set (alphabet, ambigs, sequence*, letter_frequencies)>\n+<!ATTLIST training_set datafile CDATA #REQUIRED length CDATA #REQUIRED>\n+<!ELEMENT alphabet (letter*)>\n+<!ATTLIST alphabet name CDATA #REQUIRED>\n+<!ELEMENT ambigs (letter*)>\n+<!ELEMENT letter EMPTY>\n+<!ATTLIST letter id ID #REQUIRED>\n+<!ATTLIST letter symbol CDATA #REQUIRED>\n+<!ATTLIST letter equals CDATA #IMPLIED>\n+<!ATTLIST letter aliases CDATA #IMPLIED>\n+<!ATTLIST letter complement CDATA #IMPLIED>\n+<!ATTLIST letter name CDATA #IMPLIED>\n+<!ATTLIST letter colour CDATA #IMPLIED>\n+<!ELEMENT sequence EMPTY>\n+<!ATTLIST sequence id ID #REQUIRED\n+                   name CDATA #REQUIRED\n+                   length CDATA #REQUIRED\n+                   weight CDATA #REQUIRED\n+>\n+<!ELEMENT letter_frequencies (alphabet_array)>\n+\n+<!-- Model elements -->\n+<!ELEMENT model (\n+  command_line,\n+  host,\n+  type,\n+  nmotifs,\n+  evalue_threshold,\n+  object_function,\n+  min_width,\n+  max_width,\n+  minic,\n+  wg,\n+  ws,\n+  endgaps,\n+  minsites,\n+  maxsites,\n+  wnsites,\n+  prob,\n+  spmap,\n+  spfuzz,\n+  prior,\n+  beta,\n+  maxiter,\n+  distance,\n+  num_sequences,\n+  num_positions,\n+  seed,\n+  seqfrac,\n+  strands,\n+  priors_file,\n+  reason_for_stopping,\n+  background_frequencies\n+)>\n+<!ELEMENT command_line (#PCDATA)*>\n+<!ELEMENT host (#PCDATA)*>\n+<!ELEMENT type (#PCDATA)*>\n+<!ELEMENT nmotifs (#PCDATA)*>\n+<!ELEMENT evalue_threshold (#PCDATA)*>\n+<!ELEMENT object_function (#PCDATA)*>\n+<!ELEMENT min_width (#PCDATA)*>\n+<!ELEMENT max_width (#PCDATA)*>\n+<!ELEMENT minic (#PCDATA)*>\n+<!ELEMENT wg (#PCDATA)*>\n+<!ELEMENT ws (#PCDATA)*>\n+<!ELEMENT endgaps (#PCDATA)*>\n+<!ELEMENT minsites (#PCDATA)*>\n+<!ELEMENT maxsites (#PCDATA)*>\n+<!ELEMENT wnsites (#PCDATA)*>\n+<!ELEMENT prob (#PCDATA)*>\n+<!ELEMENT spmap (#PCDATA)*>\n+<!ELEMENT spfuzz (#PCDATA)*>\n+<!ELEMENT prior (#PCDATA)*>\n+<!ELEMENT beta (#PCDATA)*>\n+<!ELEMENT maxiter (#PCDATA)*>\n+<!ELEMENT distance (#PCDATA)*>\n+<!ELEMENT num_sequences (#PCDATA)*>\n+<!ELEMENT num_positions (#PCDATA)*>\n+<!ELEMENT seed (#PCDATA)*>\n+<!ELEMENT seqfrac (#PCDATA)*>\n+<!ELEMENT strands (#PCDATA)*>\n+<!ELEMENT priors_file (#PCDATA)*>\n+<!ELEMENT reason_for_stopping (#PCDATA)*>\n+<!ELEMENT background_frequencies (alphabet_array)>\n+<!ATTLIST background_frequencies source CDATA #REQUIRED>\n+\n+<!-- Motif elements -->\n+<!ELEMENT motifs (motif*)>\n+<!ELEMENT motif (scores, probabilities, regular_expression?, contributing_sites)>\n+<!ATTLIST motif id ID #REQUIRED\n+                name CDATA #REQUIRED\n+                width CDATA #REQUIRED\n+                sites CDATA #REQUIRED\n+                llr CDATA #REQUIRED\n+                ic CDATA #REQUIRED\n+                re CDATA #REQUIRED\n+                bayes_threshold CDATA #REQUIRED\n+                e_value CDATA #REQUIRED\n+                elapsed_time CDATA #REQUIRED\n+                url CDATA ""\n+>\n+<!ELEMENT scores (alphabet_matrix)>\n+<!ELEMENT probabilities (alphabet_matrix)>\n+<!ELEMENT regular_expression (#PCDATA)*>\n+\n+<!-- Contributing site elements -->\n+<!-- Contributing sites are motif occurences found during the motif discovery phase -->\n+<!ELEMENT contributing_sites (contributing_site*)>\n+<!ELEMENT contributing_site (left_flank, site, right_flank)>\n+<!ATTLIST contributing_site sequence_id IDREF #REQUIRED\n+                          position CDATA #REQUIRED\n+                          strand (plus|minus|none) \'none\'\n+                          pvalue CDATA #REQUIRED\n+>\n+<!-- The left_flank contains the sequence for 10 bases to the left of the motif start -->\n+<!ELEMENT left_flank (#PCDATA)>\n+<!-- The site contains the sequence for the motif instance -->\n+<!ELEMENT site (letter_ref*)>\n+<!-- The right_flank contain'..b'="plus" position="12" pvalue="3.04e-05"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_5" pvalue="1.47e-03" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="0" pvalue="3.67e-05"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_6" pvalue="6.45e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="15" pvalue="1.61e-05"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_7" pvalue="2.26e-03" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="4" pvalue="5.65e-05"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_8" pvalue="3.37e-02" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_9" pvalue="1.95e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="13" pvalue="4.86e-06"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_10" pvalue="5.73e-02" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_11" pvalue="5.52e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="16" pvalue="1.38e-05"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_12" pvalue="2.59e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="18" pvalue="6.48e-06"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_13" pvalue="1.10e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="13" pvalue="2.74e-06"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_14" pvalue="7.78e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="17" pvalue="1.95e-05"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_15" pvalue="5.52e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="21" pvalue="1.38e-05"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_16" pvalue="2.85e-03" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="22" pvalue="7.15e-05"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_17" pvalue="1.90e-02" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_18" pvalue="8.63e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="37" pvalue="2.16e-05"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_19" pvalue="1.95e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="9" pvalue="4.86e-06"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_20" pvalue="2.59e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="4" pvalue="6.48e-06"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_21" pvalue="1.95e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="7" pvalue="4.86e-06"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_22" pvalue="7.78e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="2" pvalue="1.95e-05"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_23" pvalue="8.89e-05" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="15" pvalue="2.22e-06"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_24" pvalue="1.80e-05" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="12" pvalue="4.51e-07"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_25" pvalue="1.95e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="36" pvalue="4.86e-06"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_26" pvalue="5.54e-03" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_27" pvalue="5.54e-03" num_sites="0"></scanned_sites>\n+<scanned_sites sequence_id="sequence_28" pvalue="2.59e-04" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="32" pvalue="6.48e-06"/>\n+</scanned_sites>\n+<scanned_sites sequence_id="sequence_29" pvalue="1.22e-03" num_sites="1"><scanned_site motif_id="motif_1" strand="plus" position="30" pvalue="3.04e-05"/>\n+</scanned_sites>\n+</scanned_sites_summary>\n+</MEME>\n'
b
diff -r 000000000000 -r e416c7c26977 test-data/prior30.plib
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/prior30.plib Mon Dec 21 06:03:33 2015 -0500
b
b'@@ -0,0 +1,275 @@\n+Alphabet= ACDEFGHIKLMNPQRSTVWY\n+NumDistr= 30\n+Number= 0\n+Mixture= 0.055795\n+B= 5.623820\n+Alpha= 0.0855491 0.0221831 0.0111063 0.0209959 0.0505726 0.025437 0.0155389 0.132951 0.0247865 0.150287 0.0577239 0.0209317 0.0166629 0.0220905 0.0244295 0.0497608 0.070277 0.157532 0.0102219 0.0309633 \n+FullUpdate= 1\n+QUpdate= 1\n+StructID=      0\n+Comment=    HMM9.4 reestimated in henikoff29.2\n+\n+Number= 1\n+Mixture= 0.198333\n+B= 0.097240\n+Alpha= 0.0562629 0.0329597 0.0692513 0.0385232 0.0400041 0.143573 0.0428939 0.0226244 0.0442102 0.0665467 0.0117853 0.0447655 0.0833299 0.0395825 0.0611271 0.0588852 0.0513472 0.0317153 0.0237865 0.0368161 \n+FullUpdate= 1\n+QUpdate= 1\n+StructID=       24\n+Comment=      Outside\n+\n+Number= 2\n+Mixture= 0.043566\n+B= 1.648336\n+Alpha= 0.0144564 0.00845337 0.00785519 0.00864933 0.255959 0.0110815 0.0509526 0.0234533 0.0120443 0.0561967 0.015111 0.0190974 0.00857653 0.0167812 0.0164918 0.0197108 0.0151013 0.0252782 0.050139 0.364613 \n+FullUpdate= 1\n+QUpdate= 1\n+StructID=       26\n+Comment=       Inside\n+\n+Number= 3\n+Mixture= 0.060170\n+B= 2.595432\n+Alpha= 0.0452144 0.00587917 0.169731 0.0751478 0.00749471 0.0845832 0.0369819 0.00610072 0.0548186 0.011029 0.00382749 0.212785 0.0206532 0.0416705 0.0280716 0.117267 0.0533742 0.00943157 0.00216149 0.0137784 \n+FullUpdate= 1\n+QUpdate= 1\n+StructID=       19\n+Comment=      Outside Alpha\n+\n+Number= 4\n+Mixture= 0.065466\n+B= 3.112271\n+Alpha= 0.0361167 0.0049157 0.0134924 0.0461325 0.00557631 0.0209043 0.0302551 0.016425 0.307554 0.0338255 0.0139435 0.0360733 0.0127659 0.0873761 0.222668 0.0369042 0.0354442 0.0228891 0.00434827 0.0123906 \n+FullUpdate= 1\n+QUpdate= 1\n+StructID=       21\n+Comment=       Outside Beta\n+\n+Number= 5\n+Mixture= 0.067614\n+B= 2.053644\n+Alpha= 0.0194362 0.00765176 0.00188738 0.00372898 0.0849894 0.00421787 0.00400459 0.152735 0.00407958 0.4568 0.106051 0.00304386 0.00545956 0.00900935 0.00605071 0.00519029 0.016255 0.0861045 0.00787965 0.0154248 \n+FullUpdate= 1\n+QUpdate= 1\n+StructID=       22\n+Comment=      Inside alpha\n+\n+Number= 6\n+Mixture= 0.080724\n+B= 2.138987\n+Alpha= 0.0423172 0.0153891 0.00409306 0.00565735 0.0197117 0.00590607 0.00139926 0.307863 0.00544884 0.115721 0.0285808 0.00522771 0.00474851 0.00328193 0.00351054 0.00892385 0.0348922 0.380003 0.00117673 0.00614917 \n+FullUpdate= 1\n+QUpdate= 1\n+StructID=       23\n+Comment=       Inside beta\n+\n+Number= 7\n+Mixture= 0.051030\n+B= 3.878926\n+Alpha= 0.0548123 0.000759746 0.144127 0.46019 0.00249502 0.0192754 0.0106535 0.00938765 0.0562429 0.0163148 0.00717389 0.0245612 0.0177482 0.0744802 0.0199233 0.0323535 0.0257651 0.018574 0.00087086 0.00429088 \n+FullUpdate= 1\n+QUpdate= 1\n+StructID=       23\n+Comment=      Alpha helix\n+\n+Number= 8\n+Mixture= 0.103529\n+B= 1.486325\n+Alpha= 0.315754 0.0384546 0.0116388 0.0133665 0.0111126 0.107921 0.00752325 0.0154885 0.0111281 0.0231087 0.011626 0.0228375 0.0304785 0.0166632 0.0156345 0.186379 0.0954421 0.0546691 0.00351538 0.00725682 \n+FullUpdate= 1\n+QUpdate= 1\n+StructID=       23\n+Comment=      Beta strand\n+\n+Number= 9\n+Mixture= 0.062940\n+B= 8.221215\n+Alpha= 0.0869919 0.00672577 0.0600995 0.10763 0.0153489 0.0378086 0.0325335 0.023388 0.113765 0.041623 0.0196906 0.0625344 0.0262599 0.0788667 0.0707399 0.0886634 0.0666777 0.0361472 0.00484308 0.0196629 \n+FullUpdate= 1\n+QUpdate= 1\n+StructID=       23\n+Comment=      Other\n+\n+Number= 10\n+Mixture= 0.012518\n+B= 38.955631\n+Alpha= 0.732922 0.0145131 0.00623235 0.00951423 0.00717778 0.0289521 0.00351664 0.0125081 0.00886593 0.0183651 0.00832812 0.00670968 0.00364556 0.00622169 0.00812899 0.0582399 0.0205067 0.0394327 0.00207485 0.00414489 \n+FullUpdate= 1\n+QUpdate= 1\n+StructID=      0\n+Comment=      A\n+\n+Number= 11\n+Mixture= 0.004953\n+B= 381.562195\n+Alpha= 0.00563239 0.959814 0.00144129 0.00213042 0.00158645 0.00168393 0.000989765 0.00325263 0.00148501 0.00343924 0.00168673 0.00159054 0.00121534 0.00129942 0.00195209 0.00296106 0.0039912 0.00266944 0.000327808 0.000851203 \n+FullUpdate= 1\n+QUpdate= 1\n+Str'..b'nt=     I \n+\n+Number= 18\n+Mixture= 0.009400\n+B= 150.415985\n+Alpha= 0.00688657 0.00169711 0.00222738 0.00346887 0.00115861 0.00302866 0.00209171 0.00400905 0.903944 0.0037747 0.00186061 0.00449531 0.00249618 0.00324487 0.041775 0.00392196 0.00461714 0.00296607 0.000893256 0.00144282 \n+FullUpdate= 1\n+QUpdate= 1\n+StructID=      0\n+Comment=      K \n+\n+Number= 19\n+Mixture= 0.017057\n+B= 31.896633\n+Alpha= 0.0114646 0.00367926 0.00296188 0.00596126 0.0190009 0.00382486 0.00338381 0.0401936 0.00650072 0.790038 0.031659 0.00392791 0.0050046 0.00753591 0.00771818 0.00748621 0.0101555 0.0312597 0.00242405 0.00581952 \n+FullUpdate= 1\n+QUpdate= 1\n+StructID=      0\n+Comment=     L \n+\n+Number= 20\n+Mixture= 0.002761\n+B= 201.346268\n+Alpha= 0.00353933 0.00165628 0.0014931 0.00161065 0.00279831 0.00194259 0.00101868 0.00969101 0.00211316 0.0217036 0.928022 0.00162899 0.0015681 0.0015629 0.00138977 0.00294601 0.00311476 0.00723178 0.00156295 0.00340569 \n+FullUpdate= 1\n+QUpdate= 1\n+StructID=      0\n+Comment=     M \n+\n+Number= 21\n+Mixture= 0.005734\n+B= 108.343185\n+Alpha= 0.0067512 0.00239062 0.0140378 0.0043452 0.00365788 0.00689345 0.0148828 0.00715373 0.00789036 0.00614036 0.00289697 0.858995 0.00399721 0.00770961 0.00570515 0.0238176 0.011602 0.00591549 0.00167893 0.00353897 \n+FullUpdate= 1\n+QUpdate= 1\n+StructID=      0\n+Comment=     N \n+\n+Number= 22\n+Mixture= 0.022818\n+B= 15.153304\n+Alpha= 0.0417987 0.00360232 0.0113792 0.0152366 0.00564775 0.0123795 0.00606957 0.0091353 0.0165122 0.0167265 0.00490487 0.00915437 0.755604 0.0131375 0.012587 0.0283392 0.0189623 0.0140029 0.0012848 0.00353553 \n+FullUpdate= 1\n+QUpdate= 1\n+StructID=      0\n+Comment=     P \n+\n+Number= 23\n+Mixture= 0.005931\n+B= 79.417511\n+Alpha= 0.0142993 0.00266984 0.0053289 0.0321605 0.0028715 0.00426743 0.0257509 0.00565307 0.0106106 0.0161186 0.00955753 0.0104696 0.00638107 0.807311 0.0149106 0.0111968 0.00889459 0.00681482 0.00206658 0.00266624 \n+FullUpdate= 1\n+QUpdate= 1\n+StructID=      0\n+Comment=     Q \n+\n+Number= 24\n+Mixture= 0.011491\n+B= 93.103897\n+Alpha= 0.00756896 0.00314197 0.00296652 0.00327634 0.00194604 0.00467894 0.00721049 0.00406061 0.0277257 0.00663852 0.00217868 0.00577047 0.00473306 0.00953551 0.889701 0.00650859 0.00506022 0.00294281 0.00205549 0.00230062 \n+FullUpdate= 1\n+QUpdate= 1\n+StructID=      0\n+Comment=      R \n+\n+Number= 25\n+Mixture= 0.008219\n+B= 47.504795\n+Alpha= 0.0284818 0.00697155 0.00749796 0.00604665 0.00515171 0.00954817 0.00380684 0.00637929 0.0104463 0.00908885 0.00471437 0.0194592 0.00711823 0.00611827 0.00979722 0.707416 0.139256 0.00656298 0.0015377 0.00460086 \n+FullUpdate= 1\n+QUpdate= 1\n+StructID=    0\n+Comment=     S \n+\n+Number= 26\n+Mixture= 0.019050\n+B= 14.027470\n+Alpha= 0.0247201 0.00718027 0.00845584 0.0076239 0.00600101 0.0073401 0.00492149 0.0173757 0.0129878 0.0125773 0.0100452 0.0230424 0.00659406 0.0110314 0.0112037 0.107763 0.690341 0.0249364 0.00193884 0.00392074 \n+FullUpdate= 1\n+QUpdate= 1\n+StructID=      0\n+Comment=     T \n+\n+Number= 27\n+Mixture= 0.007047\n+B= 76.958153\n+Alpha= 0.0447488 0.00734525 0.00576457 0.00805666 0.00714188 0.00593389 0.0041663 0.0688592 0.00714299 0.0255115 0.00800708 0.00501678 0.00632646 0.00492002 0.00812967 0.0100074 0.0240134 0.745035 0.00126243 0.00261056 \n+FullUpdate= 1\n+QUpdate= 1\n+StructID=      0\n+Comment=     V \n+\n+Number= 28\n+Mixture= 0.003957\n+B= 150.973328\n+Alpha= 0.00517343 0.00213336 0.00350645 0.00390297 0.018439 0.0041919 0.0023655 0.00404231 0.00420998 0.0171406 0.00379068 0.00363696 0.00245861 0.00387467 0.00502035 0.00465674 0.00417283 0.00620977 0.888513 0.012561 \n+FullUpdate= 1\n+QUpdate= 1\n+StructID=      0\n+Comment=     W \n+\n+Number= 29\n+Mixture= 0.004904\n+B= 30.653225\n+Alpha= 0.0342049 0.00809912 0.0126852 0.0174701 0.156033 0.0118268 0.0431342 0.0204751 0.0164439 0.0363664 0.0129811 0.0131986 0.0103037 0.0116235 0.0159032 0.0287792 0.0176143 0.024986 0.0131845 0.494687 \n+FullUpdate= 1\n+QUpdate= 1\n+StructID=      0\n+Comment=     Y \n+\n+/* $Header$ */\n+/* $Header$ */\n+/* $Header$ */\n'
b
diff -r 000000000000 -r e416c7c26977 tool_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml Mon Dec 21 06:03:33 2015 -0500
b
@@ -0,0 +1,6 @@
+<?xml version="1.0"?>
+<tool_dependency>
+    <package name="meme" version="4.11.0">
+        <repository changeset_revision="6ee2e1225125" name="package_meme_4_11_0" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
+    </package>
+</tool_dependency>