Repository 'homer_findmotifsgenome'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/homer_findmotifsgenome

Changeset 2:a8f207b43f64 (2021-12-13)
Previous changeset 1:3126da33847c (2021-09-26) Next changeset 3:4fe92af4542b (2023-04-06)
Commit message:
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/homer commit 186b72f369eb2a11d92f4d63cac2e8ebe386b9bd"
modified:
homer_findMotifsGenome.xml
macros.xml
test-data/CTCF_peaks_shifted.bed
test-data/generate_tests.sh
added:
test-data/chr2_subset.fa.gz
test-data/scanMotif_res1.bed
test-data/scanMotif_res1.txt
removed:
test-data/chr2_subset.fa
b
diff -r 3126da33847c -r a8f207b43f64 homer_findMotifsGenome.xml
--- a/homer_findMotifsGenome.xml Sun Sep 26 12:29:15 2021 +0000
+++ b/homer_findMotifsGenome.xml Mon Dec 13 15:14:25 2021 +0000
[
b'@@ -115,20 +115,24 @@\n     $advanced.homer12.quickMask\n     -minlp \'$advanced.homer12.minlp\'\n #elif str( $advanced.homer12.version ) == "homer1":\n+    -homer1\n     -depth \'$advanced.homer12.depth\'\n #end if\n #if str( $background.use ) == "none":\n     #if not $nomotif:\n-        && cp \'${output}\'/homerResults.html outputHomer.html\n+        && cp \'${output}/homerResults.html\' outputHomer.html\n         && cp -r \'${output}\' \'${html_homer_file.files_path}\'\n+        && cp -r \'${output}/homerResults\' homerResults\n     #end if\n     #if not $motif_options.noknown:\n-        && cp \'${output}\'/knownResults.html outputKnown.html\n+        && cp \'${output}/knownResults.html\' outputKnown.html\n         && cp -r \'${output}\' \'${html_file.files_path}\'\n+        && cp -r \'${output}/knownResults\' knownResults\n     #end if\n #else\n-   && cp \'${output}\'/homerResults.html outputHomer.html\n+   && cp \'${output}/homerResults.html\' outputHomer.html\n    && cp -r \'${output}\' \'${html_homer_file.files_path}\'\n+   && cp -r \'${output}/homerResults\' homerResults\n #end if\n         ]]></command>\n     <inputs>\n@@ -255,12 +259,21 @@\n             <filter>motif_options[\'noknown\'] is False</filter>\n             <filter>background[\'use\'] == "none"</filter>\n         </data>\n+        <collection name="output_collection_known" type="list" label="${tool.name} on ${on_string}: Known motifs files">\n+            <discover_datasets directory="knownResults" pattern="(?P&lt;designation&gt;.+)\\.motif" format="txt" visible="false"/>\n+            <filter>motif_options[\'noknown\'] is False</filter>\n+            <filter>background[\'use\'] == "none"</filter>\n+        </collection>\n         <data format="html" name="html_homer_file" from_work_dir="outputHomer.html" label="${tool.name} on ${on_string}: De novo motifs">\n             <filter>nomotif is False</filter>\n         </data>\n+        <collection name="output_collection_de_novo" type="list" label="${tool.name} on ${on_string}: De novo motifs files">\n+            <discover_datasets directory="homerResults" pattern="(?P&lt;designation&gt;.+)\\.motif" format="txt" visible="false"/>\n+            <filter>nomotif is False</filter>\n+        </collection>\n     </outputs>\n     <tests>\n-        <test expect_num_outputs="2">\n+        <test expect_num_outputs="4">\n             <param name="input" value="fake_phix_peaks.bed"/>\n             <conditional name="genome">\n                 <param name="source" value="installed"/>\n@@ -271,58 +284,97 @@\n                 <assert_contents>\n                     <has_text text="fake_phix_peaks_bed_motif/ - Homer de novo Motif Results"/>\n                     <has_text text="Total target sequences = 1"/>\n-                    <has_text text="Jaspar"/>\n+                    <!-- This is too much impredictible -->\n+                    <!-- <has_text text="Jaspar"/> -->\n                 </assert_contents>\n             </output>\n+            <output_collection name="output_collection_known" type="list" count="0">\n+            </output_collection>\n+            <output_collection name="output_collection_de_novo" type="list">\n+                <element name="motif1">\n+                    <assert_contents>\n+                        <has_text text=">"/>\n+                    </assert_contents>\n+                </element>\n+            </output_collection>\n         </test>\n-        <test expect_num_outputs="2">\n+        <test expect_num_outputs="4">\n             <param name="input" value="CTCF_peaks_shifted.bed"/>\n             <conditional name="genome">\n                 <param name="source" value="history"/>\n-                <param name="fasta" value="chr2_subset.fa"/>\n+                <param name="fasta" value="chr2_subset.fa.gz"/>\n             </conditional>\n             <output name="html_file">\n                 <assert_contents>\n                     <has_text text="CTCF_peaks_shifted_bed_motif - Homer Known Motif Enrichment Results"/>\n-                    <has_text text="Total Target Sequences = 40"/>\n+                    <ha'..b'ences = 34"/>\n+                    <has_text text="Total Target Sequences = 18"/>\n                     <has_text text="CTCF(Zf)/CD4+-CTCF-ChIP-Seq(Barski_et_al.)/Homer"/>\n                 </assert_contents>\n             </output>\n             <output name="html_homer_file">\n                 <assert_contents>\n                     <has_text text="CTCF_peaks_shifted_bed_motif/ - Homer de novo Motif Results"/>\n-                    <has_text text="Total target sequences = 34"/>\n-                    <has_text_matching expression="CTCF(Zf)|CTCF/MA|BORIS|CTCFL"/>\n+                    <has_text text="Total target sequences = 18"/>\n+                    <!-- This is too much impredictible -->\n+                    <!-- <has_text_matching expression="CTCF(Zf)|CTCF/MA|BORIS|CTCFL|NFATC2"/> -->\n                 </assert_contents>\n             </output>\n+            <output_collection name="output_collection_known" type="list">\n+                <element name="known1">\n+                    <assert_contents>\n+                        <has_text text=">"/>\n+                    </assert_contents>\n+                </element>\n+            </output_collection>\n+            <output_collection name="output_collection_de_novo" type="list">\n+                <element name="motif1">\n+                    <assert_contents>\n+                        <has_text text=">"/>\n+                    </assert_contents>\n+                </element>\n+            </output_collection>\n         </test>\n-        <test expect_num_outputs="1">\n+        <test expect_num_outputs="2">\n             <param name="input" value="CTCF_peaks_shifted.bed"/>\n             <conditional name="genome">\n                 <param name="source" value="history"/>\n-                <param name="fasta" value="chr2_subset.fa"/>\n+                <param name="fasta" value="chr2_subset.fa.gz"/>\n             </conditional>\n             <section name="motif_options">\n                 <param name="mset" value="plants"/>\n@@ -331,13 +383,14 @@\n             <output name="html_file">\n                 <assert_contents>\n                     <has_text text="CTCF_peaks_shifted_bed_motif - Homer Known Motif Enrichment Results"/>\n-                    <has_text text="Total Target Sequences = 40"/>\n-                    <has_text text="RAP26"/>\n+                    <has_text text="Total Target Sequences = 24"/>\n                 </assert_contents>\n             </output>\n+            <output_collection name="output_collection_known" type="list" count="0">\n+            </output_collection>\n         </test>\n         <!-- background tests -->\n-        <test expect_num_outputs="1">\n+        <test expect_num_outputs="2">\n             <param name="input" value="fake_phix_peaks.bed"/>\n             <conditional name="genome">\n                 <param name="source" value="installed"/>\n@@ -353,8 +406,15 @@\n                     <has_text text="Yeast"/>\n                 </assert_contents>\n             </output>\n+            <output_collection name="output_collection_de_novo" type="list">\n+                <element name="motif1">\n+                    <assert_contents>\n+                        <has_text text=">"/>\n+                    </assert_contents>\n+                </element>\n+            </output_collection>\n         </test>\n-        <test expect_num_outputs="1">\n+        <test expect_num_outputs="2">\n             <param name="input" value="fake_phix_peaks.bed"/>\n             <conditional name="genome">\n                 <param name="source" value="installed"/>\n@@ -370,6 +430,13 @@\n                     <has_text text="YAP5"/>\n                 </assert_contents>\n             </output>\n+            <output_collection name="output_collection_de_novo" type="list">\n+                <element name="motif1">\n+                    <assert_contents>\n+                        <has_text text=">"/>\n+                    </assert_contents>\n+                </element>\n+            </output_collection>\n         </test>\n     </tests>\n     <help><![CDATA[\n'
b
diff -r 3126da33847c -r a8f207b43f64 macros.xml
--- a/macros.xml Sun Sep 26 12:29:15 2021 +0000
+++ b/macros.xml Mon Dec 13 15:14:25 2021 +0000
b
@@ -4,7 +4,7 @@
     This is the HOMER version that introduced the preparsed structure expected
     by the current version. -->
     <token name="@IDX_VERSION@">4.11</token>
-    <token name="@VERSION_SUFFIX@">1</token>
+    <token name="@VERSION_SUFFIX@">2</token>
     <xml name="xrefs">
         <xrefs>
             <xref type="bio.tools">homer</xref>
b
diff -r 3126da33847c -r a8f207b43f64 test-data/CTCF_peaks_shifted.bed
--- a/test-data/CTCF_peaks_shifted.bed Sun Sep 26 12:29:15 2021 +0000
+++ b/test-data/CTCF_peaks_shifted.bed Mon Dec 13 15:14:25 2021 +0000
b
@@ -22,19 +22,3 @@
 mm10_dna 975927 975928 chr2:74715602-74716472 18.4944 + 74715927 74715927 236,28,36
 mm10_dna 1224543 1224544 chr2:74964211-74964829 19.6671 - 74964543 74964543 46,49,145
 mm10_dna 1237548 1237549 chr2:74977360-74977813 6.64332 + 74977548 74977548 236,28,36
-mm10_dna 1315369 1315370 chr2:75055077-75055687 25.0987 - 75055369 75055369 46,49,145
-mm10_dna 1398095 1398096 chr2:75137844-75138351 14.9999 - 75138095 75138095 46,49,145
-mm10_dna 1411177 1411178 chr2:75150910-75151415 21.6232 - 75151177 75151177 46,49,145
-mm10_dna 1412696 1412697 chr2:75152384-75152964 19.3701 - 75152696 75152696 46,49,145
-mm10_dna 1468425 1468426 chr2:75208163-75208613 20.9151 - 75208425 75208425 46,49,145
-mm10_dna 1572439 1572440 chr2:75312148-75312690 12.976 - 75312439 75312439 46,49,145
-mm10_dna 1578398 1578399 chr2:75318086-75318603 15.8984 - 75318398 75318398 46,49,145
-mm10_dna 1775510 1775511 chr2:75515254-75515778 6.45021 - 75515510 75515510 46,49,145
-mm10_dna 1823785 1823786 chr2:75563556-75564002 6.81984 - 75563785 75563785 46,49,145
-mm10_dna 1848782 1848783 chr2:75588511-75588998 13.9441 - 75588782 75588782 46,49,145
-mm10_dna 1850417 1850418 chr2:75590116-75590691 15.844 - 75590417 75590417 46,49,145
-mm10_dna 1863329 1863330 chr2:75603091-75603554 11.5679 - 75603329 75603329 46,49,145
-mm10_dna 1865891 1865892 chr2:75605600-75606199 11.5138 + 75605891 75605891 236,28,36
-mm10_dna 1918764 1918765 chr2:75658517-75658909 0.26272 + 75658764 75658764 236,28,36
-mm10_dna 2041339 2041340 chr2:75781071-75781581 10.7504 - 75781339 75781339 46,49,145
-mm10_dna 2046285 2046286 chr2:75786025-75786608 17.3776 + 75786285 75786285 236,28,36
b
diff -r 3126da33847c -r a8f207b43f64 test-data/chr2_subset.fa
--- a/test-data/chr2_subset.fa Sun Sep 26 12:29:15 2021 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b"@@ -1,40942 +0,0 @@\n->mm10_dna range=chr2:73740000-75787000 5'pad=0 3'pad=0 strand=+ repeatMasking=lower\n-AACTCAAAACACCCTTAAGAAATAGATATAACTCATATGCCTTTTAAAGA\n-TGGGAAAATCAGTAACTGAGAGACCAAGGCATAAACACAGTTAAACACCC\n-TCAGCACACAGTCAGTCCTTAACCCAGATCATATTATTTCATTGCACATG\n-CTCATAACTGTGACTGATAACAGAGATGCTTATGAACAAGGCAACAGTGA\n-GAGCCAGGCAGCTCATCTACAAGTATTGATTTAGTCAGAAAGAACCAGAG\n-AATGGAGAGGAGGCACTCTAAAGAAGGTCACATGAATAGAAGGCTATGTA\n-AACTGCTCAGACAGGCTCTGCTGGTATTTGAGGGGACCCTTCAAATGCAG\n-GCCCAAGAGTCTAACACTAAGCTAAATAATGTATTCAAATAGCCAGTGTG\n-GAAGGCTAGCAGAGGGATGCTGATATCTCTTATAGCCAGGTTCATTCACA\n-GAATTGGTAACATTCTTGCTATTGATGGCTTCCTTATACTACATCCTTCA\n-CTGGAAGTCTTGGGATGTTCCCTAGTTGTCATGTCCAAATAGAGTGACCA\n-TATCTTAAATCTATTAAGGCTGAAGTGCTAACTCTGAGCCCCACATCATG\n-CTTATCTCTTGAAGCACCACTATCCTGACTGGGCTCAGTGAATGGAAGGG\n-CACTATGAAAGAGTCCTGTTAAGGTCAAGAACAGCAACCTCTTCCAAGTT\n-CAAATTCTCTTAAGCTAAAAATTCTGCTGTAATGTAATATGCAGATAAAC\n-TCTGCATAATCTTGCATTTTCTCTttcctgtaatattaactatgaatcat\n-ttttgttggggatgatttcttttgaaaaaaaggaacctaatatatagggg\n-gtgggaggaagaagtgggtggcagagcagcactcatggttcttgcaatgc\n-ggcattcacaccaactgtggaagtcggtacagaaggtaaattgagacccc\n-ggactggtgtcagtgttgggaaggagggagaaagctgTAAGCCAGGCTAT\n-CAGAGTTCATCCGCATCCCATGTAATGTGTTATCAAGTCAGGGATGGACT\n-ATGTAATCCTGTGTGAAAGATGGAAAACTGACTCTATTCATTTGGGAGAC\n-AGGAATGGAAAATCCAACATGCCTAAAATCTCCAACAGATAAGAAAGGAA\n-CTAACAAGCTTTTCTGAGGAACTTCCAATGACGTGAGGGAAAGCTTCTAA\n-TGTACAGAGGTCACAGATGACCAATGACAGTTCCTTTCCCCAGTATTCTC\n-ATTTCATGACAGCGAGTCTATAGAGAACTCGATGTTCTCACAGCTTCACT\n-CACATCCAGATCACCCTAACATTATTTTGCCTTGAATCTCTATGTTCATC\n-AGATTCCAATCTTCTACCTGCAGTTACTGCAGTAACTGTGAAGGTGAAAA\n-ACAGAACGCTCAATACAATGACTGTGCTCTCCGCTATTTTCCACAGTCAA\n-GTGGGCTGCCACCCAACCTTCCTACCTCCTTCTAATCACAGCCCATGCTT\n-CAACATATGGTGTGCAGAACAGCAGTGACTCAACCCTTTGCAAGATCACA\n-AACAGCACAAAGAAATATGAAGCAATTATTAAGTGTCATAGGTCCCACAG\n-ACAACCATGGCTCTCTTGCAGGGCACTTGTGAAACAATGAAAAGAAGGCA\n-AAGGTGGAAGGTAgaccaggaacgtagctcagctggcagagggctttggg\n-cagcacaagccaggacctggacttgatcaactgaatcacagtggctcata\n-cctataatcccagtactcaggagaatccgaagttcaatgtcattttcagc\n-tacacagcaagttcaaagccagcctaggttagagattctgtcttaaaaTT\n-TTTTAACAATTTTTTTTTTAAAGTGGAGGATACCTGAGAACTTCCCATTT\n-GGAGCTTAGGTGGAAGGAAGAGTTTTCCGGCAGGCTAATACAAATTTCTG\n-ACAGGCAGACTATAATGACCTGTCTCACTCTGAGGGGCTAGTTTTGGCAG\n-GACAGTGGTGATCGACAACATAGGATACAGGCTTATGGAAACCTGCCTGT\n-ACTAGCACTTACTAGTAAAAGTCATAAGTTTACTTTTGGCAAGTCAAGGA\n-CAAAGAATGTCTGCTGCTGTTAATACTATTTTTCCTGAGATAGCTGATAT\n-TAAAGAAATTTTCAGGAAGATACACACAGATGACAGTAGAAAGATTTCCA\n-TACAGTCCTCAGAACTTCTGATGCCAGAGTCAAACGTATGACAGAGGAGA\n-GCTCCTTCCTGTCACTCTGCAACAAACAGCTCCAAAGCATTTCCTGCATG\n-ACAAACAGCAAAAAGCCTGACAAAAGTGAATGTAACAAATACATGTGTGT\n-GTATAAAATAAACATACAACACATAAAATATATGCATAAAATCAAGCACA\n-AAACATAATGCGAATATAGGTATCAGTTAAAACCATATAAAGCAATATAT\n-GATTCATAAACAGGACAAGAGATCTTTTACATGAACACAGCTACACGTTA\n-ATGAATTCCTACCAACTATCAGTAAGAAGAGACATGTCAACAGAATACAT\n-ACTTGAAAGAACTTTCTAAGATATCTTTATTTAAGCAGTCATTGTTTTCA\n-CATTTCTACCATAGGACTCATGCTTGAAATGTGGTCCAGTAAAATAGACA\n-GGCAAATCATTGACAATTCTTAAATATCTAAGTATCCTTTTGCTTAATTT\n-GAAGTTCAAACTAAAACATCCTAATAAACAACATACTAGAAGCAGACTGA\n-GTGGCTTCTGCAGGCAGCTACGAAGAAGCATTTGGTCTAAAATACTATCT\n-GCATGCTTTACTGCAGTAACATCGTTAGGGTCCCTCCAGGCTACTAGCGT\n-CTGGAAAACAAAAGAGCATTGTCACTGCAAAGTGAAAAagaatgatatga\n-ctcaagaggtaaaggcaggcagatctctgagttcaaggccagtctggtct\n-acagagtaagttctaggacagccagggttacagagaaaaaAAAAAAAAAA\n-ATGAGGTTCAAATCAATATCAAACACAGAGAAGTTAAGTGCAATGGAAAA\n-ATATTTCCATTGCTTTTATATTCTATAAGAAATGAATATTTATCAGGGGC\n-CAAGGAGCTTTCTCCAGAAGCAACCTAGGGTGTGTTCACTTAGAAATAAC\n-CCAGAACTTACAATTCTAGCCTTAACCTACTTTTAAACTTTTATGTGCAT\n-TTTGTTCAAAAGCAATTTACACTATTTTTCTGTATGTATGTTTGCattta\n-tttttatttatttatttaGTTTTGGCCTTTTTtttttttccttcaagaca\n-gggtttctctttgtagccctgactgtcctggaactcactctgtagaccag\n-gttggcctcgaactcagaaatccgcctgcctctgcctcccaagtgctggg\n-atcaaaggcgtgcgccaccaccaccaggcCTGCATTTATTTTTTAATGTT\n-GAGAAGTAGGTAGACACGACTTCTTACAGCTAATGGACCAAGCAAAAAGA\n-GGGTATAAGGAACACAGCCAGTTTTTGCCACAGGCCACAGAATTTAAGGG\n-GACCCCATACAAAAATTTTAAGATAATGATAATCAGAAATTAGAGGACAT\n-GAACAAAATGTATCTGAGTATGAAACAAAACACTGTAACACAACCTAAAA\n-AACCCAGTTAGAGAAAAAGCTGTTTTATATTTCACAAATGTCAAAAAGAT\n-GCACAAAAAAGATATTTCATCTATATTATTGAATATCCACTTGTT"..b'GCACATGTGGGTACTGTGTTCCTTTATTGTCCTTAGGAAGCAT\n-TCTGTATTGTACATAACTGTGCTGTACTTTCATAAGTGTAACAGCATGGT\n-GGCTGTATCTACTCCAACAATGTCATGACAGGGATGGAATGTGCTGTGCT\n-ACATTGTGAGGTTGGCAATGTCACAAAATCTCCAGTTGCATTGTATCTGT\n-AAGACACCAACTATGCCGGCCTGTGACTACCCAACTATCAGTGTGTGGGT\n-CCTTAACTACGTCCCCCCCACCCCCAGAAGGAATGCAGTAGGGATGGGtt\n-tcttttttttttttttttcttttctttcttttttttttttttttttggtt\n-tttcgagacagggtttctctgtgtagccctggctgtcctggaactcactt\n-tgtagaccaggctgggctcgaactcagaaatccgcctgcctctgcctccc\n-gagtgctgggattaaaggcgtgcgccaccacgcctagcAGGGATGGGTTT\n-CTTTTTGACAGTTGCTATGTCCAGATGAAACACAGTAAGCTCATCGGTAA\n-CATAAGACACACTCTATCAGCACCAAGGAGCATGCTGTAAGATGGTCTCT\n-CTACTTCCAATACATCTAGTCTACTAACTCCACGAAGGGCCCAGTGGGAG\n-GGAGACCAGAGCTGAGAGAGCAAGAGCCTTTGTTGCACCATGGAGTAAGT\n-AGAGTGATACGTTTACATACAAAGATTTCTTAGATAGATAAATGATCATC\n-CTACCAGATTTATTCATTGTAACTGGAACAGTGGGACTTTGTTTCTTGTG\n-GAGCAAGGCCACATTCTTCCGTGGAAGGCAGCCTTACAGGAAGGTGGTTA\n-TAGTAGCACCATTGTTAGAGCAGTTCACCCCAGTTTAGAAGGTTTAAATG\n-CCAATGACTATGTGACTATGAATATCTATTGTGTTAATCCTGCAATTTTA\n-TGTACATTGCAGTGCTCTGCTGCTGAGCCCCGTACTTAGCTCTTGGGTTT\n-ttttgttttgttttgttttgttttgttttgttttAGCTTTAGCTTGGCCT\n-GAACTATGTAGCCTGGGCAGGCCTCACGCTTGTGGTCCTCCCCACCTCAG\n-CATCAAGTAAATAATATTAATGTTTCTTCTGCAATAGCTTTTCTGCTATT\n-TTTAGGGAAAGAAATATGCAATTCACATTTTTTGATTCAAGATTATAATA\n-TCATAATCATATAATACCTGATTGTTTAGATATTAATGAGGCTGGTAGAA\n-AATATCATCCCTTACGAGGGAAATGTGCTATTTTCATCTTATAAAGAAGC\n-AACTCAGAGACACTTGCGTGTTCAGTAATGTGAGGAGACAGGAGCAGGTT\n-CTAATTCAGTTCTATTTGACTCCTGGTCCTAAAACTATTGGTTTATCTGA\n-TCTTGGAGCAGAGTAATTTTAGAAAATGCTTGCGTATTCTttttgttgtt\n-gaagtgggggtctttctcaatagcccacattgaccttgaactcataaact\n-caagcaatcctcctgatacccaagtagctgagtctacatgtctgggcctc\n-tgtgctcggttGCCTCTGATTTTCATCATCTGAAACTAAGACCTGCCAAA\n-GTGGGGGGGGGGCAGTTTTCCTGAGATTTCGAGACTTGCATGCGCGGTAC\n-TGGATTGATGTGAGAGCACCCTCTGGGGCCATCTCTGTTCTAATCCCCGG\n-GCTTGGTCTTCTGCAGCCTTTGGAAAGGTTATGTTTCAGAATAGGTGTTG\n-GTTTACACCGATGTTTTTCAATGCTTCATAGTCATTTTTCAAGATAATTC\n-AGTTATGACTCTATAAAAGAAAACTAAAGAAACAGGGTTAAATTCAGGTC\n-ATTTGAAAGGAAGTCTTTgccagctggtggtagcacatgcttttaatcct\n-agcactgtggaggcagacacaagtggatttctgtgagttcgaggccagcc\n-tggtctacagagctagtcccaggataaatagtgttTCCCTATCttttttt\n-ttttttttttaatttctttgaaaaatatttCTGGCAGCTTCAAGGGCAGA\n-AGTTGTTTATTTGTGTGTGTGatttttttaattaggtatttttctcgttt\n-acattttcaatgctatcccaaaggtcccccatacccaaccccccaatccc\n-ctacccacccactccccctttttggccctggcgttcccctgtactggggc\n-atataaagtttgcaagtccaatgggcctctctttgcagtgatggccgact\n-agaccatcttttgatacatatgcagctagagacaagagctccggggtact\n-ggttagttcataatgttgtcccacctatagggttgcagttccctatagct\n-ccttgggtactttctctagctcctccattgggggctgtgtgaGTGTTGCC\n-CTATCTTGAAAAagaaggaggaggaagaggaggaggaggagaaagaAAGA\n-GTATTTGTGTGTGACATAGTCTGAATTTGCCAGGTTTGTGAACATATTTT\n-CTAGTAATTTTCAACAATTAATTCAGCATACCATGAGTTAATTTATGGTG\n-ATGGAGAACCTTTCTGTGTATGGATGCCTGGCCATTTTCCCAACAAATTC\n-ATTTGCTGAAGATGGATTTTTACAAGTAGGTTTGGAAGTGAATTAGGTGT\n-GAATATAGAAACTGGATTTTAAATATGCGTTTCTTTAGTCCTTTGTTTTA\n-TATGTGCTGAAAAGAAAGCAGTAACATCTCTCTCTCCCCCTCCTGCCTTT\n-CCTCCCTTCTTACTttccacccccctcttcttcccctggtccctccccgt\n-acagtacacctcccacttctctactcactcctcctctggctttccttttg\n-aaaggccaggctcccatggctatcaaccggccaaggtatatcaagtttca\n-gtaagaAGGAAGCTGGATTCTTGAAGAACTAAGGACAGATGGAGAGCGAA\n-AAATCTTGGCCAACCATGGGATTTTAAACAAATCATTTCTCCTTTTCCTT\n-CTTCATACCTCTCTCTTTTTGATCTAGCTAACTACAAAGGCTCCTCCAGT\n-TTCTAACTGCCTGTAGGATGACACTGGGGGCAGACACAAGATGTACATTC\n-TTAGTAAACAAAGCCATGCTCCGTTGTGTCCACTAGGAGGCACTGTAGCA\n-CCAGTGCACTATCGTCTGTCCTTCAATTTATCCAAGTCAATTTATTTTCC\n-TTAAACCTTCAAATTCAAGTTTATGCTACCGTGAAAGCTAAGATTTAATT\n-TAGTAACAGCATCTGGTTAAATCTAGGAAGAATTCAGCCGAGAGACTTAC\n-TGACTAGGGAAAATCAGAATCCCGGAGCCATTTGCATTGTAAATGGTGTC\n-TCATGATTTTGAAAGGGCATCAGGAGGGGCTTGAAATTCTCAAGGGAAAT\n-ATAGATTGGACCAATCCCGACTTccctaacaatgaagtcataaatgaaga\n-agtgtgcaaactccatgtgaagacaactataatattactcaagggtagaa\n-aagaatacttaaattgagatataccatgttcttggatacaaaggttcagt\n-gctgtATAAATTTAAAGCTATTCTACAAATATACTCACACATATAATGTG\n-ATCAAAAATTTTTTTTAGAAACTTGATAAAGTTTATTGAGGACAATGAAG\n-CTGGAAAAAATAAAGGAAGAAAATTGAACTTTTTGAAGGGGAAGAGAATT\n-TGAACCACTGGTTTTTTGTTTGTTTGTTTTATTTTAAATCATGAAACAGT\n-TGAACGTAAATATCTCTTTATAGGTCTCTgggaatggctgagcagggaac\n-gtgcctgccacacatgcatgaagacgtgagtttgtatacccagagtatac\n-a\n'
b
diff -r 3126da33847c -r a8f207b43f64 test-data/chr2_subset.fa.gz
b
Binary file test-data/chr2_subset.fa.gz has changed
b
diff -r 3126da33847c -r a8f207b43f64 test-data/generate_tests.sh
--- a/test-data/generate_tests.sh Sun Sep 26 12:29:15 2021 +0000
+++ b/test-data/generate_tests.sh Mon Dec 13 15:14:25 2021 +0000
[
@@ -11,10 +11,10 @@
   wget https://raw.githubusercontent.com/lldelisle/scriptsForWilleminEtAl2021/main/CTCF/E12_Limbs_Wt_CTCF_colored.bed -O test-data/CTCF_peaks.bed
 fi
 if [ ! -e test-data/CTCF_peaks_shifted.bed ]; then
-  cat test-data/CTCF_peaks.bed | grep "chr2" | awk -v OFS="\t" '{$1="mm10_dna"; $2-=73740000; $3-=73740000; print}' > test-data/CTCF_peaks_shifted.bed
+  cat test-data/CTCF_peaks.bed | grep "chr2" | awk -v OFS="\t" '$3<75000000 && $2>73740000{$1="mm10_dna"; $2-=73740000; $3-=73740000; print}' > test-data/CTCF_peaks_shifted.bed
 fi
 # chr2_subset.fa was downloaded from UCSC
-# https://genome.ucsc.edu/cgi-bin/hgc?hgsid=1136019667_XgAJOvV4a3CY4ibCu6RrUcvGxLNo&g=htcGetDna2&table=&i=mixed&o=56694975&l=56694975&r=56714605&getDnaPos=chr2%3A73740000-75787000&db=mm10&hgSeq.cdsExon=1&hgSeq.padding5=0&hgSeq.padding3=0&hgSeq.casing=upper&hgSeq.maskRepeats=on&boolshad.hgSeq.maskRepeats=0&hgSeq.repMasking=lower&boolshad.hgSeq.revComp=0&submit=get+DNA
+# https://genome.ucsc.edu/cgi-bin/hgc?hgsid=1234982067_JnS4z30UVCNarTg26Ztd1Oh6nfu6&g=htcGetDna2&table=&i=mixed&o=56694975&l=56694975&r=56714605&getDnaPos=chr2%3A73740000-75000000&db=mm10&hgSeq.cdsExon=1&hgSeq.padding5=0&hgSeq.padding3=0&hgSeq.casing=upper&hgSeq.maskRepeats=on&boolshad.hgSeq.maskRepeats=0&hgSeq.repMasking=lower&boolshad.hgSeq.revComp=0&submit=get+DNA
 
 . <(planemo conda_env homer_gtf_to_annotation.xml)
 echo "$(which homer)"
@@ -50,6 +50,7 @@
 findMotifsGenome.pl test-data/fake_phix_peaks.bed test-data/phiX174.fasta fake_phix_peaks_bed_motif
 mv fake_phix_peaks_bed_motif test-data/motif_test1
 # Thus I needed to use has_text for the other outputs
+# gunzip -c test-data/chr2_subset.fa.gz > test-data/chr2_subset.fa
 # findMotifsGenome.pl test-data/CTCF_peaks_shifted.bed test-data/chr2_subset.fa CTCF_peaks_shifted_bed_motif
 # mv CTCF_peaks_shifted_bed_motif test-data/motif_test2
 # findMotifsGenome.pl test-data/CTCF_peaks_shifted.bed test-data/chr2_subset.fa CTCF_peaks_shifted_bed_motif -mask
b
diff -r 3126da33847c -r a8f207b43f64 test-data/scanMotif_res1.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/scanMotif_res1.bed Mon Dec 13 15:14:25 2021 +0000
b
@@ -0,0 +1,1 @@
+phiX174 1449 1460 1-ATGAGCTTAATC,BestGuess:Oc 12.355433 +
b
diff -r 3126da33847c -r a8f207b43f64 test-data/scanMotif_res1.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/scanMotif_res1.txt Mon Dec 13 15:14:25 2021 +0000
b
@@ -0,0 +1,1 @@
+1-ATGAGCTTAATC,BestGuess:Oc/dmmpmm(Noyes_hd)/fly(0.654)-1 phiX174 1449 1460 + 12.355433 ATGAGCTTAATC