Repository 'dotplot'
hg clone https://toolshed.g2.bx.psu.edu/repos/pdeford/dotplot

Changeset 0:a8fcdd1c2cce (2016-11-01)
Next changeset 1:9642674cbe9b (2016-11-01)
Commit message:
Uploaded
added:
dotplot/._dotplot_chr4.png
dotplot/dotplot.py
dotplot/dotplot.xml
dotplot/dotplot_chr4.png
dotplot/test-data/._dotplot_chr4.pdf
dotplot/test-data/._human_vs_chicken_chr4.tabular
dotplot/test-data/dotplot_chr4.pdf
dotplot/test-data/human_vs_chicken_chr4.tabular
b
diff -r 000000000000 -r a8fcdd1c2cce dotplot/._dotplot_chr4.png
b
Binary file dotplot/._dotplot_chr4.png has changed
b
diff -r 000000000000 -r a8fcdd1c2cce dotplot/dotplot.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/dotplot/dotplot.py Tue Nov 01 15:19:49 2016 -0400
[
@@ -0,0 +1,55 @@
+#!/usr/bin/env python
+
+import matplotlib
+matplotlib.use('Agg')
+import matplotlib.pyplot as plt
+import sys
+
+def main():
+ input_file = open(sys.argv[1])
+ filter = int(sys.argv[2])
+ png_path = sys.argv[3]
+
+ sizes = {}
+ alignments = {}
+
+ for line in input_file:
+ fields = line.split()
+ r_start = int(fields[5])
+ r_end = int(fields[6])
+ q_start = int(fields[16])
+ q_end = int(fields[17])
+ contig_name = fields[9]
+ contig_size = int(fields[11])
+ reference_size = int(fields[3])
+
+ if contig_name not in sizes:
+ sizes[contig_name] = contig_size
+ alignments[contig_name] = []
+
+ if abs(r_start - r_end) > filter:
+ alignments[contig_name].append([(r_start, r_end), (q_start, q_end)])
+
+ cumulative = 0 
+
+ plt.figure()
+ yticks = [[], []]
+ for key in sorted(sizes.keys(), key=lambda x:sizes[x], reverse=True):
+ for reference, query in alignments[key]:
+ plt.plot(reference, [x + cumulative for x in query], 'k-', zorder=10)
+ yticks[0].append(cumulative + sizes[key]/2)
+ yticks[1].append("{} bp".format(sizes[key]))
+ cumulative += sizes[key]
+ plt.axhline(cumulative, zorder=1, color='lightgray', )
+
+ plt.xlim([0,reference_size])
+ plt.ylim([0,cumulative])
+ plt.yticks(*yticks)
+ plt.tick_params(axis='y', which='both',length=0)
+ plt.xlabel("Position in Reference (bp)")
+ plt.ylabel("Contigs by size")
+ plt.tight_layout()
+ plt.savefig(png_path, dpi=200, format='pdf')
+
+# Run the plot only when this file is executed as a script, not on import.
+if __name__ == '__main__':
+ main()
\ No newline at end of file
b
diff -r 000000000000 -r a8fcdd1c2cce dotplot/dotplot.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/dotplot/dotplot.xml Tue Nov 01 15:19:49 2016 -0400
b
@@ -0,0 +1,42 @@
+<tool id="lastz_dotplot_matplotlib" name="Dot Plot" version="1.0.0">
+  <description>of LASTZ tabular output</description>
+  <requirements>
+    <requirement type="package">numpy</requirement>
+    <requirement type="package">matplotlib</requirement>
+  </requirements>
+  <!-- Positional arguments of dotplot.py: input table, length threshold,
+       output path. The output variable has to match the name of the
+       <data> element in <outputs> (out_file1). -->
+  <command interpreter="python">dotplot.py '$input' $filter '$out_file1'</command>
+  <inputs>
+    <param name="input" type="data" format="tabular" label="LASTZ tabular output" help="Dataset missing? See TIP below"/>
+    <!-- dotplot.py converts this value with int(), so only integers are accepted. -->
+    <param name="filter" type="integer" value="0" min="0" label="Filter out alignments shorter than (bp)"/>
+  </inputs>
+  <outputs>
+    <data format="pdf" name="out_file1"/>
+  </outputs>
+  <tests>
+    <test>
+      <param name="input"  value="human_vs_chicken_chr4.tabular" ftype="tabular"/>
+      <param name="filter" value="100"/>
+      <output name="out_file1" file="dotplot_chr4.pdf" compare="sim_size" />
+    </test>
+  </tests>
+  <help>
+
+**TIP:** If your dataset does not appear in the input menu, make sure its datatype is set to tabular.
+
+-----
+
+**Syntax**
+
+This tool creates a dot plot of the contents of a LASTZ tabular file resulting from the alignment of one or more sequences to a single reference sequence. 
+
+If multiple query sequences are present, they will be sorted by size, offset from one another in the dot plot, and separated by a gray line. 
+
+- **filter** only alignments that span more than this many bp of the reference are plotted; all others are excluded from the analysis.
+
+-----
+
+**Example**
+
+Chicken chromosome 4 was aligned against human chromosome 4 using LASTZ, specifying tabular output, no transitions, and performing gap free extension. 
+
+Using a 100 bp filter above we get the following dotplot.
+
+.. image:: dotplot_chr4.png
+</help>
+</tool>
\ No newline at end of file
b
diff -r 000000000000 -r a8fcdd1c2cce dotplot/dotplot_chr4.png
b
Binary file dotplot/dotplot_chr4.png has changed
b
diff -r 000000000000 -r a8fcdd1c2cce dotplot/test-data/._dotplot_chr4.pdf
b
Binary file dotplot/test-data/._dotplot_chr4.pdf has changed
b
diff -r 000000000000 -r a8fcdd1c2cce dotplot/test-data/._human_vs_chicken_chr4.tabular
b
Binary file dotplot/test-data/._human_vs_chicken_chr4.tabular has changed
b
diff -r 000000000000 -r a8fcdd1c2cce dotplot/test-data/dotplot_chr4.pdf
b
Binary file dotplot/test-data/dotplot_chr4.pdf has changed
b
diff -r 000000000000 -r a8fcdd1c2cce dotplot/test-data/human_vs_chicken_chr4.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/dotplot/test-data/human_vs_chicken_chr4.tabular Tue Nov 01 15:19:49 2016 -0400
b
b'@@ -0,0 +1,44574 @@\n+3510\tchr4\t+\t190214555\t47505\t47504\t47601\t97\tAACAAAGAAGAAGAAGAAGAAGAAGAAGAACAAGAACAAGAAGAACAAGAA---CAAGAAGAACAAGAACAAGAAGAAGAAGAAGAAAGAAAAGAAGAAG\tchr4\t+\t94230402\t10269793\t10269792\t10269892\t10269793\t10269792\t10269892\t100\tAACAGAAAAACAGTGAAAGAAGAAGAGAATAAAGAAACAAAAGGCCAAGAAAAGCATGAAGAACAAAGGAAAGAGTATTGGGAAGAGAGAATAAATGAAG\t....:.:..:x..x::..........::.xx.....xx.:...:x......---..x.........:::x....:x.xx::.....:....x.:.x....\t51M3I46M\t66/97\t68.0%\t100/94230402\t0.0%\t3/97\t3.1%\t-10222288\t-84008114\n+3078\tchr4\t+\t190214555\t50454\t50453\t50512\t59\tGCACCTGTAGTCCCAGCTACTGGGGAGGCTGAGTCAGGAGAATGGCGTGAACCCGGGAG\tchr4\t+\t94230402\t76033206\t76033205\t76033264\t76033206\t76033205\t76033264\t59\tGCACGTGTAGTTCCAGCTACTTGAAAGGCTGAGCCCTGTGAAGCACTTGAACCCAGAAG\t....x......:.........x.::........:.xx.x...xx:.x.......:.:..\t59M\t44/59\t74.6%\t59/94230402\t0.0%\t0/59\t0.0%\t-75982752\t-18247650\n+3124\tchr4\t+\t190214555\t82350\t82349\t82397\t48\tAAGTCCATTTTTTTGGTTTGTGGTTTTTTTTTTTTTTTTTTTTTTTTT\tchr4\t+\t94230402\t14492251\t14492250\t14492298\t14492251\t14492250\t14492298\t48\tAAATGCATGTTTTCTGTTTGTGGTTTTTTTTTTTTTTTTTGTATTTTT\t..:.x...x....:x.........................x.x.....\t48M\t41/48\t85.4%\t48/94230402\t0.0%\t0/48\t0.0%\t-14409901\t-79820501\n+3251\tchr4\t+\t190214555\t82354\t82353\t82406\t53\tCCATTTTTTTGGTTTGTGGTTTTTTTTTTTTTTTTTTTTTTTTTTTTGAGATG\tchr4\t+\t94230402\t81894504\t81894503\t81894556\t81894504\t81894503\t81894556\t53\tCCACTACTGTATTTTGTGGTTTTTTTTTTCTTTTTTCTTTTGTTTTTGAGCTG\t...:.x:.x.:x.................:......:....x........x..\t53M\t43/53\t81.1%\t53/94230402\t0.0%\t0/53\t0.0%\t-81812150\t-12418252\n+3941\tchr4\t+\t190214555\t86070\t86069\t86208\t139\tAACATACAGGAATTCATGCTGGAGAGAAACCCTACAAATGTGAAAAATGTGGCAAAGCCTTTAATAGGTCCACATCACTTAGTAAA-CATAAGAGAATTCATACTGGAGAGAAACCCTACACATGTGAAGAATGTGGCAA\tchr4\t+\t94230402\t81517404\t81517403\t81517542\t81517404\t81517403\t81517542\t139\tAGCATAAAAAGACTCATACGGCAGATAAAGTGTTCACCTGTGATGAATGTGGGAAGTCA
TTCAACATG-CAACGAAAATTAGTAAAGCACAGAATTAGACATACTGGGGAGAGACCATACAGCTGTTCAGCATGTGGTAA\t.:....x.:::.:....:.x.x...x...x:x.x..xx.....x:.......x..:x.x..:..:.x.-.x..:xx.x........-..:.::.xx.xx........:....:...x....xx...xx..x......:..\t68M1D17M1I53M\t92/138\t66.7%\t139/94230402\t0.0%\t2/138\t1.4%\t-81431334\t-12799068\n+4798\tchr4\t+\t190214555\t86081\t86080\t86401\t321\tATTCATGCTGGAGAGAAACCCTACAAATGTGAAAAATGTGGCAAAGC----------------CTTTAATAGGTCCACATCACTTAGTAAACATAAGAGAATTCATACTGGAGAGAAACCCTACACATGTGAAGAATGTGGCAAAGCCTTTAGACGGTCCACAGT--TCTGAACGAACATAAGAAAATTCATACTGGAGAGAAACCCTACAAATGTGAAGAATGTGGCAAAGCCTTTACAAGGTCCACAACACTGAATGAA-CACAAGAAAATTCATACTGGAGAGAAACCCTACAAATGTAAAGAATGTGGCAAAGCCTTTAGATGGTCCACAAGCCTG\tchr4\t+\t94230402\t81517246\t81517245\t81517563\t81517246\t81517245\t81517563\t318\tATTCATTCTGGAGAAAAGCCTCATCTGTGTGATATCTGTGGCAGAGGTAGGTAAGAACCAAAGCTTTTGCCTCTTAAGGTCTTTGGATTGAAATGTGAGAAGTCATTTTAAACTG---TCTCATATTTTTCTAGGATTCAGTAA--------------CTTCAGTAATTTGAAGGAGCATAAAAAGACTCATACGGCAGATAAAGTGTTCACCTGTGATGAATGTGGGAAGTCATTCAACATG-CAACGAAAATTAGTAAAGCACAGAATTAGACATACTGGGGAGAGACCATACAGCTGTTCAGCATGTGGTAAGATTT----ACTGTGCATAGGCTTG\t......x.......:..:..::.:xx:.....x.xx.......:..x----------------....x::xxx.:x.x:..x:.x::.x:.x..:x.....x....x:.::.xx.---:.::.:.:x.x.xx..:..x::.:..--------------.:x....--.:....x..:.....:..:.:......x.x...x...x:x.x..xx.....x........x..:x.x..:.xx.x.-.x..:.x.x.x.:.:..-....::.xx.xx........:....:...x....:x...xx..x......:..::::.----.:x..x..:.:..:..\t47M16I52M3D26M14D7M2I76M1D17M1I58M4D16M\t186/299\t62.2%\t318/94230402\t0.0%\t41/299\t13.7%\t-81431165\t-12799237\n+5064\tchr4\t+\t190214555\t86500\t86499\t86801\t302\tTATTCATACTGGCGAAAAACCCTACACATGTGAAAAATGTGGCA-----------------AAGCTTTTAACCAATCCTCAAGTCTTATTAT--ACACAGGAGCATTCATTCTGAACAAAAACTTTACAAATGTGAAGAATGTGGCAAAGCCTTTACTTGGTCCTCATCCCTTAATAAACATAAGAGAATTCATACTGGAGAGAAACCCTACACATGTGAAGAATGTGGCAAAGCTTTTTATAGGTCCTCACACCTTGCTAAACATAAGAGAATTCATACTGGAGAGAAACCCTACACGTGCGAAGAATGTGGCAAAGCTT\tchr4\t+\t94230402\t81517245\t81517244
\t81517547\t81517245\t81517244\t81517547\t303\tTATTCATTCTGGAGAAAAGCCTCATCTGTGTGATATCTGTGGCAGAGGTAGGTAAGAACCAAAGCTTTTGCC---TCTTAAGGTCTTTGGATTGAAATGTGAGAAGTCATTTTAAACTG---TCTCATATTTTTCTAGGATTCAGTAACTTCAG'..b'xx.:....x:..:..x.x...:.....\t65M9D54M\t78/119\t65.5%\t119/94230402\t0.0%\t9/119\t7.6%\t131862986\t58351569\n+4090\tchr4\t+\t190214555\t189898043\t189898042\t189898170\t128\tAAAGAAAGAGAAAGAAAGAAAAGAAAGAAAGGAAAGAAAGAAAGAAAAGAAAAGAAAGAAAGAAAGAAAGAAAGAAAGAGAAAGAAAAAGAAAGAAAGAAAGAAAGAAAGAGAAAGAAAGGGAGAAAG\tchr4\t-\t94230402\t30533164\t30533163\t30533286\t63697117\t63697116\t63697239\t123\tAAATAAAAAGGAAGAAAGAAAAGAGGGGAGAGAAAGACTGTGAAAAAGTAAAGGCTCAAGACAAAGACCGAAATA-----GAAGAATGAGGAAAAAAAGAGAAAACACCGGAAAGGAGAGGAATAAAG\t...x...:..:.............::.:.::......xx.x:.:...:x...:.xxx:.:.x.....xx....x.-----:.....x:..:..:...::.::...x.xx.::..:..:...:.x....\t75M5D48M\t80/123\t65.0%\t123/94230402\t0.0%\t5/123\t4.1%\t159364879\t30849676\n+3046\tchr4\t+\t190214555\t189898066\t189898065\t189898166\t101\tAAAGAAAGGAAAGAAAGAAAGAAAAGAAAAGAAAGAAAGAAAGAAAGAAAGAAAGAGAAAGAAAAAGAAAGAAAGAAAGAAAGAAAGAGAAAGAAAGGGAG\tchr4\t-\t94230402\t28816821\t28816820\t28816921\t65413482\t65413481\t65413582\t101\tAATGGTAGGAAAAAAAGAATGAAAAGCTGACAGAGATAAAAAGAAAGAAAAGAACACATGGAGCTGTTATGAAGCCATGAAAGAAAGAAATCAAAGCAGAG\t..x.:x......:......x......xx:.x.:...x.:...........::..x.x.x:..:xx:xx.x...:xx.x..........:.xx:..:x:...\t101M\t65/101\t64.4%\t101/94230402\t0.0%\t0/101\t0.0%\t161081245\t29133310\n+3991\tchr4\t+\t190214555\t189946201\t189946200\t189946286\t86\tTGATCGTTT-AGTACAGGTGTATTCAATCTTTTAGCTTCCCTGGGCCACAATGAAAGGAGAGGAATTGTCTTGGGCCACACATAAAA\tchr4\t-\t94230402\t9885843\t9885842\t9885929\t84344474\t84344473\t84344560\t87\tTGATTGCTTGAATTCAGGACTATCCAACCTTCTGTCTTCCCTGAGCCACAATGAGTGAATAGGAATTACCTAGGGCTGCATCTAAGA\t....:.:..-.:.x....xx...:...:...:.:x........:..........:x.:.x.......::..x....::..:x...:.\t9M1I77M\t62/86\t72.1%\t87/94230402\t0.0%\t1/86\t1.2%\t180060358\t10154197\n+3466\tchr4\t+\t190214555\t190010480\t190010479\t
190010584\t105\tCAGGCT---GGCATGCAGTGGCATGATCTCAGCTCACTGCAACCTCTGCCTCCCCGGTTCGAGTGATTCACCTGCCTCAGCCTCCCAACTAGCTGGGATTACAGGTGC\tchr4\t-\t94230402\t18197091\t18197090\t18197197\t76033206\t76033205\t76033312\t107\tCAGGCTTAGGGCACACATCTGCTTGGCATCATGTT-CTACAGCCCAAAGCTTCTGGGTTCAAGTGCTTCACAGGGCTCAGCCTTTCAAGTAGCTGGAACTACACGTGC\t......---....::..x:x..x..::x...xx.:-..:..:..:xx:x..:.:x.....:....x.....xx.x........::...x.......:.:....x....\t6M3I26M1D72M\t71/104\t68.3%\t107/94230402\t0.0%\t4/104\t3.8%\t171813389\t18401166\n+3393\tchr4\t+\t190214555\t190019694\t190019693\t190019835\t142\tCTCCCTCGCCCGCTCCCTTTCTCTACTTCCCTCTCCACCTTGCCCGCTCTCCTCCCTCTCTCTCTCTCTCTCTCTCT-CGCTGTTTCT-CTCTCTCCTTCCGTTTCTATCTTTCCATCCCTCTGTC-CTTTGCTTTTCTTCAAGC\tchr4\t-\t94230402\t43081888\t43081887\t43082030\t51148373\t51148372\t51148515\t143\tCTCCCCGGCACGCTCCCTTCCTTCA--TGCTTCAAAATGCTGTCCCTCCTTTTCCTTTTTTTTCTCTCACCCTCCGTGTTCCATTTCGATTCTGAACTTCTGTTTCTTTCTCTCTCTCTCTCTGTAGCTTCTTCTTTCTGTAAAC\t.....:x..x.........:..::.--.x.:..xxx.:x:..:..x::..::...:.:.:.:......x.:...:x.-:x.::....x-:...xxx....:......x...:..:x..:......x-...:x::.....x:..:.\t25M2D50M1I10M1I37M1I18M\t89/140\t63.6%\t143/94230402\t0.0%\t5/140\t3.6%\t146937806\t43276749\n+3046\tchr4\t+\t190214555\t190080775\t190080774\t190080846\t72\tCTCTCCGGCCCCACCAC-----CACCACCGCCACCACGCCCTCCCCCCCCACCCCCCCCCCCCACCACCACCACCAC\tchr4\t-\t94230402\t85088290\t85088289\t85088366\t9142037\t9142036\t9142113\t77\tCTCTCCCGGCTCCGCACGCGTGCGGCCCCGCTCCCCTCCGCTGCCCCCCCAACCCCCCCTACCACCACCACCACCAC\t......x.x.:.xx...-----.:x.x....:x..x:x.x..x........x.......:x................\t17M5I55M\t54/72\t75.0%\t77/94230402\t0.0%\t5/72\t6.9%\t104992485\t85222070\n+3176\tchr4\t+\t190214555\t190102566\t190102565\t190102606\t41\tTCTCACCTCCGTTGGCAAAAAACAAACAAACAAACAAAAAA\tchr4\t-\t94230402\t80284921\t80284920\t80284961\t13945442\t13945441\t13945482\t41\tTCTCTCCCCCTTTGGTAAAAAACAAACAAACAAACAAAAAA\t....x..:..x....:.........................\t41M\t37/41\t90.2%\t41/94230402\t0.0%\t0/41\t0.0%\t109817645\t80396910\
n+3217\tchr4\t+\t190214555\t190173429\t190173428\t190173504\t76\tCCGGCCCCACCACCACCACCGCCACCACGCCCTCCCCCACCACCCCCCCCCCCACCACCACCACCACCACCACCAC\tchr4\t-\t94230402\t85088295\t85088294\t85088366\t9142037\t9142036\t9142108\t72\tCCGGCTCCGC----ACGCGTGCGGCCCCGCTCCCCTCCGCTGCCCCCCCAACCCCCCCTACCACCACCACCACCAC\t.....:..:.----..xxx:..x:..x...:.:..:..:.::.......xx..x..x.:.................\t10M4D62M\t52/72\t72.2%\t72/94230402\t0.0%\t4/72\t5.6%\t105085134\t85129421\n'