Next changeset 1:9642674cbe9b (2016-11-01) |
Commit message:
Uploaded |
added:
dotplot/._dotplot_chr4.png dotplot/dotplot.py dotplot/dotplot.xml dotplot/dotplot_chr4.png dotplot/test-data/._dotplot_chr4.pdf dotplot/test-data/._human_vs_chicken_chr4.tabular dotplot/test-data/dotplot_chr4.pdf dotplot/test-data/human_vs_chicken_chr4.tabular |
b |
diff -r 000000000000 -r a8fcdd1c2cce dotplot/._dotplot_chr4.png |
b |
Binary file dotplot/._dotplot_chr4.png has changed |
b |
diff -r 000000000000 -r a8fcdd1c2cce dotplot/dotplot.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dotplot/dotplot.py Tue Nov 01 15:19:49 2016 -0400 |
[ |
#!/usr/bin/env python
"""Draw a dot plot of LASTZ tabular alignments against a single reference.

Usage: dotplot.py <lastz_tabular> <min_length> <output_pdf>

Every contig (query sequence) found in the input gets its own horizontal band
on the y axis; bands are stacked from the largest contig to the smallest and
separated by light gray lines.  Each retained alignment is drawn as a black
line segment from (reference start, query start) to (reference end, query end).
"""

import matplotlib
matplotlib.use('Agg')  # choose the non-interactive backend before pyplot is imported
import matplotlib.pyplot as plt
import sys

# 0-based positions of the whitespace-separated columns read from each row.
REFERENCE_SIZE_COL = 3
REFERENCE_START_COL = 5
REFERENCE_END_COL = 6
CONTIG_NAME_COL = 9
CONTIG_SIZE_COL = 11
QUERY_START_COL = 16
QUERY_END_COL = 17


def _read_alignments(path, min_length):
    """Parse the alignment table stored at *path*.

    Returns a tuple ``(sizes, alignments, reference_size)`` where

    * ``sizes`` maps contig name -> contig size (taken from the first row
      that mentions the contig),
    * ``alignments`` maps contig name -> list of
      ``[(ref_start, ref_end), (query_start, query_end)]`` for every row
      whose reference span is strictly greater than *min_length*,
    * ``reference_size`` is the reference size from the last data row, or 0
      when the file holds no data rows.

    Blank lines and lines starting with ``#`` are skipped.
    """
    sizes = {}
    alignments = {}
    reference_size = 0

    # The context manager guarantees the handle is closed, even on a parse error.
    with open(path) as handle:
        for line in handle:
            fields = line.split()
            if not fields or fields[0].startswith('#'):
                continue

            ref_start = int(fields[REFERENCE_START_COL])
            ref_end = int(fields[REFERENCE_END_COL])
            query_start = int(fields[QUERY_START_COL])
            query_end = int(fields[QUERY_END_COL])
            contig_name = fields[CONTIG_NAME_COL]
            reference_size = int(fields[REFERENCE_SIZE_COL])

            # Register the contig even when none of its alignments pass the
            # filter, so it still occupies a band in the plot.
            if contig_name not in sizes:
                sizes[contig_name] = int(fields[CONTIG_SIZE_COL])
                alignments[contig_name] = []

            # NOTE(review): query coordinates are plotted exactly as read.  If
            # the input reports reverse-strand hits in forward-strand
            # coordinates, those segments get the same slope as forward hits;
            # confirm whether that is intended.
            if abs(ref_start - ref_end) > min_length:
                alignments[contig_name].append(
                    [(ref_start, ref_end), (query_start, query_end)])

    return sizes, alignments, reference_size


def _save_dotplot(sizes, alignments, reference_size, output_path):
    """Render the stacked dot plot and write it to *output_path* as a PDF."""
    offset = 0  # y position at which the current contig's band starts
    tick_positions = []
    tick_labels = []

    plt.figure()
    for name in sorted(sizes, key=sizes.get, reverse=True):
        size = sizes[name]
        for reference, query in alignments[name]:
            plt.plot(reference, [pos + offset for pos in query], 'k-', zorder=10)
        # Label each band at its vertical centre; 2.0 keeps the position the
        # same under Python 2 and Python 3 division rules.
        tick_positions.append(offset + size / 2.0)
        tick_labels.append("{} bp".format(size))
        offset += size
        plt.axhline(offset, zorder=1, color='lightgray')

    plt.xlim([0, reference_size])
    plt.ylim([0, offset])
    plt.yticks(tick_positions, tick_labels)
    plt.tick_params(axis='y', which='both', length=0)
    plt.xlabel("Position in Reference (bp)")
    plt.ylabel("Contigs by size")
    plt.tight_layout()
    # The format is forced because the output path may not end in ".pdf".
    plt.savefig(output_path, dpi=200, format='pdf')


def main():
    """Command-line entry point: read the table, filter it and save the plot.

    Exits with an explanatory message when too few arguments are given or
    when the input contains no alignment rows.
    """
    if len(sys.argv) < 4:
        sys.exit("usage: dotplot.py <lastz_tabular> <min_length> <output_pdf>")

    input_path = sys.argv[1]
    min_length = int(sys.argv[2])
    output_path = sys.argv[3]

    sizes, alignments, reference_size = _read_alignments(input_path, min_length)
    if not sizes:
        sys.exit("No alignments found in {}".format(input_path))

    _save_dotplot(sizes, alignments, reference_size, output_path)


if __name__ == '__main__':
    main()
b |
diff -r 000000000000 -r a8fcdd1c2cce dotplot/dotplot.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dotplot/dotplot.xml Tue Nov 01 15:19:49 2016 -0400 |
b |
<tool id="lastz_dotplot_matplotlib" name="Dot Plot" version="1.0.0">
    <description>of LASTZ tabular output</description>
    <requirements>
        <requirement type="package">numpy</requirement>
        <requirement type="package">matplotlib</requirement>
    </requirements>
    <!-- Positional arguments: input table, length threshold, output PDF.
         The output variable has to match the name of the data element
         declared in the outputs section (out_file1). -->
    <command interpreter="python">dotplot.py '$input' '$filter' '$out_file1'</command>
    <inputs>
        <param name="input" type="data" format="tabular" label="LASTZ tabular output" help="Dataset missing? See TIP below"/>
        <!-- The script converts this value with int(), so only integers are accepted. -->
        <param name="filter" type="integer" value="0" min="0" label="Exclude alignments of this length or shorter (bp)"/>
    </inputs>
    <outputs>
        <data format="pdf" name="out_file1"/>
    </outputs>
    <tests>
        <test>
            <param name="input" value="human_vs_chicken_chr4.tabular" ftype="tabular"/>
            <param name="filter" value="100"/>
            <output name="out_file1" file="dotplot_chr4.pdf" compare="sim_size" />
        </test>
    </tests>
    <help>

**Syntax**

This tool creates a dot plot of the contents of a LASTZ tabular file resulting from the alignment of one or more sequences to a single reference sequence.

If multiple query sequences are present, they will be sorted by size, offset from one another in the dot plot, and separated by a gray line.

- **filter** alignments whose length on the reference is less than or equal to this value (in bp) are excluded from the plot.

-----

**Example**

Chicken chromosome 4 was aligned against human chromosome 4 using LASTZ, specifying tabular output, no transitions, and performing gap free extension.

Using a 100 bp filter above we get the following dotplot.

.. image:: dotplot_chr4.png
</help>
</tool>
b |
diff -r 000000000000 -r a8fcdd1c2cce dotplot/dotplot_chr4.png |
b |
Binary file dotplot/dotplot_chr4.png has changed |
b |
diff -r 000000000000 -r a8fcdd1c2cce dotplot/test-data/._dotplot_chr4.pdf |
b |
Binary file dotplot/test-data/._dotplot_chr4.pdf has changed |
b |
diff -r 000000000000 -r a8fcdd1c2cce dotplot/test-data/._human_vs_chicken_chr4.tabular |
b |
Binary file dotplot/test-data/._human_vs_chicken_chr4.tabular has changed |
b |
diff -r 000000000000 -r a8fcdd1c2cce dotplot/test-data/dotplot_chr4.pdf |
b |
Binary file dotplot/test-data/dotplot_chr4.pdf has changed |
b |
diff -r 000000000000 -r a8fcdd1c2cce dotplot/test-data/human_vs_chicken_chr4.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dotplot/test-data/human_vs_chicken_chr4.tabular Tue Nov 01 15:19:49 2016 -0400 |
b |
b'@@ -0,0 +1,44574 @@\n+3510\tchr4\t+\t190214555\t47505\t47504\t47601\t97\tAACAAAGAAGAAGAAGAAGAAGAAGAAGAACAAGAACAAGAAGAACAAGAA---CAAGAAGAACAAGAACAAGAAGAAGAAGAAGAAAGAAAAGAAGAAG\tchr4\t+\t94230402\t10269793\t10269792\t10269892\t10269793\t10269792\t10269892\t100\tAACAGAAAAACAGTGAAAGAAGAAGAGAATAAAGAAACAAAAGGCCAAGAAAAGCATGAAGAACAAAGGAAAGAGTATTGGGAAGAGAGAATAAATGAAG\t....:.:..:x..x::..........::.xx.....xx.:...:x......---..x.........:::x....:x.xx::.....:....x.:.x....\t51M3I46M\t66/97\t68.0%\t100/94230402\t0.0%\t3/97\t3.1%\t-10222288\t-84008114\n+3078\tchr4\t+\t190214555\t50454\t50453\t50512\t59\tGCACCTGTAGTCCCAGCTACTGGGGAGGCTGAGTCAGGAGAATGGCGTGAACCCGGGAG\tchr4\t+\t94230402\t76033206\t76033205\t76033264\t76033206\t76033205\t76033264\t59\tGCACGTGTAGTTCCAGCTACTTGAAAGGCTGAGCCCTGTGAAGCACTTGAACCCAGAAG\t....x......:.........x.::........:.xx.x...xx:.x.......:.:..\t59M\t44/59\t74.6%\t59/94230402\t0.0%\t0/59\t0.0%\t-75982752\t-18247650\n+3124\tchr4\t+\t190214555\t82350\t82349\t82397\t48\tAAGTCCATTTTTTTGGTTTGTGGTTTTTTTTTTTTTTTTTTTTTTTTT\tchr4\t+\t94230402\t14492251\t14492250\t14492298\t14492251\t14492250\t14492298\t48\tAAATGCATGTTTTCTGTTTGTGGTTTTTTTTTTTTTTTTTGTATTTTT\t..:.x...x....:x.........................x.x.....\t48M\t41/48\t85.4%\t48/94230402\t0.0%\t0/48\t0.0%\t-14409901\t-79820501\n+3251\tchr4\t+\t190214555\t82354\t82353\t82406\t53\tCCATTTTTTTGGTTTGTGGTTTTTTTTTTTTTTTTTTTTTTTTTTTTGAGATG\tchr4\t+\t94230402\t81894504\t81894503\t81894556\t81894504\t81894503\t81894556\t53\tCCACTACTGTATTTTGTGGTTTTTTTTTTCTTTTTTCTTTTGTTTTTGAGCTG\t...:.x:.x.:x.................:......:....x........x..\t53M\t43/53\t81.1%\t53/94230402\t0.0%\t0/53\t0.0%\t-81812150\t-12418252\n+3941\tchr4\t+\t190214555\t86070\t86069\t86208\t139\tAACATACAGGAATTCATGCTGGAGAGAAACCCTACAAATGTGAAAAATGTGGCAAAGCCTTTAATAGGTCCACATCACTTAGTAAA-CATAAGAGAATTCATACTGGAGAGAAACCCTACACATGTGAAGAATGTGGCAA\tchr4\t+\t94230402\t81517404\t81517403\t81517542\t81517404\t81517403\t81517542\t139\tAGCATAAAAAGACTCATACGGCAGATAAAGTGTTCACCTGTGATGAATGTGGGAAGTCA
TTCAACATG-CAACGAAAATTAGTAAAGCACAGAATTAGACATACTGGGGAGAGACCATACAGCTGTTCAGCATGTGGTAA\t.:....x.:::.:....:.x.x...x...x:x.x..xx.....x:.......x..:x.x..:..:.x.-.x..:xx.x........-..:.::.xx.xx........:....:...x....xx...xx..x......:..\t68M1D17M1I53M\t92/138\t66.7%\t139/94230402\t0.0%\t2/138\t1.4%\t-81431334\t-12799068\n+4798\tchr4\t+\t190214555\t86081\t86080\t86401\t321\tATTCATGCTGGAGAGAAACCCTACAAATGTGAAAAATGTGGCAAAGC----------------CTTTAATAGGTCCACATCACTTAGTAAACATAAGAGAATTCATACTGGAGAGAAACCCTACACATGTGAAGAATGTGGCAAAGCCTTTAGACGGTCCACAGT--TCTGAACGAACATAAGAAAATTCATACTGGAGAGAAACCCTACAAATGTGAAGAATGTGGCAAAGCCTTTACAAGGTCCACAACACTGAATGAA-CACAAGAAAATTCATACTGGAGAGAAACCCTACAAATGTAAAGAATGTGGCAAAGCCTTTAGATGGTCCACAAGCCTG\tchr4\t+\t94230402\t81517246\t81517245\t81517563\t81517246\t81517245\t81517563\t318\tATTCATTCTGGAGAAAAGCCTCATCTGTGTGATATCTGTGGCAGAGGTAGGTAAGAACCAAAGCTTTTGCCTCTTAAGGTCTTTGGATTGAAATGTGAGAAGTCATTTTAAACTG---TCTCATATTTTTCTAGGATTCAGTAA--------------CTTCAGTAATTTGAAGGAGCATAAAAAGACTCATACGGCAGATAAAGTGTTCACCTGTGATGAATGTGGGAAGTCATTCAACATG-CAACGAAAATTAGTAAAGCACAGAATTAGACATACTGGGGAGAGACCATACAGCTGTTCAGCATGTGGTAAGATTT----ACTGTGCATAGGCTTG\t......x.......:..:..::.:xx:.....x.xx.......:..x----------------....x::xxx.:x.x:..x:.x::.x:.x..:x.....x....x:.::.xx.---:.::.:.:x.x.xx..:..x::.:..--------------.:x....--.:....x..:.....:..:.:......x.x...x...x:x.x..xx.....x........x..:x.x..:.xx.x.-.x..:.x.x.x.:.:..-....::.xx.xx........:....:...x....:x...xx..x......:..::::.----.:x..x..:.:..:..\t47M16I52M3D26M14D7M2I76M1D17M1I58M4D16M\t186/299\t62.2%\t318/94230402\t0.0%\t41/299\t13.7%\t-81431165\t-12799237\n+5064\tchr4\t+\t190214555\t86500\t86499\t86801\t302\tTATTCATACTGGCGAAAAACCCTACACATGTGAAAAATGTGGCA-----------------AAGCTTTTAACCAATCCTCAAGTCTTATTAT--ACACAGGAGCATTCATTCTGAACAAAAACTTTACAAATGTGAAGAATGTGGCAAAGCCTTTACTTGGTCCTCATCCCTTAATAAACATAAGAGAATTCATACTGGAGAGAAACCCTACACATGTGAAGAATGTGGCAAAGCTTTTTATAGGTCCTCACACCTTGCTAAACATAAGAGAATTCATACTGGAGAGAAACCCTACACGTGCGAAGAATGTGGCAAAGCTT\tchr4\t+\t94230402\t81517245\t81517244
\t81517547\t81517245\t81517244\t81517547\t303\tTATTCATTCTGGAGAAAAGCCTCATCTGTGTGATATCTGTGGCAGAGGTAGGTAAGAACCAAAGCTTTTGCC---TCTTAAGGTCTTTGGATTGAAATGTGAGAAGTCATTTTAAACTG---TCTCATATTTTTCTAGGATTCAGTAACTTCAG'..b'xx.:....x:..:..x.x...:.....\t65M9D54M\t78/119\t65.5%\t119/94230402\t0.0%\t9/119\t7.6%\t131862986\t58351569\n+4090\tchr4\t+\t190214555\t189898043\t189898042\t189898170\t128\tAAAGAAAGAGAAAGAAAGAAAAGAAAGAAAGGAAAGAAAGAAAGAAAAGAAAAGAAAGAAAGAAAGAAAGAAAGAAAGAGAAAGAAAAAGAAAGAAAGAAAGAAAGAAAGAGAAAGAAAGGGAGAAAG\tchr4\t-\t94230402\t30533164\t30533163\t30533286\t63697117\t63697116\t63697239\t123\tAAATAAAAAGGAAGAAAGAAAAGAGGGGAGAGAAAGACTGTGAAAAAGTAAAGGCTCAAGACAAAGACCGAAATA-----GAAGAATGAGGAAAAAAAGAGAAAACACCGGAAAGGAGAGGAATAAAG\t...x...:..:.............::.:.::......xx.x:.:...:x...:.xxx:.:.x.....xx....x.-----:.....x:..:..:...::.::...x.xx.::..:..:...:.x....\t75M5D48M\t80/123\t65.0%\t123/94230402\t0.0%\t5/123\t4.1%\t159364879\t30849676\n+3046\tchr4\t+\t190214555\t189898066\t189898065\t189898166\t101\tAAAGAAAGGAAAGAAAGAAAGAAAAGAAAAGAAAGAAAGAAAGAAAGAAAGAAAGAGAAAGAAAAAGAAAGAAAGAAAGAAAGAAAGAGAAAGAAAGGGAG\tchr4\t-\t94230402\t28816821\t28816820\t28816921\t65413482\t65413481\t65413582\t101\tAATGGTAGGAAAAAAAGAATGAAAAGCTGACAGAGATAAAAAGAAAGAAAAGAACACATGGAGCTGTTATGAAGCCATGAAAGAAAGAAATCAAAGCAGAG\t..x.:x......:......x......xx:.x.:...x.:...........::..x.x.x:..:xx:xx.x...:xx.x..........:.xx:..:x:...\t101M\t65/101\t64.4%\t101/94230402\t0.0%\t0/101\t0.0%\t161081245\t29133310\n+3991\tchr4\t+\t190214555\t189946201\t189946200\t189946286\t86\tTGATCGTTT-AGTACAGGTGTATTCAATCTTTTAGCTTCCCTGGGCCACAATGAAAGGAGAGGAATTGTCTTGGGCCACACATAAAA\tchr4\t-\t94230402\t9885843\t9885842\t9885929\t84344474\t84344473\t84344560\t87\tTGATTGCTTGAATTCAGGACTATCCAACCTTCTGTCTTCCCTGAGCCACAATGAGTGAATAGGAATTACCTAGGGCTGCATCTAAGA\t....:.:..-.:.x....xx...:...:...:.:x........:..........:x.:.x.......::..x....::..:x...:.\t9M1I77M\t62/86\t72.1%\t87/94230402\t0.0%\t1/86\t1.2%\t180060358\t10154197\n+3466\tchr4\t+\t190214555\t190010480\t190010479\t
190010584\t105\tCAGGCT---GGCATGCAGTGGCATGATCTCAGCTCACTGCAACCTCTGCCTCCCCGGTTCGAGTGATTCACCTGCCTCAGCCTCCCAACTAGCTGGGATTACAGGTGC\tchr4\t-\t94230402\t18197091\t18197090\t18197197\t76033206\t76033205\t76033312\t107\tCAGGCTTAGGGCACACATCTGCTTGGCATCATGTT-CTACAGCCCAAAGCTTCTGGGTTCAAGTGCTTCACAGGGCTCAGCCTTTCAAGTAGCTGGAACTACACGTGC\t......---....::..x:x..x..::x...xx.:-..:..:..:xx:x..:.:x.....:....x.....xx.x........::...x.......:.:....x....\t6M3I26M1D72M\t71/104\t68.3%\t107/94230402\t0.0%\t4/104\t3.8%\t171813389\t18401166\n+3393\tchr4\t+\t190214555\t190019694\t190019693\t190019835\t142\tCTCCCTCGCCCGCTCCCTTTCTCTACTTCCCTCTCCACCTTGCCCGCTCTCCTCCCTCTCTCTCTCTCTCTCTCTCT-CGCTGTTTCT-CTCTCTCCTTCCGTTTCTATCTTTCCATCCCTCTGTC-CTTTGCTTTTCTTCAAGC\tchr4\t-\t94230402\t43081888\t43081887\t43082030\t51148373\t51148372\t51148515\t143\tCTCCCCGGCACGCTCCCTTCCTTCA--TGCTTCAAAATGCTGTCCCTCCTTTTCCTTTTTTTTCTCTCACCCTCCGTGTTCCATTTCGATTCTGAACTTCTGTTTCTTTCTCTCTCTCTCTCTGTAGCTTCTTCTTTCTGTAAAC\t.....:x..x.........:..::.--.x.:..xxx.:x:..:..x::..::...:.:.:.:......x.:...:x.-:x.::....x-:...xxx....:......x...:..:x..:......x-...:x::.....x:..:.\t25M2D50M1I10M1I37M1I18M\t89/140\t63.6%\t143/94230402\t0.0%\t5/140\t3.6%\t146937806\t43276749\n+3046\tchr4\t+\t190214555\t190080775\t190080774\t190080846\t72\tCTCTCCGGCCCCACCAC-----CACCACCGCCACCACGCCCTCCCCCCCCACCCCCCCCCCCCACCACCACCACCAC\tchr4\t-\t94230402\t85088290\t85088289\t85088366\t9142037\t9142036\t9142113\t77\tCTCTCCCGGCTCCGCACGCGTGCGGCCCCGCTCCCCTCCGCTGCCCCCCCAACCCCCCCTACCACCACCACCACCAC\t......x.x.:.xx...-----.:x.x....:x..x:x.x..x........x.......:x................\t17M5I55M\t54/72\t75.0%\t77/94230402\t0.0%\t5/72\t6.9%\t104992485\t85222070\n+3176\tchr4\t+\t190214555\t190102566\t190102565\t190102606\t41\tTCTCACCTCCGTTGGCAAAAAACAAACAAACAAACAAAAAA\tchr4\t-\t94230402\t80284921\t80284920\t80284961\t13945442\t13945441\t13945482\t41\tTCTCTCCCCCTTTGGTAAAAAACAAACAAACAAACAAAAAA\t....x..:..x....:.........................\t41M\t37/41\t90.2%\t41/94230402\t0.0%\t0/41\t0.0%\t109817645\t80396910\
n+3217\tchr4\t+\t190214555\t190173429\t190173428\t190173504\t76\tCCGGCCCCACCACCACCACCGCCACCACGCCCTCCCCCACCACCCCCCCCCCCACCACCACCACCACCACCACCAC\tchr4\t-\t94230402\t85088295\t85088294\t85088366\t9142037\t9142036\t9142108\t72\tCCGGCTCCGC----ACGCGTGCGGCCCCGCTCCCCTCCGCTGCCCCCCCAACCCCCCCTACCACCACCACCACCAC\t.....:..:.----..xxx:..x:..x...:.:..:..:.::.......xx..x..x.:.................\t10M4D62M\t52/72\t72.2%\t72/94230402\t0.0%\t4/72\t5.6%\t105085134\t85129421\n' |