Mercurial > repos > greg > call_insertions
changeset 0:5626dc2d0ef2 draft default tip
Uploaded
author | greg |
---|---|
date | Tue, 07 Feb 2023 21:43:42 +0000 |
parents | |
children | |
files | .shed.yml call_insertions.xml macros.xml test-data/dnadiff_1coords.tsv test-data/dnadiff_report.txt test-data/genome_aligned.bed test-data/reference_aligned.bed |
diffstat | 7 files changed, 387 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/.shed.yml Tue Feb 07 21:43:42 2023 +0000 @@ -0,0 +1,9 @@ +name: call_insertions +owner: greg +description: Extracts aligned regions of 2 genomes and produces reference alignment and genome alignment BED files +long_description: Extracts aligned regions of 2 genomes and produces reference alignment and genome alignment BED files +categories: +- Nanopore +remote_repository_url: https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/pima/call_insertions +homepage_url: https://github.com/gregvonkuster/galaxy_tools +type: unrestricted
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/call_insertions.xml Tue Feb 07 21:43:42 2023 +0000
@@ -0,0 +1,55 @@
+<tool id="call_insertions" name="PIMA: call insertions" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>extract aligned regions of two genomes as BED files</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <!--
+    Step 1 writes the first AvgIdentity value of the DNAdiff report to the scratch file reference_identity.
+    Step 2 writes the first AlignedBases value, with its parenthesised part stripped and multiplied by 100,
+    to the scratch file reference_aligned_bases. Steps 3 and 4 turn the 1coords table into the two BED outputs.
+    NOTE(review): nothing reads the two scratch files, the reference_identity_min and
+    reference_alignment_min parameters are not referenced by the command, and the step 2 value is a
+    base count times 100 rather than a percentage. Confirm whether a threshold check still has to be wired in.
+    -->
+    <command detect_errors="exit_code"><![CDATA[
+grep AvgIdentity '$dnadiff_report' | head -1 | awk '{print $2}' > reference_identity &&
+grep AlignedBases '$dnadiff_report' | head -1 | awk '{sub(/\(.*/, "", $2); print $2 * 100}' > reference_aligned_bases &&
+awk 'BEGIN {OFS = "\t"} {if ($2 < $1){t = $2; $2 = $1; $1 = t} print $12,$1,$2}' '$dnadiff_1coords' | sort -k 1,1 -k 2,2n > '$reference_aligned' &&
+awk 'BEGIN {OFS = "\t"} {if ($4 < $3){t = $4; $4 = $3; $3 = t} print $13,$3,$4}' '$dnadiff_1coords' | sort -k 1,1 -k 2,2n > '$genome_aligned'
+]]></command>
+    <inputs>
+        <param argument="--dnadiff_report" type="data" format="txt" label="DNAdiff report file"/>
+        <param argument="--dnadiff_1coords" type="data" format="tabular" label="DNAdiff 1coords file"/>
+        <param argument="--reference_identity_min" type="float" value="98.0" min="0" label="Minimum reference identity"/>
+        <param argument="--reference_alignment_min" type="float" value="97.0" min="0" label="Minimum reference alignment"/>
+    </inputs>
+    <outputs>
+        <data name="reference_aligned" format="bed" label="${tool.name} on ${on_string} (reference alignment)"/>
+        <data name="genome_aligned" format="bed" label="${tool.name} on ${on_string} (genome alignment)"/>
+    </outputs>
+    <tests>
+        <test>
+            <!-- The 1coords fixture is uploaded as tabular so that it matches the format declared on the input param. -->
+            <param name="dnadiff_report" value="dnadiff_report.txt" ftype="txt"/>
+            <param name="dnadiff_1coords" value="dnadiff_1coords.tsv" ftype="tabular"/>
+            <output name="reference_aligned" value="reference_aligned.bed" ftype="bed"/>
+            <output name="genome_aligned" value="genome_aligned.bed" ftype="bed"/>
+        </test>
+    </tests>
+    <help>
+**What it does**
+
+Accepts DNAdiff report and DNAdiff 1coords files and extracts the aligned regions of the two genomes, producing reference alignment
+and genome alignment BED files.
+
+**Outputs**
+
+- Reference alignment: columns 12, 1 and 2 of the 1coords file.
+- Genome alignment: columns 13, 3 and 4 of the 1coords file.
+
+In both files the two coordinates of a row are swapped when needed so that the smaller one comes first, and the rows are sorted by the name column and then numerically by the start column. Coordinates are copied as they appear in the 1coords file; no offset is applied.
+    </help>
+    <expand macro="citations"/>
+</tool>
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Tue Feb 07 21:43:42 2023 +0000 @@ -0,0 +1,18 @@ +<macros> <!-- Shared tokens and macro definitions; call_insertions.xml imports this file. --> + <token name="@TOOL_VERSION@">1.0.0</token> <!-- Substituted into the tool's version attribute. --> + <token name="@VERSION_SUFFIX@">0</token> <!-- Follows "+galaxy" in the tool's version attribute. --> + <token name="@PROFILE@">21.01</token> <!-- Substituted into the tool's profile attribute. --> + <xml name="requirements"> <!-- Expanded by the tool via expand macro="requirements"; pins the packages that supply the shell commands it runs. --> + <requirements> + <requirement type="package" version="9.1">coreutils</requirement> + <requirement type="package" version="5.1.0">gawk</requirement> + <requirement type="package" version="3.4">grep</requirement> + </requirements> + </xml> + <xml name="citations"> <!-- Expanded by the tool via expand macro="citations"; a single DOI citation. --> + <citations> + <citation type="doi">10.1101/011650</citation> + </citations> + </xml> +</macros> +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/dnadiff_1coords.tsv Tue Feb 07 21:43:42 2023 +0000 @@ -0,0 +1,78 @@ +6529 47531 67983 26977 41003 41007 97.62 85603 85882 47.90 47.75 AB615353.1 contig_6 +7527 9486 2618974 2617017 1960 1958 98.57 10295 4041056 19.04 0.05 AJ868288.1 contig_5 +46914 47176 1018893 1019145 263 253 90.91 53865 4041056 0.49 0.01 CP001987.1 contig_5 +1538 1625 1865599 1865510 88 90 95.56 74613 4041056 0.12 0.00 CP003018.1 contig_5 +29589 29664 2995182 2995107 76 76 98.68 54484 4041056 0.14 0.00 CP003689.1 contig_5 +73265 74226 1398768 1399684 962 917 85.35 285163 4041056 0.34 0.02 CP007625.1 contig_5 +37608 61612 74120 50104 24005 24017 97.51 83186 85882 28.86 27.97 CP010015.1 contig_6 +25405 28333 6391589 6394521 2929 2933 95.26 223537 7205629 1.31 0.04 CP010270.1 contig_1 +58641 58986 1477652 1477307 346 346 87.61 232994 7205629 0.15 0.00 CP010578.1 contig_1 +42307 42624 1952376 1952059 318 318 85.53 91229 4041056 0.35 0.01 CP011023.1 contig_5 +44648 47740 5237632 5234540 3093 3093 80.97 163286 7205629 1.89 0.04 CP012367.1 contig_1 +79448 79745 3741787 3741490 298 298 83.95 100758 7205629 0.30 0.00 CP013205.1 contig_1 +43071 45170 3699274 3701373 2100 2100 82.64 61492 7205629 3.42 0.03 CP013206.1 contig_1 +44872 46449 2272394 2273983 1578 1590 91.23 55914 7205629 2.82 0.02 CP013207.1 contig_1 +52229 54770 2788786 2791320 2542 2535 91.41 55914 7205629 4.55 0.04 CP013207.1 contig_1 +170608 171566 984247 983290 959 958 89.89 293705 4041056 0.33 0.02 CP014284.1 contig_5 +75233 75468 2522501 2522265 236 237 89.08 205480 4041056 0.11 0.01 CP014843.1 contig_5 +75233 75447 2548085 2547870 215 216 96.30 205480 4041056 0.10 0.01 CP014843.1 contig_5 +79230 79892 3542198 3542861 663 664 89.34 87645 7205629 0.76 0.01 CP017081.1 contig_1 +94468 95006 3543458 3542919 539 540 90.74 134795 7205629 0.40 0.01 CP018876.1 contig_1 +1 655 1024494 1025150 655 657 90.92 60905 4041056 1.08 0.02 CP018878.1 contig_5 +59999 60905 1903995 1904902 907 908 
95.27 60905 4041056 1.49 0.02 CP018878.1 contig_5 +1634 2342 2923943 2923227 709 717 81.17 23665 4041056 3.00 0.02 CP021891.1 contig_5 +13176 13625 2910895 2910446 450 450 85.96 23665 4041056 1.90 0.01 CP021891.1 contig_5 +15668 17815 2908389 2906242 2148 2148 80.62 23665 4041056 9.08 0.05 CP021891.1 contig_5 +53109 53309 1865722 1865526 201 197 93.03 130902 4041056 0.15 0.00 CP022237.1 contig_5 +119514 119700 1781267 1781081 187 187 83.51 130902 4041056 0.14 0.00 CP022237.1 contig_5 +128735 128912 1839775 1839612 178 164 90.45 130902 4041056 0.14 0.00 CP022237.1 contig_5 +129269 129601 1924571 1924232 333 340 80.17 130902 4041056 0.25 0.01 CP022237.1 contig_5 +9202 9538 1741627 1741282 337 346 91.71 78572 4041056 0.43 0.01 CP024037.1 contig_5 +9729 9992 1019145 1018893 264 253 90.23 78572 4041056 0.34 0.01 CP024037.1 contig_5 +179 681 3855596 3856098 503 503 97.81 5504 4041056 9.14 0.01 E01503.1 contig_5 +3384 4579 1610092 1611279 1196 1188 97.07 7410 4041056 16.14 0.03 KJ411637.1 contig_5 +6618 7255 1611711 1612350 638 640 96.41 7255 4041056 8.79 0.02 KJ411638.1 contig_5 +72804 73046 3477183 3476939 243 245 88.71 102254 4041056 0.24 0.01 KX711616.1 contig_5 +88903 89871 1305936 1304964 969 973 95.58 102254 4041056 0.95 0.02 KX711616.1 contig_5 +93465 93843 3942219 3941847 379 373 84.36 102254 4041056 0.37 0.01 KX711616.1 contig_5 +6 2075 696019 698076 2070 2058 97.78 5235 4041056 39.54 0.05 KY200664.1 contig_5 +16112 16600 86781 86293 489 489 82.65 17608 4041056 2.78 0.01 LDKD02000015.1 contig_5 +6582 6774 1023582 1023775 193 194 95.36 6775 4041056 2.85 0.00 LKWO01000049.1 contig_5 +5717 5881 1840270 1840102 165 169 89.94 5901 4041056 2.80 0.00 LKWV01000069.1 contig_5 +7162 7846 1019898 1019194 685 705 82.23 8319 4041056 8.23 0.02 LLBO01000173.1 contig_5 +2144 2240 1883236 1883140 97 97 100.00 8544 4041056 1.14 0.00 LT622642.1 contig_5 +19438 19946 3719180 3719688 509 509 98.23 29760 4041056 1.71 0.01 LT622643.1 contig_5 +21266 29760 3719688 3728190 8495 8503 
98.23 29760 4041056 28.55 0.21 LT622643.1 contig_5 +17455 17657 527364 527566 203 203 90.64 39382 4041056 0.52 0.01 MF996510.1 contig_5 +46557 46664 1913452 1913559 108 108 97.22 53865 4041056 0.20 0.00 NC_004604.2 contig_5 +47101 47366 6394 6140 266 255 83.90 53865 7205629 0.49 0.00 NC_004604.2 contig_1 +47529 74217 26710 4 26689 26707 96.94 85603 85882 31.18 31.10 NC_015149.1 contig_6 +74221 85603 85882 74493 11383 11390 97.89 85603 85882 13.30 13.26 NC_015149.1 contig_6 +35623 49969 82843 68493 14347 14351 98.33 84215 85882 17.04 16.71 NC_021809.1 contig_6 +67280 68584 3953855 3955161 1305 1307 85.70 216164 4041056 0.60 0.03 NZ_AP018310.1 contig_5 +20753 20870 1020390 1020273 118 118 94.92 32702 4041056 0.36 0.00 NZ_CM009107.1 contig_5 +28596 30249 4653976 4655630 1654 1655 94.41 223537 7205629 0.74 0.02 NZ_CP010270.1 contig_1 +164112 167234 1903670 1900562 3123 3109 93.57 176219 4041056 1.77 0.08 NZ_CP011009.1 contig_5 +41890 43503 5241625 5240013 1614 1613 84.14 163286 7205629 0.99 0.02 NZ_CP012367.1 contig_1 +75896 76197 2521929 2521626 302 304 89.18 205480 4041056 0.15 0.01 NZ_CP014843.1 contig_5 +64568 65235 3959023 3958356 668 668 83.43 112429 4041056 0.59 0.02 NZ_CP014851.1 contig_5 +44909 46797 3445421 3443567 1889 1855 80.39 84656 4041056 2.23 0.05 NZ_CP015325.1 contig_5 +48334 48870 3442508 3441973 537 536 85.19 84656 4041056 0.63 0.01 NZ_CP015325.1 contig_5 +256054 256172 2995687 2995570 119 118 89.08 258230 4041056 0.05 0.00 NZ_CP015439.1 contig_5 +256868 257244 1019879 1020267 377 389 86.28 258230 4041056 0.15 0.01 NZ_CP015439.1 contig_5 +6488 8071 3866799 3865209 1584 1591 79.65 17666 4041056 8.97 0.04 NZ_CP021506.1 contig_5 +12360 12999 3859243 3858601 640 643 84.78 17666 4041056 3.62 0.02 NZ_CP021506.1 contig_5 +7341 8793 3509699 3508244 1453 1456 78.62 75043 4041056 1.94 0.04 NZ_CP021671.1 contig_5 +63574 66590 3929131 3926102 3017 3030 85.90 75043 4041056 4.02 0.07 NZ_CP021671.1 contig_5 +93636 93972 988667 988325 337 343 94.19 100616 4041056 
0.33 0.01 NZ_CP021678.1 contig_5 +128178 128474 2994040 2994368 297 329 83.28 130902 4041056 0.23 0.01 NZ_CP022237.1 contig_5 +99541 100822 525668 526949 1282 1282 97.58 137143 4041056 0.93 0.03 NZ_CP022984.1 contig_5 +5972 9092 1903666 1900562 3121 3105 93.76 78572 4041056 3.97 0.08 NZ_CP024037.1 contig_5 +9993 10074 1323582 1323501 82 82 100.00 78572 4041056 0.10 0.00 NZ_CP024037.1 contig_5 +70776 70947 2995652 2995825 172 174 90.29 98205 4041056 0.18 0.00 NZ_CP026740.1 contig_5 +23966 24665 1398573 1399269 700 697 85.37 24894 4041056 2.81 0.02 NZ_LFXM01000079.1 contig_5 +8665 8876 1323253 1323043 212 211 90.61 8878 4041056 2.39 0.01 NZ_LKWF01000022.1 contig_5 +5962 7186 1839444 1838219 1225 1226 92.05 7189 4041056 17.04 0.03 NZ_LKXT01000071.1 contig_5 +11540 11625 1408679 1408593 86 87 96.55 12311 4041056 0.70 0.00 NZ_LKYB01000177.1 contig_5 +5481 6177 2994398 2995096 697 699 84.53 6179 4041056 11.28 0.02 NZ_LKZD01000079.1 contig_5 +42 389 1839701 1840063 348 363 86.78 7307 4041056 4.76 0.01 NZ_LKZN01000051.1 contig_5
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/dnadiff_report.txt Tue Feb 07 21:43:42 2023 +0000 @@ -0,0 +1,87 @@ +/home/galaxies/lrn/galaxy/database/files/000/dataset_17.dat /home/galaxies/lrn/galaxy/database/files/002/dataset_2176.dat +NUCMER + + [REF] [QRY] +[Sequences] +TotalSeqs 34305 6 +AlignedSeqs 697(2.0318%) 3(50.0000%) +UnalignedSeqs 33608(97.9682%) 3(50.0000%) + +[Bases] +TotalBases 2077959494 11421968 +AlignedBases 3025775(0.1456%) 228614(2.0015%) +UnalignedBases 2074933719(99.8544%) 11193354(97.9985%) + +[Alignments] +1-to-1 78 78 +TotalLength 187128 187163 +AvgLength 2399.0769 2399.5256 +AvgIdentity 94.7915 94.7915 + +M-to-M 3246 3246 +TotalLength 4521754 4532531 +AvgLength 1393.0234 1396.3435 +AvgIdentity 90.0308 90.0308 + +[Feature Estimates] +Breakpoints 6288 6480 +Relocations 10 2 +Translocations 1 67 +Inversions 2 0 + +Insertions 5794 200 +InsertionSum 41691366 11221715 +InsertionAvg 7195.6103 56108.5750 + +TandemIns 0 0 +TandemInsSum 0 0 +TandemInsAvg 0.0000 0.0000 + +[SNPs] +TotalSNPs 6547 6547 +TG 351(5.3612%) 323(4.9336%) +TC 967(14.7701%) 933(14.2508%) +TA 453(6.9192%) 435(6.6443%) +GC 232(3.5436%) 202(3.0854%) +GA 989(15.1062%) 986(15.0603%) +GT 323(4.9336%) 351(5.3612%) +AG 986(15.0603%) 989(15.1062%) +AC 352(5.3765%) 324(4.9488%) +AT 435(6.6443%) 453(6.9192%) +CG 202(3.0854%) 232(3.5436%) +CA 324(4.9488%) 352(5.3765%) +CT 933(14.2508%) 967(14.7701%) + +TotalGSNPs 366 366 +TC 75(20.4918%) 59(16.1202%) +TA 15(4.0984%) 15(4.0984%) +TG 12(3.2787%) 13(3.5519%) +AC 13(3.5519%) 12(3.2787%) +AG 71(19.3989%) 73(19.9454%) +AT 15(4.0984%) 15(4.0984%) +CT 59(16.1202%) 75(20.4918%) +CG 4(1.0929%) 4(1.0929%) +CA 12(3.2787%) 13(3.5519%) +GT 13(3.5519%) 12(3.2787%) +GC 4(1.0929%) 4(1.0929%) +GA 73(19.9454%) 71(19.3989%) + +TotalIndels 874 874 +T. 133(15.2174%) 156(17.8490%) +G. 60(6.8650%) 89(10.1831%) +A. 119(13.6156%) 138(15.7895%) +C. 
80(9.1533%) 99(11.3272%) +.A 138(15.7895%) 119(13.6156%) +.C 99(11.3272%) 80(9.1533%) +.G 89(10.1831%) 60(6.8650%) +.T 156(17.8490%) 133(15.2174%) + +TotalGIndels 34 34 +T. 1(2.9412%) 8(23.5294%) +A. 0(0.0000%) 15(44.1176%) +C. 0(0.0000%) 7(20.5882%) +G. 0(0.0000%) 3(8.8235%) +.G 3(8.8235%) 0(0.0000%) +.A 15(44.1176%) 0(0.0000%) +.C 7(20.5882%) 0(0.0000%) +.T 8(23.5294%) 1(2.9412%)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/genome_aligned.bed Tue Feb 07 21:43:42 2023 +0000 @@ -0,0 +1,78 @@ +contig_1 6140 6394 +contig_1 1477307 1477652 +contig_1 2272394 2273983 +contig_1 2788786 2791320 +contig_1 3542198 3542861 +contig_1 3542919 3543458 +contig_1 3699274 3701373 +contig_1 3741490 3741787 +contig_1 4653976 4655630 +contig_1 5234540 5237632 +contig_1 5240013 5241625 +contig_1 6391589 6394521 +contig_5 86293 86781 +contig_5 525668 526949 +contig_5 527364 527566 +contig_5 696019 698076 +contig_5 983290 984247 +contig_5 988325 988667 +contig_5 1018893 1019145 +contig_5 1018893 1019145 +contig_5 1019194 1019898 +contig_5 1019879 1020267 +contig_5 1020273 1020390 +contig_5 1023582 1023775 +contig_5 1024494 1025150 +contig_5 1304964 1305936 +contig_5 1323043 1323253 +contig_5 1323501 1323582 +contig_5 1398573 1399269 +contig_5 1398768 1399684 +contig_5 1408593 1408679 +contig_5 1610092 1611279 +contig_5 1611711 1612350 +contig_5 1741282 1741627 +contig_5 1781081 1781267 +contig_5 1838219 1839444 +contig_5 1839612 1839775 +contig_5 1839701 1840063 +contig_5 1840102 1840270 +contig_5 1865510 1865599 +contig_5 1865526 1865722 +contig_5 1883140 1883236 +contig_5 1900562 1903666 +contig_5 1900562 1903670 +contig_5 1903995 1904902 +contig_5 1913452 1913559 +contig_5 1924232 1924571 +contig_5 1952059 1952376 +contig_5 2521626 2521929 +contig_5 2522265 2522501 +contig_5 2547870 2548085 +contig_5 2617017 2618974 +contig_5 2906242 2908389 +contig_5 2910446 2910895 +contig_5 2923227 2923943 +contig_5 2994040 2994368 +contig_5 2994398 2995096 +contig_5 2995107 2995182 +contig_5 2995570 2995687 +contig_5 2995652 2995825 +contig_5 3441973 3442508 +contig_5 3443567 3445421 +contig_5 3476939 3477183 +contig_5 3508244 3509699 +contig_5 3719180 3719688 +contig_5 3719688 3728190 +contig_5 3855596 3856098 +contig_5 3858601 3859243 +contig_5 3865209 3866799 +contig_5 3926102 3929131 +contig_5 3941847 3942219 +contig_5 3953855 3955161 +contig_5 3958356 
3959023 +contig_6 4 26710 +contig_6 26977 67983 +contig_6 50104 74120 +contig_6 68493 82843 +contig_6 74493 85882
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/reference_aligned.bed Tue Feb 07 21:43:42 2023 +0000 @@ -0,0 +1,78 @@ +AB615353.1 6529 47531 +AJ868288.1 7527 9486 +CP001987.1 46914 47176 +CP003018.1 1538 1625 +CP003689.1 29589 29664 +CP007625.1 73265 74226 +CP010015.1 37608 61612 +CP010270.1 25405 28333 +CP010578.1 58641 58986 +CP011023.1 42307 42624 +CP012367.1 44648 47740 +CP013205.1 79448 79745 +CP013206.1 43071 45170 +CP013207.1 44872 46449 +CP013207.1 52229 54770 +CP014284.1 170608 171566 +CP014843.1 75233 75447 +CP014843.1 75233 75468 +CP017081.1 79230 79892 +CP018876.1 94468 95006 +CP018878.1 1 655 +CP018878.1 59999 60905 +CP021891.1 1634 2342 +CP021891.1 13176 13625 +CP021891.1 15668 17815 +CP022237.1 53109 53309 +CP022237.1 119514 119700 +CP022237.1 128735 128912 +CP022237.1 129269 129601 +CP024037.1 9202 9538 +CP024037.1 9729 9992 +E01503.1 179 681 +KJ411637.1 3384 4579 +KJ411638.1 6618 7255 +KX711616.1 72804 73046 +KX711616.1 88903 89871 +KX711616.1 93465 93843 +KY200664.1 6 2075 +LDKD02000015.1 16112 16600 +LKWO01000049.1 6582 6774 +LKWV01000069.1 5717 5881 +LLBO01000173.1 7162 7846 +LT622642.1 2144 2240 +LT622643.1 19438 19946 +LT622643.1 21266 29760 +MF996510.1 17455 17657 +NC_004604.2 46557 46664 +NC_004604.2 47101 47366 +NC_015149.1 47529 74217 +NC_015149.1 74221 85603 +NC_021809.1 35623 49969 +NZ_AP018310.1 67280 68584 +NZ_CM009107.1 20753 20870 +NZ_CP010270.1 28596 30249 +NZ_CP011009.1 164112 167234 +NZ_CP012367.1 41890 43503 +NZ_CP014843.1 75896 76197 +NZ_CP014851.1 64568 65235 +NZ_CP015325.1 44909 46797 +NZ_CP015325.1 48334 48870 +NZ_CP015439.1 256054 256172 +NZ_CP015439.1 256868 257244 +NZ_CP021506.1 6488 8071 +NZ_CP021506.1 12360 12999 +NZ_CP021671.1 7341 8793 +NZ_CP021671.1 63574 66590 +NZ_CP021678.1 93636 93972 +NZ_CP022237.1 128178 128474 +NZ_CP022984.1 99541 100822 +NZ_CP024037.1 5972 9092 +NZ_CP024037.1 9993 10074 +NZ_CP026740.1 70776 70947 +NZ_LFXM01000079.1 23966 24665 +NZ_LKWF01000022.1 8665 8876 +NZ_LKXT01000071.1 5962 
7186 +NZ_LKYB01000177.1 11540 11625 +NZ_LKZD01000079.1 5481 6177 +NZ_LKZN01000051.1 42 389