changeset 0:5626dc2d0ef2 draft default tip

Uploaded
author greg
date Tue, 07 Feb 2023 21:43:42 +0000
parents
children
files .shed.yml call_insertions.xml macros.xml test-data/dnadiff_1coords.tsv test-data/dnadiff_report.txt test-data/genome_aligned.bed test-data/reference_aligned.bed
diffstat 7 files changed, 387 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/.shed.yml	Tue Feb 07 21:43:42 2023 +0000
@@ -0,0 +1,9 @@
+name: call_insertions  # Tool Shed repository metadata for the call_insertions tool
+owner: greg
+description: Extracts aligned regions of 2 genomes and produces reference alignment and genome alignment BED files
+long_description: Accepts a DNAdiff report file and a DNAdiff 1coords file and extracts the aligned regions of the two genomes, producing a sorted reference alignment BED file and a sorted genome alignment BED file
+categories:
+- Nanopore
+remote_repository_url: https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/pima/call_insertions
+homepage_url: https://github.com/gregvonkuster/galaxy_tools
+type: unrestricted
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/call_insertions.xml	Tue Feb 07 21:43:42 2023 +0000
@@ -0,0 +1,39 @@
+<tool id="call_insertions" name="PIMA: call insertions" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>extract aligned regions from DNAdiff output as BED</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <command detect_errors="exit_code"><![CDATA[
+grep AvgIdentity '$dnadiff_report' | head -1 | awk '{print $2}' > reference_identity &&
+grep AlignedBases '$dnadiff_report' | head -1 | awk '{sub(/\(.*/, "", $2); print $2 * 100}' > reference_aligned_bases &&
+awk '{OFS = "\t"; if ($2 < $1){t = $2; $2 = $1; $1 = t} print $12,$1,$2}' '$dnadiff_1coords' | sort -k 1,1 -k 2,2n > '$reference_aligned' &&
+awk '{OFS = "\t"; if ($4 < $3){t = $4; $4 = $3; $3 = t} print $13,$3,$4}' '$dnadiff_1coords' | sort -k 1,1 -k 2,2n > '$genome_aligned'
+]]></command> <!-- NOTE(review): the two grep pipelines only write the scratch files reference_identity and reference_aligned_bases; no later command reads them, and the reference_identity_min / reference_alignment_min inputs are never referenced. Decide whether to wire in the threshold check or drop both. -->
+    <inputs>
+        <param argument="--dnadiff_report" type="data" format="txt" label="DNAdiff report file" help="The first AvgIdentity line and the first AlignedBases line of this report are read"/>
+        <param argument="--dnadiff_1coords" type="data" format="tabular" label="DNAdiff 1coords file" help="Tab-separated; columns 1-2 and 3-4 are read as coordinate pairs, columns 12 and 13 as the matching sequence names"/>
+        <param argument="--reference_identity_min" type="float" value="98.0" min="0" label="Minimum reference identity" help="Not applied by the current command: the AvgIdentity value read from the report is never compared against this threshold"/>
+        <param argument="--reference_alignment_min" type="float" value="97.0" min="0" label="Minimum reference alignment" help="Not applied by the current command: the AlignedBases value read from the report is never compared against this threshold"/>
+    </inputs>
+    <outputs>
+        <data name="reference_aligned" format="bed" label="${tool.name} on ${on_string} (reference alignment)"/>
+        <data name="genome_aligned" format="bed" label="${tool.name} on ${on_string} (genome alignment)"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="dnadiff_report" value="dnadiff_report.txt" ftype="txt"/>
+            <param name="dnadiff_1coords" value="dnadiff_1coords.tsv" ftype="tabular"/>
+            <output name="reference_aligned" value="reference_aligned.bed" ftype="bed"/>
+            <output name="genome_aligned" value="genome_aligned.bed" ftype="bed"/>
+        </test>
+    </tests>
+    <help>
+**What it does**
+
+Accepts DNAdiff report and DNAdiff 1coords files and extracts the aligned regions of the two genomes, producing reference alignment
+and genome alignment BED files. Each output line holds a sequence name, a start and an end (swapped if needed so start is not greater than end), sorted by name and then numerically by start.
+    </help>
+    <expand macro="citations"/>
+</tool>
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Tue Feb 07 21:43:42 2023 +0000
@@ -0,0 +1,18 @@
+<macros> <!-- Shared tokens and XML macros imported by call_insertions.xml -->
+    <token name="@TOOL_VERSION@">1.0.0</token> <!-- leading part of the tool's version attribute, before "+galaxy" -->
+    <token name="@VERSION_SUFFIX@">0</token> <!-- appended directly after "+galaxy" in the tool's version attribute -->
+    <token name="@PROFILE@">21.01</token> <!-- value substituted into the tool's profile attribute -->
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="9.1">coreutils</requirement>
+            <requirement type="package" version="5.1.0">gawk</requirement>
+            <requirement type="package" version="3.4">grep</requirement>
+        </requirements>
+    </xml> <!-- "requirements" macro: pinned packages for the grep, awk, and head/cat/sort commands invoked by the tool's command block -->
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1101/011650</citation>
+        </citations>
+    </xml> <!-- "citations" macro: expanded at the end of call_insertions.xml -->
+</macros>
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/dnadiff_1coords.tsv	Tue Feb 07 21:43:42 2023 +0000
@@ -0,0 +1,78 @@
+6529	47531	67983	26977	41003	41007	97.62	85603	85882	47.90	47.75	AB615353.1	contig_6
+7527	9486	2618974	2617017	1960	1958	98.57	10295	4041056	19.04	0.05	AJ868288.1	contig_5
+46914	47176	1018893	1019145	263	253	90.91	53865	4041056	0.49	0.01	CP001987.1	contig_5
+1538	1625	1865599	1865510	88	90	95.56	74613	4041056	0.12	0.00	CP003018.1	contig_5
+29589	29664	2995182	2995107	76	76	98.68	54484	4041056	0.14	0.00	CP003689.1	contig_5
+73265	74226	1398768	1399684	962	917	85.35	285163	4041056	0.34	0.02	CP007625.1	contig_5
+37608	61612	74120	50104	24005	24017	97.51	83186	85882	28.86	27.97	CP010015.1	contig_6
+25405	28333	6391589	6394521	2929	2933	95.26	223537	7205629	1.31	0.04	CP010270.1	contig_1
+58641	58986	1477652	1477307	346	346	87.61	232994	7205629	0.15	0.00	CP010578.1	contig_1
+42307	42624	1952376	1952059	318	318	85.53	91229	4041056	0.35	0.01	CP011023.1	contig_5
+44648	47740	5237632	5234540	3093	3093	80.97	163286	7205629	1.89	0.04	CP012367.1	contig_1
+79448	79745	3741787	3741490	298	298	83.95	100758	7205629	0.30	0.00	CP013205.1	contig_1
+43071	45170	3699274	3701373	2100	2100	82.64	61492	7205629	3.42	0.03	CP013206.1	contig_1
+44872	46449	2272394	2273983	1578	1590	91.23	55914	7205629	2.82	0.02	CP013207.1	contig_1
+52229	54770	2788786	2791320	2542	2535	91.41	55914	7205629	4.55	0.04	CP013207.1	contig_1
+170608	171566	984247	983290	959	958	89.89	293705	4041056	0.33	0.02	CP014284.1	contig_5
+75233	75468	2522501	2522265	236	237	89.08	205480	4041056	0.11	0.01	CP014843.1	contig_5
+75233	75447	2548085	2547870	215	216	96.30	205480	4041056	0.10	0.01	CP014843.1	contig_5
+79230	79892	3542198	3542861	663	664	89.34	87645	7205629	0.76	0.01	CP017081.1	contig_1
+94468	95006	3543458	3542919	539	540	90.74	134795	7205629	0.40	0.01	CP018876.1	contig_1
+1	655	1024494	1025150	655	657	90.92	60905	4041056	1.08	0.02	CP018878.1	contig_5
+59999	60905	1903995	1904902	907	908	95.27	60905	4041056	1.49	0.02	CP018878.1	contig_5
+1634	2342	2923943	2923227	709	717	81.17	23665	4041056	3.00	0.02	CP021891.1	contig_5
+13176	13625	2910895	2910446	450	450	85.96	23665	4041056	1.90	0.01	CP021891.1	contig_5
+15668	17815	2908389	2906242	2148	2148	80.62	23665	4041056	9.08	0.05	CP021891.1	contig_5
+53109	53309	1865722	1865526	201	197	93.03	130902	4041056	0.15	0.00	CP022237.1	contig_5
+119514	119700	1781267	1781081	187	187	83.51	130902	4041056	0.14	0.00	CP022237.1	contig_5
+128735	128912	1839775	1839612	178	164	90.45	130902	4041056	0.14	0.00	CP022237.1	contig_5
+129269	129601	1924571	1924232	333	340	80.17	130902	4041056	0.25	0.01	CP022237.1	contig_5
+9202	9538	1741627	1741282	337	346	91.71	78572	4041056	0.43	0.01	CP024037.1	contig_5
+9729	9992	1019145	1018893	264	253	90.23	78572	4041056	0.34	0.01	CP024037.1	contig_5
+179	681	3855596	3856098	503	503	97.81	5504	4041056	9.14	0.01	E01503.1	contig_5
+3384	4579	1610092	1611279	1196	1188	97.07	7410	4041056	16.14	0.03	KJ411637.1	contig_5
+6618	7255	1611711	1612350	638	640	96.41	7255	4041056	8.79	0.02	KJ411638.1	contig_5
+72804	73046	3477183	3476939	243	245	88.71	102254	4041056	0.24	0.01	KX711616.1	contig_5
+88903	89871	1305936	1304964	969	973	95.58	102254	4041056	0.95	0.02	KX711616.1	contig_5
+93465	93843	3942219	3941847	379	373	84.36	102254	4041056	0.37	0.01	KX711616.1	contig_5
+6	2075	696019	698076	2070	2058	97.78	5235	4041056	39.54	0.05	KY200664.1	contig_5
+16112	16600	86781	86293	489	489	82.65	17608	4041056	2.78	0.01	LDKD02000015.1	contig_5
+6582	6774	1023582	1023775	193	194	95.36	6775	4041056	2.85	0.00	LKWO01000049.1	contig_5
+5717	5881	1840270	1840102	165	169	89.94	5901	4041056	2.80	0.00	LKWV01000069.1	contig_5
+7162	7846	1019898	1019194	685	705	82.23	8319	4041056	8.23	0.02	LLBO01000173.1	contig_5
+2144	2240	1883236	1883140	97	97	100.00	8544	4041056	1.14	0.00	LT622642.1	contig_5
+19438	19946	3719180	3719688	509	509	98.23	29760	4041056	1.71	0.01	LT622643.1	contig_5
+21266	29760	3719688	3728190	8495	8503	98.23	29760	4041056	28.55	0.21	LT622643.1	contig_5
+17455	17657	527364	527566	203	203	90.64	39382	4041056	0.52	0.01	MF996510.1	contig_5
+46557	46664	1913452	1913559	108	108	97.22	53865	4041056	0.20	0.00	NC_004604.2	contig_5
+47101	47366	6394	6140	266	255	83.90	53865	7205629	0.49	0.00	NC_004604.2	contig_1
+47529	74217	26710	4	26689	26707	96.94	85603	85882	31.18	31.10	NC_015149.1	contig_6
+74221	85603	85882	74493	11383	11390	97.89	85603	85882	13.30	13.26	NC_015149.1	contig_6
+35623	49969	82843	68493	14347	14351	98.33	84215	85882	17.04	16.71	NC_021809.1	contig_6
+67280	68584	3953855	3955161	1305	1307	85.70	216164	4041056	0.60	0.03	NZ_AP018310.1	contig_5
+20753	20870	1020390	1020273	118	118	94.92	32702	4041056	0.36	0.00	NZ_CM009107.1	contig_5
+28596	30249	4653976	4655630	1654	1655	94.41	223537	7205629	0.74	0.02	NZ_CP010270.1	contig_1
+164112	167234	1903670	1900562	3123	3109	93.57	176219	4041056	1.77	0.08	NZ_CP011009.1	contig_5
+41890	43503	5241625	5240013	1614	1613	84.14	163286	7205629	0.99	0.02	NZ_CP012367.1	contig_1
+75896	76197	2521929	2521626	302	304	89.18	205480	4041056	0.15	0.01	NZ_CP014843.1	contig_5
+64568	65235	3959023	3958356	668	668	83.43	112429	4041056	0.59	0.02	NZ_CP014851.1	contig_5
+44909	46797	3445421	3443567	1889	1855	80.39	84656	4041056	2.23	0.05	NZ_CP015325.1	contig_5
+48334	48870	3442508	3441973	537	536	85.19	84656	4041056	0.63	0.01	NZ_CP015325.1	contig_5
+256054	256172	2995687	2995570	119	118	89.08	258230	4041056	0.05	0.00	NZ_CP015439.1	contig_5
+256868	257244	1019879	1020267	377	389	86.28	258230	4041056	0.15	0.01	NZ_CP015439.1	contig_5
+6488	8071	3866799	3865209	1584	1591	79.65	17666	4041056	8.97	0.04	NZ_CP021506.1	contig_5
+12360	12999	3859243	3858601	640	643	84.78	17666	4041056	3.62	0.02	NZ_CP021506.1	contig_5
+7341	8793	3509699	3508244	1453	1456	78.62	75043	4041056	1.94	0.04	NZ_CP021671.1	contig_5
+63574	66590	3929131	3926102	3017	3030	85.90	75043	4041056	4.02	0.07	NZ_CP021671.1	contig_5
+93636	93972	988667	988325	337	343	94.19	100616	4041056	0.33	0.01	NZ_CP021678.1	contig_5
+128178	128474	2994040	2994368	297	329	83.28	130902	4041056	0.23	0.01	NZ_CP022237.1	contig_5
+99541	100822	525668	526949	1282	1282	97.58	137143	4041056	0.93	0.03	NZ_CP022984.1	contig_5
+5972	9092	1903666	1900562	3121	3105	93.76	78572	4041056	3.97	0.08	NZ_CP024037.1	contig_5
+9993	10074	1323582	1323501	82	82	100.00	78572	4041056	0.10	0.00	NZ_CP024037.1	contig_5
+70776	70947	2995652	2995825	172	174	90.29	98205	4041056	0.18	0.00	NZ_CP026740.1	contig_5
+23966	24665	1398573	1399269	700	697	85.37	24894	4041056	2.81	0.02	NZ_LFXM01000079.1	contig_5
+8665	8876	1323253	1323043	212	211	90.61	8878	4041056	2.39	0.01	NZ_LKWF01000022.1	contig_5
+5962	7186	1839444	1838219	1225	1226	92.05	7189	4041056	17.04	0.03	NZ_LKXT01000071.1	contig_5
+11540	11625	1408679	1408593	86	87	96.55	12311	4041056	0.70	0.00	NZ_LKYB01000177.1	contig_5
+5481	6177	2994398	2995096	697	699	84.53	6179	4041056	11.28	0.02	NZ_LKZD01000079.1	contig_5
+42	389	1839701	1840063	348	363	86.78	7307	4041056	4.76	0.01	NZ_LKZN01000051.1	contig_5
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/dnadiff_report.txt	Tue Feb 07 21:43:42 2023 +0000
@@ -0,0 +1,87 @@
+/home/galaxies/lrn/galaxy/database/files/000/dataset_17.dat /home/galaxies/lrn/galaxy/database/files/002/dataset_2176.dat
+NUCMER
+
+                               [REF]                [QRY]
+[Sequences]
+TotalSeqs                      34305                    6
+AlignedSeqs             697(2.0318%)          3(50.0000%)
+UnalignedSeqs        33608(97.9682%)          3(50.0000%)
+
+[Bases]
+TotalBases                2077959494             11421968
+AlignedBases        3025775(0.1456%)      228614(2.0015%)
+UnalignedBases  2074933719(99.8544%)   11193354(97.9985%)
+
+[Alignments]
+1-to-1                            78                   78
+TotalLength                   187128               187163
+AvgLength                  2399.0769            2399.5256
+AvgIdentity                  94.7915              94.7915
+
+M-to-M                          3246                 3246
+TotalLength                  4521754              4532531
+AvgLength                  1393.0234            1396.3435
+AvgIdentity                  90.0308              90.0308
+
+[Feature Estimates]
+Breakpoints                     6288                 6480
+Relocations                       10                    2
+Translocations                     1                   67
+Inversions                         2                    0
+
+Insertions                      5794                  200
+InsertionSum                41691366             11221715
+InsertionAvg               7195.6103           56108.5750
+
+TandemIns                          0                    0
+TandemInsSum                       0                    0
+TandemInsAvg                  0.0000               0.0000
+
+[SNPs]
+TotalSNPs                       6547                 6547
+TG                      351(5.3612%)         323(4.9336%)
+TC                     967(14.7701%)        933(14.2508%)
+TA                      453(6.9192%)         435(6.6443%)
+GC                      232(3.5436%)         202(3.0854%)
+GA                     989(15.1062%)        986(15.0603%)
+GT                      323(4.9336%)         351(5.3612%)
+AG                     986(15.0603%)        989(15.1062%)
+AC                      352(5.3765%)         324(4.9488%)
+AT                      435(6.6443%)         453(6.9192%)
+CG                      202(3.0854%)         232(3.5436%)
+CA                      324(4.9488%)         352(5.3765%)
+CT                     933(14.2508%)        967(14.7701%)
+
+TotalGSNPs                       366                  366
+TC                      75(20.4918%)         59(16.1202%)
+TA                       15(4.0984%)          15(4.0984%)
+TG                       12(3.2787%)          13(3.5519%)
+AC                       13(3.5519%)          12(3.2787%)
+AG                      71(19.3989%)         73(19.9454%)
+AT                       15(4.0984%)          15(4.0984%)
+CT                      59(16.1202%)         75(20.4918%)
+CG                        4(1.0929%)           4(1.0929%)
+CA                       12(3.2787%)          13(3.5519%)
+GT                       13(3.5519%)          12(3.2787%)
+GC                        4(1.0929%)           4(1.0929%)
+GA                      73(19.9454%)         71(19.3989%)
+
+TotalIndels                      874                  874
+T.                     133(15.2174%)        156(17.8490%)
+G.                       60(6.8650%)         89(10.1831%)
+A.                     119(13.6156%)        138(15.7895%)
+C.                       80(9.1533%)         99(11.3272%)
+.A                     138(15.7895%)        119(13.6156%)
+.C                      99(11.3272%)          80(9.1533%)
+.G                      89(10.1831%)          60(6.8650%)
+.T                     156(17.8490%)        133(15.2174%)
+
+TotalGIndels                      34                   34
+T.                        1(2.9412%)          8(23.5294%)
+A.                        0(0.0000%)         15(44.1176%)
+C.                        0(0.0000%)          7(20.5882%)
+G.                        0(0.0000%)           3(8.8235%)
+.G                        3(8.8235%)           0(0.0000%)
+.A                      15(44.1176%)           0(0.0000%)
+.C                       7(20.5882%)           0(0.0000%)
+.T                       8(23.5294%)           1(2.9412%)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/genome_aligned.bed	Tue Feb 07 21:43:42 2023 +0000
@@ -0,0 +1,78 @@
+contig_1	6140	6394
+contig_1	1477307	1477652
+contig_1	2272394	2273983
+contig_1	2788786	2791320
+contig_1	3542198	3542861
+contig_1	3542919	3543458
+contig_1	3699274	3701373
+contig_1	3741490	3741787
+contig_1	4653976	4655630
+contig_1	5234540	5237632
+contig_1	5240013	5241625
+contig_1	6391589	6394521
+contig_5	86293	86781
+contig_5	525668	526949
+contig_5	527364	527566
+contig_5	696019	698076
+contig_5	983290	984247
+contig_5	988325	988667
+contig_5	1018893	1019145
+contig_5	1018893	1019145
+contig_5	1019194	1019898
+contig_5	1019879	1020267
+contig_5	1020273	1020390
+contig_5	1023582	1023775
+contig_5	1024494	1025150
+contig_5	1304964	1305936
+contig_5	1323043	1323253
+contig_5	1323501	1323582
+contig_5	1398573	1399269
+contig_5	1398768	1399684
+contig_5	1408593	1408679
+contig_5	1610092	1611279
+contig_5	1611711	1612350
+contig_5	1741282	1741627
+contig_5	1781081	1781267
+contig_5	1838219	1839444
+contig_5	1839612	1839775
+contig_5	1839701	1840063
+contig_5	1840102	1840270
+contig_5	1865510	1865599
+contig_5	1865526	1865722
+contig_5	1883140	1883236
+contig_5	1900562	1903666
+contig_5	1900562	1903670
+contig_5	1903995	1904902
+contig_5	1913452	1913559
+contig_5	1924232	1924571
+contig_5	1952059	1952376
+contig_5	2521626	2521929
+contig_5	2522265	2522501
+contig_5	2547870	2548085
+contig_5	2617017	2618974
+contig_5	2906242	2908389
+contig_5	2910446	2910895
+contig_5	2923227	2923943
+contig_5	2994040	2994368
+contig_5	2994398	2995096
+contig_5	2995107	2995182
+contig_5	2995570	2995687
+contig_5	2995652	2995825
+contig_5	3441973	3442508
+contig_5	3443567	3445421
+contig_5	3476939	3477183
+contig_5	3508244	3509699
+contig_5	3719180	3719688
+contig_5	3719688	3728190
+contig_5	3855596	3856098
+contig_5	3858601	3859243
+contig_5	3865209	3866799
+contig_5	3926102	3929131
+contig_5	3941847	3942219
+contig_5	3953855	3955161
+contig_5	3958356	3959023
+contig_6	4	26710
+contig_6	26977	67983
+contig_6	50104	74120
+contig_6	68493	82843
+contig_6	74493	85882
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/reference_aligned.bed	Tue Feb 07 21:43:42 2023 +0000
@@ -0,0 +1,78 @@
+AB615353.1	6529	47531
+AJ868288.1	7527	9486
+CP001987.1	46914	47176
+CP003018.1	1538	1625
+CP003689.1	29589	29664
+CP007625.1	73265	74226
+CP010015.1	37608	61612
+CP010270.1	25405	28333
+CP010578.1	58641	58986
+CP011023.1	42307	42624
+CP012367.1	44648	47740
+CP013205.1	79448	79745
+CP013206.1	43071	45170
+CP013207.1	44872	46449
+CP013207.1	52229	54770
+CP014284.1	170608	171566
+CP014843.1	75233	75447
+CP014843.1	75233	75468
+CP017081.1	79230	79892
+CP018876.1	94468	95006
+CP018878.1	1	655
+CP018878.1	59999	60905
+CP021891.1	1634	2342
+CP021891.1	13176	13625
+CP021891.1	15668	17815
+CP022237.1	53109	53309
+CP022237.1	119514	119700
+CP022237.1	128735	128912
+CP022237.1	129269	129601
+CP024037.1	9202	9538
+CP024037.1	9729	9992
+E01503.1	179	681
+KJ411637.1	3384	4579
+KJ411638.1	6618	7255
+KX711616.1	72804	73046
+KX711616.1	88903	89871
+KX711616.1	93465	93843
+KY200664.1	6	2075
+LDKD02000015.1	16112	16600
+LKWO01000049.1	6582	6774
+LKWV01000069.1	5717	5881
+LLBO01000173.1	7162	7846
+LT622642.1	2144	2240
+LT622643.1	19438	19946
+LT622643.1	21266	29760
+MF996510.1	17455	17657
+NC_004604.2	46557	46664
+NC_004604.2	47101	47366
+NC_015149.1	47529	74217
+NC_015149.1	74221	85603
+NC_021809.1	35623	49969
+NZ_AP018310.1	67280	68584
+NZ_CM009107.1	20753	20870
+NZ_CP010270.1	28596	30249
+NZ_CP011009.1	164112	167234
+NZ_CP012367.1	41890	43503
+NZ_CP014843.1	75896	76197
+NZ_CP014851.1	64568	65235
+NZ_CP015325.1	44909	46797
+NZ_CP015325.1	48334	48870
+NZ_CP015439.1	256054	256172
+NZ_CP015439.1	256868	257244
+NZ_CP021506.1	6488	8071
+NZ_CP021506.1	12360	12999
+NZ_CP021671.1	7341	8793
+NZ_CP021671.1	63574	66590
+NZ_CP021678.1	93636	93972
+NZ_CP022237.1	128178	128474
+NZ_CP022984.1	99541	100822
+NZ_CP024037.1	5972	9092
+NZ_CP024037.1	9993	10074
+NZ_CP026740.1	70776	70947
+NZ_LFXM01000079.1	23966	24665
+NZ_LKWF01000022.1	8665	8876
+NZ_LKXT01000071.1	5962	7186
+NZ_LKYB01000177.1	11540	11625
+NZ_LKZD01000079.1	5481	6177
+NZ_LKZN01000051.1	42	389