changeset 4:78bbd17d0703 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ivar/ commit 693df287d23b0fd9dfd134b41d401a438c3f5ad6"
author iuc
date Mon, 22 Jun 2020 07:31:22 -0400
parents 7f781286b55f
children 5e668dc9f379
files completemask.py sanitize_bed.py test-data/covid19/ARTIC-V1-bad.bed
diffstat 3 files changed, 244 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/completemask.py	Mon Jun 22 07:31:22 2020 -0400
@@ -0,0 +1,28 @@
+#!/usr/bin/env python
+
+import sys
+
+
+if __name__ == '__main__':
+    # Only the first line of the file named by argv[1] is consulted.
+    with open(sys.argv[1]) as masked_file:
+        first_line = masked_file.readline().strip()
+    if first_line:
+        hit_primers = first_line.split('\t')
+        with open(sys.argv[2]) as amplicon_file:
+            amplicons = [row.strip().split('\t') for row in amplicon_file]
+        # Gather every name from each amplicon row containing a hit primer.
+        to_mask = {
+            name for row in amplicons
+            if any(primer in row for primer in hit_primers)
+            for name in row
+        }
+        joined = '\t'.join(sorted(to_mask))
+        print()
+        print('Removing reads primed with any of:')
+        print(joined)
+        with open(sys.argv[1], 'w') as out:
+            out.write(joined + '\n')
+    else:
+        print()
+        print('No affected primer binding sites found!')
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sanitize_bed.py	Mon Jun 22 07:31:22 2020 -0400
@@ -0,0 +1,20 @@
+#!/usr/bin/env python
+
+import sys
+
+
+with open(sys.argv[1]) as i:
+    bed_data = i.readlines()
+
+# Force the 5th (score) column of every record to '60'. List slicing never
+# raises IndexError, so short records need an explicit length check.
+sanitized_data = []
+for record in bed_data:
+    fields = record.rstrip('\n').split('\t')  # keep newline out of fields
+    if len(fields) < 5:
+        break  # leave column number issue to getmasked
+    fields[4] = '60'
+    sanitized_data.append('\t'.join(fields) + '\n')
+else:  # no break: every record had a score column, safe to rewrite
+    with open(sys.argv[1], 'w') as o:
+        o.writelines(sanitized_data)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/covid19/ARTIC-V1-bad.bed	Mon Jun 22 07:31:22 2020 -0400
@@ -0,0 +1,196 @@
+MN908947.3	30	54	nCoV-2019_1_LEFT	NOINT	+
+MN908947.3	385	410	nCoV-2019_1_RIGHT	NOINT	-
+MN908947.3	320	342	nCoV-2019_2_LEFT	NOINT	+
+MN908947.3	704	726	nCoV-2019_2_RIGHT	NOINT	-
+MN908947.3	642	664	nCoV-2019_3_LEFT	NOINT	+
+MN908947.3	1004	1028	nCoV-2019_3_RIGHT	NOINT	-
+MN908947.3	943	965	nCoV-2019_4_LEFT	NOINT	+
+MN908947.3	1312	1337	nCoV-2019_4_RIGHT	NOINT	-
+MN908947.3	1242	1264	nCoV-2019_5_LEFT	NOINT	+
+MN908947.3	1623	1651	nCoV-2019_5_RIGHT	NOINT	-
+MN908947.3	1573	1595	nCoV-2019_6_LEFT	NOINT	+
+MN908947.3	1942	1964	nCoV-2019_6_RIGHT	NOINT	-
+MN908947.3	1875	1897	nCoV-2019_7_LEFT	NOINT	+
+MN908947.3	2247	2269	nCoV-2019_7_RIGHT	NOINT	-
+MN908947.3	2181	2205	nCoV-2019_8_LEFT	NOINT	+
+MN908947.3	2568	2592	nCoV-2019_8_RIGHT	NOINT	-
+MN908947.3	2505	2529	nCoV-2019_9_LEFT	NOINT	+
+MN908947.3	2882	2904	nCoV-2019_9_RIGHT	NOINT	-
+MN908947.3	2826	2850	nCoV-2019_10_LEFT	NOINT	+
+MN908947.3	3183	3210	nCoV-2019_10_RIGHT	NOINT	-
+MN908947.3	3144	3166	nCoV-2019_11_LEFT	NOINT	+
+MN908947.3	3507	3531	nCoV-2019_11_RIGHT	NOINT	-
+MN908947.3	3460	3482	nCoV-2019_12_LEFT	NOINT	+
+MN908947.3	3826	3853	nCoV-2019_12_RIGHT	NOINT	-
+MN908947.3	3771	3795	nCoV-2019_13_LEFT	NOINT	+
+MN908947.3	4142	4164	nCoV-2019_13_RIGHT	NOINT	-
+MN908947.3	4054	4077	nCoV-2019_14_LEFT	NOINT	+
+MN908947.3	4428	4450	nCoV-2019_14_RIGHT	NOINT	-
+MN908947.3	4294	4321	nCoV-2019_15_LEFT	NOINT	+
+MN908947.3	4674	4696	nCoV-2019_15_RIGHT	NOINT	-
+MN908947.3	4636	4658	nCoV-2019_16_LEFT	NOINT	+
+MN908947.3	4995	5017	nCoV-2019_16_RIGHT	NOINT	-
+MN908947.3	4939	4966	nCoV-2019_17_LEFT	NOINT	+
+MN908947.3	5296	5321	nCoV-2019_17_RIGHT	NOINT	-
+MN908947.3	5230	5259	nCoV-2019_18_LEFT	NOINT	+
+MN908947.3	5620	5644	nCoV-2019_18_RIGHT	NOINT	-
+MN908947.3	5563	5586	nCoV-2019_19_LEFT	NOINT	+
+MN908947.3	5932	5957	nCoV-2019_19_RIGHT	NOINT	-
+MN908947.3	5867	5894	nCoV-2019_20_LEFT	NOINT	+
+MN908947.3	6247	6272	nCoV-2019_20_RIGHT	NOINT	-
+MN908947.3	6167	6196	nCoV-2019_21_LEFT	NOINT	+
+MN908947.3	6528	6550	nCoV-2019_21_RIGHT	NOINT	-
+MN908947.3	6466	6495	nCoV-2019_22_LEFT	NOINT	+
+MN908947.3	6846	6873	nCoV-2019_22_RIGHT	NOINT	-
+MN908947.3	6718	6745	nCoV-2019_23_LEFT	NOINT	+
+MN908947.3	7092	7117	nCoV-2019_23_RIGHT	NOINT	-
+MN908947.3	7035	7058	nCoV-2019_24_LEFT	NOINT	+
+MN908947.3	7389	7415	nCoV-2019_24_RIGHT	NOINT	-
+MN908947.3	7305	7332	nCoV-2019_25_LEFT	NOINT	+
+MN908947.3	7671	7694	nCoV-2019_25_RIGHT	NOINT	-
+MN908947.3	7626	7651	nCoV-2019_26_LEFT	NOINT	+
+MN908947.3	7997	8019	nCoV-2019_26_RIGHT	NOINT	-
+MN908947.3	7943	7968	nCoV-2019_27_LEFT	NOINT	+
+MN908947.3	8319	8341	nCoV-2019_27_RIGHT	NOINT	-
+MN908947.3	8249	8275	nCoV-2019_28_LEFT	NOINT	+
+MN908947.3	8635	8661	nCoV-2019_28_RIGHT	NOINT	-
+MN908947.3	8595	8619	nCoV-2019_29_LEFT	NOINT	+
+MN908947.3	8954	8983	nCoV-2019_29_RIGHT	NOINT	-
+MN908947.3	8888	8913	nCoV-2019_30_LEFT	NOINT	+
+MN908947.3	9245	9271	nCoV-2019_30_RIGHT	NOINT	-
+MN908947.3	9204	9226	nCoV-2019_31_LEFT	NOINT	+
+MN908947.3	9557	9585	nCoV-2019_31_RIGHT	NOINT	-
+MN908947.3	9477	9502	nCoV-2019_32_LEFT	NOINT	+
+MN908947.3	9834	9858	nCoV-2019_32_RIGHT	NOINT	-
+MN908947.3	9784	9806	nCoV-2019_33_LEFT	NOINT	+
+MN908947.3	10146	10171	nCoV-2019_33_RIGHT	NOINT	-
+MN908947.3	10076	10099	nCoV-2019_34_LEFT	NOINT	+
+MN908947.3	10437	10459	nCoV-2019_34_RIGHT	NOINT	-
+MN908947.3	10362	10384	nCoV-2019_35_LEFT	NOINT	+
+MN908947.3	10737	10763	nCoV-2019_35_RIGHT	NOINT	-
+MN908947.3	10666	10688	nCoV-2019_36_LEFT	NOINT	+
+MN908947.3	11048	11074	nCoV-2019_36_RIGHT	NOINT	-
+MN908947.3	10999	11022	nCoV-2019_37_LEFT	NOINT	+
+MN908947.3	11372	11394	nCoV-2019_37_RIGHT	NOINT	-
+MN908947.3	11306	11331	nCoV-2019_38_LEFT	NOINT	+
+MN908947.3	11668	11693	nCoV-2019_38_RIGHT	NOINT	-
+MN908947.3	11555	11584	nCoV-2019_39_LEFT	NOINT	+
+MN908947.3	11927	11949	nCoV-2019_39_RIGHT	NOINT	-
+MN908947.3	11863	11889	nCoV-2019_40_LEFT	NOINT	+
+MN908947.3	12234	12256	nCoV-2019_40_RIGHT	NOINT	-
+MN908947.3	12110	12133	nCoV-2019_41_LEFT	NOINT	+
+MN908947.3	12465	12490	nCoV-2019_41_RIGHT	NOINT	-
+MN908947.3	12417	12439	nCoV-2019_42_LEFT	NOINT	+
+MN908947.3	12779	12802	nCoV-2019_42_RIGHT	NOINT	-
+MN908947.3	12710	12732	nCoV-2019_43_LEFT	NOINT	+
+MN908947.3	13074	13096	nCoV-2019_43_RIGHT	NOINT	-
+MN908947.3	13005	13027	nCoV-2019_44_LEFT	NOINT	+
+MN908947.3	13378	13400	nCoV-2019_44_RIGHT	NOINT	-
+MN908947.3	13319	13344	nCoV-2019_45_LEFT	NOINT	+
+MN908947.3	13669	13699	nCoV-2019_45_RIGHT	NOINT	-
+MN908947.3	13599	13621	nCoV-2019_46_LEFT	NOINT	+
+MN908947.3	13962	13984	nCoV-2019_46_RIGHT	NOINT	-
+MN908947.3	13918	13946	nCoV-2019_47_LEFT	NOINT	+
+MN908947.3	14271	14299	nCoV-2019_47_RIGHT	NOINT	-
+MN908947.3	14207	14232	nCoV-2019_48_LEFT	NOINT	+
+MN908947.3	14579	14601	nCoV-2019_48_RIGHT	NOINT	-
+MN908947.3	14545	14570	nCoV-2019_49_LEFT	NOINT	+
+MN908947.3	14898	14926	nCoV-2019_49_RIGHT	NOINT	-
+MN908947.3	14865	14895	nCoV-2019_50_LEFT	NOINT	+
+MN908947.3	15224	15246	nCoV-2019_50_RIGHT	NOINT	-
+MN908947.3	15171	15193	nCoV-2019_51_LEFT	NOINT	+
+MN908947.3	15538	15560	nCoV-2019_51_RIGHT	NOINT	-
+MN908947.3	15481	15503	nCoV-2019_52_LEFT	NOINT	+
+MN908947.3	15861	15886	nCoV-2019_52_RIGHT	NOINT	-
+MN908947.3	15827	15851	nCoV-2019_53_LEFT	NOINT	+
+MN908947.3	16186	16209	nCoV-2019_53_RIGHT	NOINT	-
+MN908947.3	16118	16144	nCoV-2019_54_LEFT	NOINT	+
+MN908947.3	16485	16510	nCoV-2019_54_RIGHT	NOINT	-
+MN908947.3	16416	16444	nCoV-2019_55_LEFT	NOINT	+
+MN908947.3	16804	16833	nCoV-2019_55_RIGHT	NOINT	-
+MN908947.3	16748	16770	nCoV-2019_56_LEFT	NOINT	+
+MN908947.3	17130	17152	nCoV-2019_56_RIGHT	NOINT	-
+MN908947.3	17065	17087	nCoV-2019_57_LEFT	NOINT	+
+MN908947.3	17430	17452	nCoV-2019_57_RIGHT	NOINT	-
+MN908947.3	17381	17406	nCoV-2019_58_LEFT	NOINT	+
+MN908947.3	17738	17761	nCoV-2019_58_RIGHT	NOINT	-
+MN908947.3	17674	17697	nCoV-2019_59_LEFT	NOINT	+
+MN908947.3	18036	18062	nCoV-2019_59_RIGHT	NOINT	-
+MN908947.3	17966	17993	nCoV-2019_60_LEFT	NOINT	+
+MN908947.3	18324	18348	nCoV-2019_60_RIGHT	NOINT	-
+MN908947.3	18253	18275	nCoV-2019_61_LEFT	NOINT	+
+MN908947.3	18650	18672	nCoV-2019_61_RIGHT	NOINT	-
+MN908947.3	18596	18618	nCoV-2019_62_LEFT	NOINT	+
+MN908947.3	18957	18979	nCoV-2019_62_RIGHT	NOINT	-
+MN908947.3	18896	18918	nCoV-2019_63_LEFT	NOINT	+
+MN908947.3	19275	19297	nCoV-2019_63_RIGHT	NOINT	-
+MN908947.3	19204	19232	nCoV-2019_64_LEFT	NOINT	+
+MN908947.3	19591	19616	nCoV-2019_64_RIGHT	NOINT	-
+MN908947.3	19548	19570	nCoV-2019_65_LEFT	NOINT	+
+MN908947.3	19911	19939	nCoV-2019_65_RIGHT	NOINT	-
+MN908947.3	19844	19866	nCoV-2019_66_LEFT	NOINT	+
+MN908947.3	20231	20255	nCoV-2019_66_RIGHT	NOINT	-
+MN908947.3	20172	20200	nCoV-2019_67_LEFT	NOINT	+
+MN908947.3	20542	20572	nCoV-2019_67_RIGHT	NOINT	-
+MN908947.3	20472	20496	nCoV-2019_68_LEFT	NOINT	+
+MN908947.3	20867	20890	nCoV-2019_68_RIGHT	NOINT	-
+MN908947.3	20786	20813	nCoV-2019_69_LEFT	NOINT	+
+MN908947.3	21146	21169	nCoV-2019_69_RIGHT	NOINT	-
+MN908947.3	21075	21104	nCoV-2019_70_LEFT	NOINT	+
+MN908947.3	21427	21455	nCoV-2019_70_RIGHT	NOINT	-
+MN908947.3	21357	21386	nCoV-2019_71_LEFT	NOINT	+
+MN908947.3	21716	21743	nCoV-2019_71_RIGHT	NOINT	-
+MN908947.3	21658	21682	nCoV-2019_72_LEFT	NOINT	+
+MN908947.3	22013	22038	nCoV-2019_72_RIGHT	NOINT	-
+MN908947.3	21961	21990	nCoV-2019_73_LEFT	NOINT	+
+MN908947.3	22324	22346	nCoV-2019_73_RIGHT	NOINT	-
+MN908947.3	22262	22290	nCoV-2019_74_LEFT	NOINT	+
+MN908947.3	22626	22650	nCoV-2019_74_RIGHT	NOINT	-
+MN908947.3	22516	22542	nCoV-2019_75_LEFT	NOINT	+
+MN908947.3	22877	22903	nCoV-2019_75_RIGHT	NOINT	-
+MN908947.3	22797	22819	nCoV-2019_76_LEFT	NOINT	+
+MN908947.3	23192	23214	nCoV-2019_76_RIGHT	NOINT	-
+MN908947.3	23122	23144	nCoV-2019_77_LEFT	NOINT	+
+MN908947.3	23500	23522	nCoV-2019_77_RIGHT	NOINT	-
+MN908947.3	23443	23466	nCoV-2019_78_LEFT	NOINT	+
+MN908947.3	23822	23847	nCoV-2019_78_RIGHT	NOINT	-
+MN908947.3	23789	23812	nCoV-2019_79_LEFT	NOINT	+
+MN908947.3	24145	24169	nCoV-2019_79_RIGHT	NOINT	-
+MN908947.3	24078	24100	nCoV-2019_80_LEFT	NOINT	+
+MN908947.3	24443	24467	nCoV-2019_80_RIGHT	NOINT	-
+MN908947.3	24391	24416	nCoV-2019_81_LEFT	NOINT	+
+MN908947.3	24765	24789	nCoV-2019_81_RIGHT	NOINT	-
+MN908947.3	24696	24721	nCoV-2019_82_LEFT	NOINT	+
+MN908947.3	25052	25076	nCoV-2019_82_RIGHT	NOINT	-
+MN908947.3	24978	25003	nCoV-2019_83_LEFT	NOINT	+
+MN908947.3	25347	25369	nCoV-2019_83_RIGHT	NOINT	-
+MN908947.3	25279	25301	nCoV-2019_84_LEFT	NOINT	+
+MN908947.3	25646	25673	nCoV-2019_84_RIGHT	NOINT	-
+MN908947.3	25601	25623	nCoV-2019_85_LEFT	NOINT	+
+MN908947.3	25969	25994	nCoV-2019_85_RIGHT	NOINT	-
+MN908947.3	25902	25924	nCoV-2019_86_LEFT	NOINT	+
+MN908947.3	26290	26315	nCoV-2019_86_RIGHT	NOINT	-
+MN908947.3	26197	26219	nCoV-2019_87_LEFT	NOINT	+
+MN908947.3	26566	26590	nCoV-2019_87_RIGHT	NOINT	-
+MN908947.3	26520	26542	nCoV-2019_88_LEFT	NOINT	+
+MN908947.3	26890	26913	nCoV-2019_88_RIGHT	NOINT	-
+MN908947.3	26835	26857	nCoV-2019_89_LEFT	NOINT	+
+MN908947.3	27202	27227	nCoV-2019_89_RIGHT	NOINT	-
+MN908947.3	27141	27164	nCoV-2019_90_LEFT	NOINT	+
+MN908947.3	27511	27533	nCoV-2019_90_RIGHT	NOINT	-
+MN908947.3	27446	27471	nCoV-2019_91_LEFT	NOINT	+
+MN908947.3	27825	27854	nCoV-2019_91_RIGHT	NOINT	-
+MN908947.3	27784	27808	nCoV-2019_92_LEFT	NOINT	+
+MN908947.3	28145	28172	nCoV-2019_92_RIGHT	NOINT	-
+MN908947.3	28081	28104	nCoV-2019_93_LEFT	NOINT	+
+MN908947.3	28442	28464	nCoV-2019_93_RIGHT	NOINT	-
+MN908947.3	28394	28416	nCoV-2019_94_LEFT	NOINT	+
+MN908947.3	28756	28779	nCoV-2019_94_RIGHT	NOINT	-
+MN908947.3	28677	28699	nCoV-2019_95_LEFT	NOINT	+
+MN908947.3	29041	29063	nCoV-2019_95_RIGHT	NOINT	-
+MN908947.3	28985	29007	nCoV-2019_96_LEFT	NOINT	+
+MN908947.3	29356	29378	nCoV-2019_96_RIGHT	NOINT	-
+MN908947.3	29288	29316	nCoV-2019_97_LEFT	NOINT	+
+MN908947.3	29665	29693	nCoV-2019_97_RIGHT	NOINT	-
+MN908947.3	29486	29510	nCoV-2019_98_LEFT	NOINT	+
+MN908947.3	29836	29866	nCoV-2019_98_RIGHT	NOINT	-