Mercurial > repos > iuc > ivar_trim
changeset 4:db536ad45f28 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ivar/ commit 693df287d23b0fd9dfd134b41d401a438c3f5ad6"
author | iuc |
---|---|
date | Mon, 22 Jun 2020 07:30:46 -0400 |
parents | 5d6ed46cc101 |
children | cf65217ad61c |
files | completemask.py ivar_trim.xml sanitize_bed.py test-data/covid19/ARTIC-V1-bad.bed |
diffstat | 4 files changed, 262 insertions(+), 5 deletions(-) [+] |
line wrap: on
line diff
#!/usr/bin/env python
"""Expand a list of masked primers to every primer of the affected amplicons.

Usage: completemask.py <masked_primers_file> <amplicon_info_file>

The first line of <masked_primers_file> is read as a tab-separated list of
primer names.  Every line of <amplicon_info_file> is read as a tab-separated
list of the primer names that belong to one amplicon.  When at least one
masked primer is present, <masked_primers_file> is overwritten with the
sorted, tab-separated names of all primers that share an amplicon with any
masked primer.
"""

import sys


def complete_masked_primers(masked_primers, amplicon_data):
    """Return the sorted primer names of all amplicons hit by a masked primer.

    masked_primers -- iterable of primer names
    amplicon_data  -- iterable of amplicons, each an iterable of primer names

    A masked primer that is not listed in any amplicon does not appear in
    the result.  Empty names (produced by blank lines or doubled tabs in the
    input files) are ignored.
    """
    wanted = {primer for primer in masked_primers if primer}
    affected = set()
    for amplicon in amplicon_data:
        if wanted.intersection(amplicon):
            affected.update(name for name in amplicon if name)
    return sorted(affected)


def main():
    """Read the input files named on the command line and rewrite the first."""
    with open(sys.argv[1]) as masked_in:
        getmasked_output = masked_in.readline().strip()

    if not getmasked_output:
        print()
        print('No affected primer binding sites found!')
        return

    masked_primers = getmasked_output.split('\t')
    # The amplicon file is only needed (and only opened) when there is
    # something to expand.
    with open(sys.argv[2]) as amplicon_in:
        amplicon_data = [
            line.strip().split('\t') for line in amplicon_in if line.strip()
        ]

    result = '\t'.join(complete_masked_primers(masked_primers, amplicon_data))
    print()
    print('Removing reads primed with any of:')
    print(result)
    with open(sys.argv[1], 'w') as masked_out:
        masked_out.write(result + '\n')


if __name__ == '__main__':
    main()
--- a/ivar_trim.xml Fri Jun 05 04:12:52 2020 -0400 +++ b/ivar_trim.xml Mon Jun 22 07:30:46 2020 -0400 @@ -1,17 +1,23 @@ -<tool id="ivar_trim" name="ivar trim" version="@VERSION@+galaxy0"> +<tool id="ivar_trim" name="ivar trim" version="@VERSION@+galaxy1"> <description>Trim reads in aligned BAM</description> <macros> <import>macros.xml</import> </macros> - <expand macro="requirements" /> + <expand macro="requirements"> + <requirement type="package" version="3.8.1">python</requirement> + </expand> <expand macro="version_command" /> <command detect_errors="exit_code"><![CDATA[ #if $primer.source == "history" - ln -s '$primer.input_bed' bed.bed && + cp '$primer.input_bed' bed.bed && #else - ln -s '$primer.cached_bed.fields.path' bed.bed && - #end if + cp '$primer.cached_bed.fields.path' bed.bed && + #end if + python '$__tool_directory__/sanitize_bed.py' bed.bed && + ln -s '$input_bam' sorted.bam && + ln -s '${input_bam.metadata.bam_index}' sorted.bam.bai && + ivar trim -i sorted.bam -b bed.bed @@ -57,6 +63,13 @@ <param name="inc_primers" value="true" /> <output name="output_bam" file="covid19/PC00101P_sub.sorted.bam" compare="sim_size" delta="300000"/> </test> + <test> + <!-- Test with primer bed file that needs to be sanitized --> + <param name="input_bam" value="covid19/PC00101P_sub.sorted.bam" /> + <param name="input_bed" value="covid19/ARTIC-V1-bad.bed" /> + <param name="inc_primers" value="true" /> + <output name="output_bam" file="covid19/PC00101P_sub.sorted.bam" compare="sim_size" delta="300000"/> + </test> <!-- #1: Zika data--> <test> <conditional name="primer">
#!/usr/bin/env python
"""Overwrite the score column of a BED file with a fixed integer value.

Usage: sanitize_bed.py <bed_file>

The fifth tab-separated column of every record in <bed_file> is replaced
in place with '60', so that a non-numeric placeholder in that column
(e.g. 'NOINT') does not reach the tool that consumes the file afterwards.
If any record has fewer than five columns the file is left untouched and
the column-count problem is left for the downstream tool to report.
"""

import sys


def sanitize_records(records, score='60'):
    """Return *records* with their fifth column replaced by *score*.

    records -- iterable of BED lines, each optionally ending in a newline
    score   -- replacement text for the fifth column

    Line terminators are preserved exactly and whitespace-only lines are
    passed through unchanged.  Returns None when a non-blank record has
    fewer than five tab-separated columns, signalling that the input should
    not be rewritten.
    """
    sanitized = []
    for record in records:
        # Separate the terminator first: otherwise it stays glued to the
        # last field and gets lost or misplaced when that field is replaced.
        body = record.rstrip('\r\n')
        ending = record[len(body):]
        if not body.strip():
            sanitized.append(record)
            continue
        fields = body.split('\t')
        # Slicing never raises IndexError, so the column count has to be
        # checked explicitly.
        if len(fields) < 5:
            return None
        fields[4] = score
        sanitized.append('\t'.join(fields) + ending)
    return sanitized


def main():
    """Sanitize the BED file named by the first command-line argument."""
    with open(sys.argv[1]) as bed_in:
        bed_data = bed_in.readlines()

    sanitized_data = sanitize_records(bed_data)
    if sanitized_data is not None:
        with open(sys.argv[1], 'w') as bed_out:
            bed_out.writelines(sanitized_data)


if __name__ == '__main__':
    main()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/covid19/ARTIC-V1-bad.bed Mon Jun 22 07:30:46 2020 -0400 @@ -0,0 +1,196 @@ +MN908947.3 30 54 nCoV-2019_1_LEFT NOINT + +MN908947.3 385 410 nCoV-2019_1_RIGHT NOINT - +MN908947.3 320 342 nCoV-2019_2_LEFT NOINT + +MN908947.3 704 726 nCoV-2019_2_RIGHT NOINT - +MN908947.3 642 664 nCoV-2019_3_LEFT NOINT + +MN908947.3 1004 1028 nCoV-2019_3_RIGHT NOINT - +MN908947.3 943 965 nCoV-2019_4_LEFT NOINT + +MN908947.3 1312 1337 nCoV-2019_4_RIGHT NOINT - +MN908947.3 1242 1264 nCoV-2019_5_LEFT NOINT + +MN908947.3 1623 1651 nCoV-2019_5_RIGHT NOINT - +MN908947.3 1573 1595 nCoV-2019_6_LEFT NOINT + +MN908947.3 1942 1964 nCoV-2019_6_RIGHT NOINT - +MN908947.3 1875 1897 nCoV-2019_7_LEFT NOINT + +MN908947.3 2247 2269 nCoV-2019_7_RIGHT NOINT - +MN908947.3 2181 2205 nCoV-2019_8_LEFT NOINT + +MN908947.3 2568 2592 nCoV-2019_8_RIGHT NOINT - +MN908947.3 2505 2529 nCoV-2019_9_LEFT NOINT + +MN908947.3 2882 2904 nCoV-2019_9_RIGHT NOINT - +MN908947.3 2826 2850 nCoV-2019_10_LEFT NOINT + +MN908947.3 3183 3210 nCoV-2019_10_RIGHT NOINT - +MN908947.3 3144 3166 nCoV-2019_11_LEFT NOINT + +MN908947.3 3507 3531 nCoV-2019_11_RIGHT NOINT - +MN908947.3 3460 3482 nCoV-2019_12_LEFT NOINT + +MN908947.3 3826 3853 nCoV-2019_12_RIGHT NOINT - +MN908947.3 3771 3795 nCoV-2019_13_LEFT NOINT + +MN908947.3 4142 4164 nCoV-2019_13_RIGHT NOINT - +MN908947.3 4054 4077 nCoV-2019_14_LEFT NOINT + +MN908947.3 4428 4450 nCoV-2019_14_RIGHT NOINT - +MN908947.3 4294 4321 nCoV-2019_15_LEFT NOINT + +MN908947.3 4674 4696 nCoV-2019_15_RIGHT NOINT - +MN908947.3 4636 4658 nCoV-2019_16_LEFT NOINT + +MN908947.3 4995 5017 nCoV-2019_16_RIGHT NOINT - +MN908947.3 4939 4966 nCoV-2019_17_LEFT NOINT + +MN908947.3 5296 5321 nCoV-2019_17_RIGHT NOINT - +MN908947.3 5230 5259 nCoV-2019_18_LEFT NOINT + +MN908947.3 5620 5644 nCoV-2019_18_RIGHT NOINT - +MN908947.3 5563 5586 nCoV-2019_19_LEFT NOINT + +MN908947.3 5932 5957 nCoV-2019_19_RIGHT NOINT - +MN908947.3 5867 5894 nCoV-2019_20_LEFT NOINT + 
+MN908947.3 6247 6272 nCoV-2019_20_RIGHT NOINT - +MN908947.3 6167 6196 nCoV-2019_21_LEFT NOINT + +MN908947.3 6528 6550 nCoV-2019_21_RIGHT NOINT - +MN908947.3 6466 6495 nCoV-2019_22_LEFT NOINT + +MN908947.3 6846 6873 nCoV-2019_22_RIGHT NOINT - +MN908947.3 6718 6745 nCoV-2019_23_LEFT NOINT + +MN908947.3 7092 7117 nCoV-2019_23_RIGHT NOINT - +MN908947.3 7035 7058 nCoV-2019_24_LEFT NOINT + +MN908947.3 7389 7415 nCoV-2019_24_RIGHT NOINT - +MN908947.3 7305 7332 nCoV-2019_25_LEFT NOINT + +MN908947.3 7671 7694 nCoV-2019_25_RIGHT NOINT - +MN908947.3 7626 7651 nCoV-2019_26_LEFT NOINT + +MN908947.3 7997 8019 nCoV-2019_26_RIGHT NOINT - +MN908947.3 7943 7968 nCoV-2019_27_LEFT NOINT + +MN908947.3 8319 8341 nCoV-2019_27_RIGHT NOINT - +MN908947.3 8249 8275 nCoV-2019_28_LEFT NOINT + +MN908947.3 8635 8661 nCoV-2019_28_RIGHT NOINT - +MN908947.3 8595 8619 nCoV-2019_29_LEFT NOINT + +MN908947.3 8954 8983 nCoV-2019_29_RIGHT NOINT - +MN908947.3 8888 8913 nCoV-2019_30_LEFT NOINT + +MN908947.3 9245 9271 nCoV-2019_30_RIGHT NOINT - +MN908947.3 9204 9226 nCoV-2019_31_LEFT NOINT + +MN908947.3 9557 9585 nCoV-2019_31_RIGHT NOINT - +MN908947.3 9477 9502 nCoV-2019_32_LEFT NOINT + +MN908947.3 9834 9858 nCoV-2019_32_RIGHT NOINT - +MN908947.3 9784 9806 nCoV-2019_33_LEFT NOINT + +MN908947.3 10146 10171 nCoV-2019_33_RIGHT NOINT - +MN908947.3 10076 10099 nCoV-2019_34_LEFT NOINT + +MN908947.3 10437 10459 nCoV-2019_34_RIGHT NOINT - +MN908947.3 10362 10384 nCoV-2019_35_LEFT NOINT + +MN908947.3 10737 10763 nCoV-2019_35_RIGHT NOINT - +MN908947.3 10666 10688 nCoV-2019_36_LEFT NOINT + +MN908947.3 11048 11074 nCoV-2019_36_RIGHT NOINT - +MN908947.3 10999 11022 nCoV-2019_37_LEFT NOINT + +MN908947.3 11372 11394 nCoV-2019_37_RIGHT NOINT - +MN908947.3 11306 11331 nCoV-2019_38_LEFT NOINT + +MN908947.3 11668 11693 nCoV-2019_38_RIGHT NOINT - +MN908947.3 11555 11584 nCoV-2019_39_LEFT NOINT + +MN908947.3 11927 11949 nCoV-2019_39_RIGHT NOINT - +MN908947.3 11863 11889 nCoV-2019_40_LEFT NOINT + +MN908947.3 12234 12256 
nCoV-2019_40_RIGHT NOINT - +MN908947.3 12110 12133 nCoV-2019_41_LEFT NOINT + +MN908947.3 12465 12490 nCoV-2019_41_RIGHT NOINT - +MN908947.3 12417 12439 nCoV-2019_42_LEFT NOINT + +MN908947.3 12779 12802 nCoV-2019_42_RIGHT NOINT - +MN908947.3 12710 12732 nCoV-2019_43_LEFT NOINT + +MN908947.3 13074 13096 nCoV-2019_43_RIGHT NOINT - +MN908947.3 13005 13027 nCoV-2019_44_LEFT NOINT + +MN908947.3 13378 13400 nCoV-2019_44_RIGHT NOINT - +MN908947.3 13319 13344 nCoV-2019_45_LEFT NOINT + +MN908947.3 13669 13699 nCoV-2019_45_RIGHT NOINT - +MN908947.3 13599 13621 nCoV-2019_46_LEFT NOINT + +MN908947.3 13962 13984 nCoV-2019_46_RIGHT NOINT - +MN908947.3 13918 13946 nCoV-2019_47_LEFT NOINT + +MN908947.3 14271 14299 nCoV-2019_47_RIGHT NOINT - +MN908947.3 14207 14232 nCoV-2019_48_LEFT NOINT + +MN908947.3 14579 14601 nCoV-2019_48_RIGHT NOINT - +MN908947.3 14545 14570 nCoV-2019_49_LEFT NOINT + +MN908947.3 14898 14926 nCoV-2019_49_RIGHT NOINT - +MN908947.3 14865 14895 nCoV-2019_50_LEFT NOINT + +MN908947.3 15224 15246 nCoV-2019_50_RIGHT NOINT - +MN908947.3 15171 15193 nCoV-2019_51_LEFT NOINT + +MN908947.3 15538 15560 nCoV-2019_51_RIGHT NOINT - +MN908947.3 15481 15503 nCoV-2019_52_LEFT NOINT + +MN908947.3 15861 15886 nCoV-2019_52_RIGHT NOINT - +MN908947.3 15827 15851 nCoV-2019_53_LEFT NOINT + +MN908947.3 16186 16209 nCoV-2019_53_RIGHT NOINT - +MN908947.3 16118 16144 nCoV-2019_54_LEFT NOINT + +MN908947.3 16485 16510 nCoV-2019_54_RIGHT NOINT - +MN908947.3 16416 16444 nCoV-2019_55_LEFT NOINT + +MN908947.3 16804 16833 nCoV-2019_55_RIGHT NOINT - +MN908947.3 16748 16770 nCoV-2019_56_LEFT NOINT + +MN908947.3 17130 17152 nCoV-2019_56_RIGHT NOINT - +MN908947.3 17065 17087 nCoV-2019_57_LEFT NOINT + +MN908947.3 17430 17452 nCoV-2019_57_RIGHT NOINT - +MN908947.3 17381 17406 nCoV-2019_58_LEFT NOINT + +MN908947.3 17738 17761 nCoV-2019_58_RIGHT NOINT - +MN908947.3 17674 17697 nCoV-2019_59_LEFT NOINT + +MN908947.3 18036 18062 nCoV-2019_59_RIGHT NOINT - +MN908947.3 17966 17993 nCoV-2019_60_LEFT NOINT + 
+MN908947.3 18324 18348 nCoV-2019_60_RIGHT NOINT - +MN908947.3 18253 18275 nCoV-2019_61_LEFT NOINT + +MN908947.3 18650 18672 nCoV-2019_61_RIGHT NOINT - +MN908947.3 18596 18618 nCoV-2019_62_LEFT NOINT + +MN908947.3 18957 18979 nCoV-2019_62_RIGHT NOINT - +MN908947.3 18896 18918 nCoV-2019_63_LEFT NOINT + +MN908947.3 19275 19297 nCoV-2019_63_RIGHT NOINT - +MN908947.3 19204 19232 nCoV-2019_64_LEFT NOINT + +MN908947.3 19591 19616 nCoV-2019_64_RIGHT NOINT - +MN908947.3 19548 19570 nCoV-2019_65_LEFT NOINT + +MN908947.3 19911 19939 nCoV-2019_65_RIGHT NOINT - +MN908947.3 19844 19866 nCoV-2019_66_LEFT NOINT + +MN908947.3 20231 20255 nCoV-2019_66_RIGHT NOINT - +MN908947.3 20172 20200 nCoV-2019_67_LEFT NOINT + +MN908947.3 20542 20572 nCoV-2019_67_RIGHT NOINT - +MN908947.3 20472 20496 nCoV-2019_68_LEFT NOINT + +MN908947.3 20867 20890 nCoV-2019_68_RIGHT NOINT - +MN908947.3 20786 20813 nCoV-2019_69_LEFT NOINT + +MN908947.3 21146 21169 nCoV-2019_69_RIGHT NOINT - +MN908947.3 21075 21104 nCoV-2019_70_LEFT NOINT + +MN908947.3 21427 21455 nCoV-2019_70_RIGHT NOINT - +MN908947.3 21357 21386 nCoV-2019_71_LEFT NOINT + +MN908947.3 21716 21743 nCoV-2019_71_RIGHT NOINT - +MN908947.3 21658 21682 nCoV-2019_72_LEFT NOINT + +MN908947.3 22013 22038 nCoV-2019_72_RIGHT NOINT - +MN908947.3 21961 21990 nCoV-2019_73_LEFT NOINT + +MN908947.3 22324 22346 nCoV-2019_73_RIGHT NOINT - +MN908947.3 22262 22290 nCoV-2019_74_LEFT NOINT + +MN908947.3 22626 22650 nCoV-2019_74_RIGHT NOINT - +MN908947.3 22516 22542 nCoV-2019_75_LEFT NOINT + +MN908947.3 22877 22903 nCoV-2019_75_RIGHT NOINT - +MN908947.3 22797 22819 nCoV-2019_76_LEFT NOINT + +MN908947.3 23192 23214 nCoV-2019_76_RIGHT NOINT - +MN908947.3 23122 23144 nCoV-2019_77_LEFT NOINT + +MN908947.3 23500 23522 nCoV-2019_77_RIGHT NOINT - +MN908947.3 23443 23466 nCoV-2019_78_LEFT NOINT + +MN908947.3 23822 23847 nCoV-2019_78_RIGHT NOINT - +MN908947.3 23789 23812 nCoV-2019_79_LEFT NOINT + +MN908947.3 24145 24169 nCoV-2019_79_RIGHT NOINT - +MN908947.3 24078 24100 
nCoV-2019_80_LEFT NOINT + +MN908947.3 24443 24467 nCoV-2019_80_RIGHT NOINT - +MN908947.3 24391 24416 nCoV-2019_81_LEFT NOINT + +MN908947.3 24765 24789 nCoV-2019_81_RIGHT NOINT - +MN908947.3 24696 24721 nCoV-2019_82_LEFT NOINT + +MN908947.3 25052 25076 nCoV-2019_82_RIGHT NOINT - +MN908947.3 24978 25003 nCoV-2019_83_LEFT NOINT + +MN908947.3 25347 25369 nCoV-2019_83_RIGHT NOINT - +MN908947.3 25279 25301 nCoV-2019_84_LEFT NOINT + +MN908947.3 25646 25673 nCoV-2019_84_RIGHT NOINT - +MN908947.3 25601 25623 nCoV-2019_85_LEFT NOINT + +MN908947.3 25969 25994 nCoV-2019_85_RIGHT NOINT - +MN908947.3 25902 25924 nCoV-2019_86_LEFT NOINT + +MN908947.3 26290 26315 nCoV-2019_86_RIGHT NOINT - +MN908947.3 26197 26219 nCoV-2019_87_LEFT NOINT + +MN908947.3 26566 26590 nCoV-2019_87_RIGHT NOINT - +MN908947.3 26520 26542 nCoV-2019_88_LEFT NOINT + +MN908947.3 26890 26913 nCoV-2019_88_RIGHT NOINT - +MN908947.3 26835 26857 nCoV-2019_89_LEFT NOINT + +MN908947.3 27202 27227 nCoV-2019_89_RIGHT NOINT - +MN908947.3 27141 27164 nCoV-2019_90_LEFT NOINT + +MN908947.3 27511 27533 nCoV-2019_90_RIGHT NOINT - +MN908947.3 27446 27471 nCoV-2019_91_LEFT NOINT + +MN908947.3 27825 27854 nCoV-2019_91_RIGHT NOINT - +MN908947.3 27784 27808 nCoV-2019_92_LEFT NOINT + +MN908947.3 28145 28172 nCoV-2019_92_RIGHT NOINT - +MN908947.3 28081 28104 nCoV-2019_93_LEFT NOINT + +MN908947.3 28442 28464 nCoV-2019_93_RIGHT NOINT - +MN908947.3 28394 28416 nCoV-2019_94_LEFT NOINT + +MN908947.3 28756 28779 nCoV-2019_94_RIGHT NOINT - +MN908947.3 28677 28699 nCoV-2019_95_LEFT NOINT + +MN908947.3 29041 29063 nCoV-2019_95_RIGHT NOINT - +MN908947.3 28985 29007 nCoV-2019_96_LEFT NOINT + +MN908947.3 29356 29378 nCoV-2019_96_RIGHT NOINT - +MN908947.3 29288 29316 nCoV-2019_97_LEFT NOINT + +MN908947.3 29665 29693 nCoV-2019_97_RIGHT NOINT - +MN908947.3 29486 29510 nCoV-2019_98_LEFT NOINT + +MN908947.3 29836 29866 nCoV-2019_98_RIGHT NOINT -