Repository 'cooc_mutbamscan'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/cooc_mutbamscan

Changeset 0:373c1735d31f (2022-08-11)
Next changeset 1:9f1660129403 (2023-07-31)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/cojac commit 38ed91999d4bbe3bedc294197926ea332eb6cd5e
added:
cooc_mutbamscan.xml
macros.xml
test-data/amplicons111.yaml
test-data/cooc-test111.json
test-data/nCoV-2019.insert.V3.bed
test-data/omicron_ba1_mutations.yaml
test-data/tbam11.bam
b
diff -r 000000000000 -r 373c1735d31f cooc_mutbamscan.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cooc_mutbamscan.xml Thu Aug 11 13:50:37 2022 +0000
[
@@ -0,0 +1,155 @@
+<tool id="cooc_mutbamscan" name="Cojac: mutbamscan" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@"
+      profile="@PROFILE@">
+    <description>
+        scan an alignment file for mutation co-occurrences
+    </description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="biotools"/>
+    <expand macro="requirements"/>
+    <expand macro="version"/>
+    <command detect_errors="exit_code"><![CDATA[
+ln -s '$bed_file' 'bed_file' &&
+@VOCDIR_COMMAND@
+#import re
+#set identifier = re.sub('[^\s\w\-\\.]', '_', str($bam_file.element_identifier))
+ln -s '$bam_file' '${identifier}.bam' &&
+ln -s '${bam_file.metadata.bam_index}' '${identifier}.bai' &&
+cooc-mutbamscan
+    -a '${identifier}.bam'
+    -b 'bed_file'
+    -m '$vocdir'
+    -y cooc.yaml
+    -j cooc.json
+    #if $amplicons_file.choice == 'build'
+        -A amplicons.yaml
+    #else
+        -Q '$amplicons_file.in_amp'
+    #end if
+    -t cooc.tsv
+    --cooc $cooc
+    ]]></command>
+    <inputs>
+        <expand macro="vocdir_input"/>
+        <param name="bed_file" type="data" format="bed"
+               label="BED file defining the amplicons"/>
+        <param name="bam_file" type="data" format="bam,cram,sam"
+               label="Alignment BAM/CRAM/SAM file"/>
+        <param argument="--cooc" type="integer" min="1" value="2"
+               label="Minimum number of co-occurring mutations on the same amplicon"/>
+        <conditional name="amplicons_file">
+            <param name="choice" type="select" label="Source of amplicons YAML file">
+                <option value="build">Build from BED + set of YAMLs for variants of concern</option>
+                <option value="custom">From history</option>
+            </param>
+            <when value="build"/>
+            <when value="custom">
+                <param name="in_amp" type="data" format="yaml"
+                       label="YAML file to query amplicons"/>
+            </when>
+        </conditional>
+        <param name="output_files" type="select" display="checkboxes"
+               multiple="true" label="Output files">
+            <option value="yaml" selected="true">YAML</option>
+            <option value="json">JSON</option>
+            <option value="tabular">tabular</option>
+        </param>
+    </inputs>
+    <outputs>
+        <data name="cooc_yaml" format="yaml"
+              label="${tool.name} on ${on_string}: Mutation cooccurrence (yaml)"
+              from_work_dir="cooc.yaml">
+            <filter>output_files and 'yaml' in output_files</filter>
+        </data>
+        <data name="cooc_json" format="json"
+              label="${tool.name} on ${on_string}: Mutation cooccurrence (json)"
+              from_work_dir="cooc.json">
+            <filter>output_files and 'json' in output_files</filter>
+        </data>
+        <data name="cooc_tsv" format="tabular"
+              label="${tool.name} on ${on_string}: Mutation cooccurrence (tabular)"
+              from_work_dir="cooc.tsv">
+            <filter>output_files and 'tabular' in output_files</filter>
+        </data>
+        <data name="amplicons" format="yaml"
+              label="${tool.name} on ${on_string}: Amplicons (yaml)"
+              from_work_dir="amplicons.yaml">
+            <filter>amplicons_file['choice'] == 'build'</filter>
+        </data>
+    </outputs>
+    <tests>
+        <!-- Test 1: VOC definition supplied from history; amplicons YAML is built from the BED file, so all four outputs are expected -->
+        <test expect_num_outputs="4">
+            <conditional name="vocdir_option">
+                <param name="choice" value="custom"/>
+                <param name="voc_file" value="omicron_ba1_mutations.yaml"/>
+            </conditional>
+            <param name="bam_file" value="tbam11.bam"/>
+            <param name="bed_file" value="nCoV-2019.insert.V3.bed"/>
+            <conditional name="amplicons_file">
+                <param name="choice" value="build"/>
+            </conditional>
+            <param name="output_files" value="yaml,json,tabular"/>
+            <output name="cooc_yaml" ftype="yaml">
+                <assert_contents>
+                    <has_text text="76_om1"/>
+                    <has_text text="81_om1"/>
+                </assert_contents>
+            </output>
+            <output name="cooc_json" ftype="json">
+                <assert_contents>
+                    <has_text text="76_om1"/>
+                    <has_text text="81_om1"/>
+                </assert_contents>
+            </output>
+            <output name="cooc_tsv" ftype="tabular">
+                <assert_contents>
+                    <has_text text="76_om1"/>
+                    <has_text text="81_om1"/>
+                </assert_contents>
+            </output>
+            <output name="amplicons" ftype="yaml">
+                <assert_contents>
+                    <has_text text="76_om1"/>
+                    <has_text text="81_om1"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- Test 2: VOC definitions shipped with the tool; amplicons YAML supplied from history, so only the two selected co-occurrence outputs are expected -->
+        <test expect_num_outputs="2">
+            <conditional name="vocdir_option">
+                <param name="choice" value="cache"/>
+            </conditional>
+            <param name="bam_file" value="tbam11.bam"/>
+            <param name="bed_file" value="nCoV-2019.insert.V3.bed"/>
+            <conditional name="amplicons_file">
+                <param name="choice" value="custom"/>
+                <param name="in_amp" value="amplicons111.yaml"/>
+            </conditional>
+            <param name="output_files" value="yaml,tabular"/>
+            <output name="cooc_yaml" ftype="yaml">
+                <assert_contents>
+                    <has_text text="76_om1"/>
+                    <has_text text="81_om1"/>
+                </assert_contents>
+            </output>
+            <output name="cooc_tsv" ftype="tabular">
+                <assert_contents>
+                    <has_text text="76_om1"/>
+                    <has_text text="81_om1"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+@HELP_HEADER@
+
+Information about **cooc-mutbamscan** method
+============================================
+
+The method scans an alignment BAM/CRAM/SAM file for mutation co-occurrences and outputs the results as YAML, JSON and/or tabular files.
+
+    ]]></help>
+    <expand macro="citations"/>
+</tool>
b
diff -r 000000000000 -r 373c1735d31f macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Thu Aug 11 13:50:37 2022 +0000
[
@@ -0,0 +1,62 @@
+<?xml version="1.0"?>
+<macros>
+    <token name="@TOOL_VERSION@">0.2</token>
+    <token name="@VERSION_SUFFIX@">0</token>
+    <token name="@PROFILE@">21.01</token>
+    <xml name="biotools">
+        <xrefs>
+            <xref type="bio.tools">cojac</xref>
+        </xrefs>
+    </xml>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@TOOL_VERSION@">cojac</requirement>
+            <yield/>
+        </requirements>
+    </xml>
+    <xml name="version">
+        <version_command>echo @TOOL_VERSION@</version_command>
+    </xml>
+    <xml name="vocdir_input">
+        <conditional name="vocdir_option">
+            <param name="choice" type="select"
+                   label="Source of YAML files defining the variants of concern"
+                   help="Cojac ships with a directory of variant definition YAML files (https://github.com/cbg-ethz/cojac/tree/master/voc), which the tool can access internally. You can also download the latest version of the YAML files from https://github.com/phe-genomics/variant_definitions and use them as custom YAML files defining the variants of concern.">
+                <option value="cache">Definitions shipped with the tool (can be outdated)</option>
+                <option value="custom">From history</option>
+            </param>
+            <when value="cache"/>
+            <when value="custom">
+                <param name="voc_file" type="data" format="yaml" multiple="true"
+                       label="YAML file(s) defining the variants of concern"/>
+            </when>
+        </conditional>
+    </xml>
+    <token name="@VOCDIR_COMMAND@"><![CDATA[
+#if $vocdir_option.choice == 'custom'
+    #set $vocdir = 'voc/'
+    #set $file_paths1 = []
+    mkdir -p voc &&
+    #for $input_file in $vocdir_option.voc_file
+        #set $file_path = $vocdir + $input_file.element_identifier
+        ln -s '$input_file' '$file_path' &&
+        #silent $file_paths1.append($file_path)
+    #end for
+#else
+    DB_PATH="\$(dirname "\$(dirname "\$(which cooc-mutbamscan)")")/share/cojac" &&
+    ln -s "\$DB_PATH" db &&
+    #set $vocdir = 'db/voc'
+#end if
+]]></token>
+    <token name="@HELP_HEADER@"><![CDATA[
+What it does
+============
+
+The cojac package comprises a set of command-line tools to analyse co-occurrence of mutations on amplicons. It is useful, for example, for early detection of viral variants of concern (e.g. Alpha, Delta, Omicron) in environmental samples, and has been designed to scan for multiple SARS-CoV-2 variants in wastewater samples.
+]]></token>
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1101/2021.01.08.21249379</citation>
+        </citations>
+    </xml>
+</macros>
\ No newline at end of file
b
diff -r 000000000000 -r 373c1735d31f test-data/amplicons111.yaml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/amplicons111.yaml Thu Aug 11 13:50:37 2022 +0000
[
@@ -0,0 +1,3 @@
+76_om1: [22821, 23189, 22907, 23114, {22898: A, 23048: A}]
+81_om1: [24416, 24765, 24473, 24691, {24469: A, 24503: T}]
+
b
diff -r 000000000000 -r 373c1735d31f test-data/cooc-test111.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/cooc-test111.json Thu Aug 11 13:50:37 2022 +0000
b
@@ -0,0 +1,1 @@
+{"tbam11.bam": {"76_om1": {"sites": {"1": 68, "2": 4}, "muts": {}}, "81_om1": {"sites": {"2": 211}, "muts": {"1": 209}}}}
\ No newline at end of file
b
diff -r 000000000000 -r 373c1735d31f test-data/nCoV-2019.insert.V3.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/nCoV-2019.insert.V3.bed Thu Aug 11 13:50:37 2022 +0000
b
@@ -0,0 +1,98 @@
+MN908947.3 54 385 1 1 +
+MN908947.3 342 704 2 2 +
+MN908947.3 664 1004 3 1 +
+MN908947.3 965 1312 4 2 +
+MN908947.3 1264 1623 5 1 +
+MN908947.3 1595 1942 6 2 +
+MN908947.3 1897 2242 7 1 +
+MN908947.3 2205 2568 8 2 +
+MN908947.3 2529 2880 9 1 +
+MN908947.3 2850 3183 10 2 +
+MN908947.3 3166 3507 11 1 +
+MN908947.3 3482 3826 12 2 +
+MN908947.3 3795 4142 13 1 +
+MN908947.3 4077 4402 14 2 +
+MN908947.3 4322 4666 15 1 +
+MN908947.3 4658 4995 16 2 +
+MN908947.3 4966 5296 17 1 +
+MN908947.3 5287 5620 18 2 +
+MN908947.3 5586 5932 19 1 +
+MN908947.3 5894 6247 20 2 +
+MN908947.3 6197 6526 21 1 +
+MN908947.3 6495 6846 22 2 +
+MN908947.3 6745 7092 23 1 +
+MN908947.3 7058 7389 24 2 +
+MN908947.3 7332 7671 25 1 +
+MN908947.3 7651 7997 26 2 +
+MN908947.3 7968 8319 27 1 +
+MN908947.3 8275 8635 28 2 +
+MN908947.3 8619 8954 29 1 +
+MN908947.3 8913 9245 30 2 +
+MN908947.3 9226 9557 31 1 +
+MN908947.3 9502 9834 32 2 +
+MN908947.3 9806 10146 33 1 +
+MN908947.3 10099 10437 34 2 +
+MN908947.3 10384 10737 35 1 +
+MN908947.3 10688 11048 36 2 +
+MN908947.3 11022 11372 37 1 +
+MN908947.3 11331 11668 38 2 +
+MN908947.3 11584 11927 39 1 +
+MN908947.3 11889 12234 40 2 +
+MN908947.3 12133 12465 41 1 +
+MN908947.3 12439 12779 42 2 +
+MN908947.3 12732 13074 43 1 +
+MN908947.3 13029 13363 44 2 +
+MN908947.3 13344 13660 45 1 +
+MN908947.3 13625 13961 46 2 +
+MN908947.3 13946 14271 47 1 +
+MN908947.3 14232 14579 48 2 +
+MN908947.3 14570 14898 49 1 +
+MN908947.3 14895 15224 50 2 +
+MN908947.3 15193 15538 51 1 +
+MN908947.3 15503 15861 52 2 +
+MN908947.3 15851 16186 53 1 +
+MN908947.3 16144 16485 54 2 +
+MN908947.3 16444 16804 55 1 +
+MN908947.3 16770 17130 56 2 +
+MN908947.3 17087 17430 57 1 +
+MN908947.3 17406 17738 58 2 +
+MN908947.3 17697 18036 59 1 +
+MN908947.3 17993 18324 60 2 +
+MN908947.3 18275 18650 61 1 +
+MN908947.3 18618 18957 62 2 +
+MN908947.3 18918 19275 63 1 +
+MN908947.3 19232 19591 64 2 +
+MN908947.3 19570 19911 65 1 +
+MN908947.3 19866 20231 66 2 +
+MN908947.3 20200 20542 67 1 +
+MN908947.3 20496 20867 68 2 +
+MN908947.3 20813 21146 69 1 +
+MN908947.3 21104 21427 70 2 +
+MN908947.3 21386 21716 71 1 +
+MN908947.3 21682 22013 72 2 +
+MN908947.3 21990 22324 73 1 +
+MN908947.3 22290 22626 74 2 +
+MN908947.3 22542 22877 75 1 +
+MN908947.3 22821 23189 76 2 +
+MN908947.3 23144 23500 77 1 +
+MN908947.3 23466 23822 78 2 +
+MN908947.3 23812 24145 79 1 +
+MN908947.3 24100 24443 80 2 +
+MN908947.3 24416 24765 81 1 +
+MN908947.3 24721 25052 82 2 +
+MN908947.3 25003 25347 83 1 +
+MN908947.3 25301 25646 84 2 +
+MN908947.3 25623 25969 85 1 +
+MN908947.3 25924 26290 86 2 +
+MN908947.3 26219 26566 87 1 +
+MN908947.3 26542 26890 88 2 +
+MN908947.3 26860 27190 89 1 +
+MN908947.3 27164 27511 90 2 +
+MN908947.3 27471 27825 91 1 +
+MN908947.3 27808 28145 92 2 +
+MN908947.3 28104 28442 93 1 +
+MN908947.3 28416 28756 94 2 +
+MN908947.3 28699 29041 95 1 +
+MN908947.3 29007 29356 96 2 +
+MN908947.3 29316 29665 97 1 +
+MN908947.3 29510 29836 98 2 +
\ No newline at end of file
b
diff -r 000000000000 -r 373c1735d31f test-data/omicron_ba1_mutations.yaml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/omicron_ba1_mutations.yaml Thu Aug 11 13:50:37 2022 +0000
b
@@ -0,0 +1,30 @@
+variant:
+  voc: VOC-21NOV-01
+  pheuid: pentagon-refining
+  short: om1
+  pangolin: BA.1
+source:
+- https://github.com/cov-lineages/pango-designation/issues/343
+threshold: 10
+mut:
+  # ORF1ab
+  2832: 'A>G' # nsp3:K38R
+  5386: 'T>G' # syn nsp3
+  8393: 'G>A' # nsp3:A1892T
+  11537: 'A>G' # nsp6:I189V
+  13195: 'T>C' # syn nsp10
+  18163: 'A>G' # nsp14:I42V
+  # S
+  22679: 'T>C' # surface glycoprotein:S373P
+  22898: 'G>A' # surface glycoprotein:G446S
+  23048: 'G>A' # surface glycoprotein:G496S
+  23202: 'C>A' # surface glycoprotein:T547K
+  23599: 'T>G' # surface glycoprotein:N679K
+  24130: 'C>A' # surface glycoprotein:N856K
+  24469: 'T>A' # surface glycoprotein:N969K
+  24503: 'C>T' # surface glycoprotein:L981F
+  # M
+  26530: 'A>G' # membrane glycoprotein:D3G
+  26577: 'C>G' # membrane glycoprotein:Q19E
+  # ORF6
+  27259: 'A>C' # syn ORF6 protein
b
diff -r 000000000000 -r 373c1735d31f test-data/tbam11.bam
b
Binary file test-data/tbam11.bam has changed