Repository 'samtools_ampliconclip'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/samtools_ampliconclip

Changeset 0:a941babb9268 (2022-06-27)
Next changeset 1:5f3ea90dc6ae (2022-08-15)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/samtools/samtools_ampliconclip commit 4596e7b08744df85b48d106cf4d44ebdd90dd554
added:
macros.xml
samtools_ampliconclip.xml
test-data/eboVir3.1.bed
test-data/eboVir3.bam
test-data/eboVir3.clipped.bam
test-data/eboVir3.clipped.strand.bam
test-data/eboVir3.clipped.strand_gt30.bam
test-data/eboVir3.hardclipped.bam
test-data/rebuild_output_bams.sh
b
diff -r 000000000000 -r a941babb9268 macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Mon Jun 27 20:07:59 2022 +0000
[
b'@@ -0,0 +1,256 @@\n+<macros>\n+    <xml name="requirements">\n+        <requirements>\n+            <requirement type="package" version="@TOOL_VERSION@">samtools</requirement>\n+            <yield/>\n+        </requirements>\n+    </xml>\n+    <token name="@TOOL_VERSION@">1.13</token>\n+    <token name="@PROFILE@">20.05</token>\n+    <token name="@FLAGS@"><![CDATA[\n+        #set $flags = 0\n+        #if $filter\n+            #set $flags = sum(map(int, str($filter).split(\',\')))\n+        #end if\n+    ]]></token>\n+    <token name="@PREPARE_IDX@"><![CDATA[\n+        ##prepare input and indices\n+        ln -s \'$input\' infile &&\n+        #if $input.is_of_type(\'bam\'):\n+            #if str( $input.metadata.bam_index ) != "None":\n+                ln -s \'${input.metadata.bam_index}\' infile.bai &&\n+            #else:\n+                samtools index infile infile.bai &&\n+            #end if\n+        #elif $input.is_of_type(\'cram\'):\n+            #if str( $input.metadata.cram_index ) != "None":\n+                ln -s \'${input.metadata.cram_index}\' infile.crai &&\n+            #else:\n+                samtools index infile infile.crai &&\n+            #end if\n+        #end if\n+    ]]></token>\n+    <token name="@PREPARE_IDX_MULTIPLE@"><![CDATA[\n+        ##prepare input and indices\n+        #for $i, $bam in enumerate( $input_bams ):\n+            ln -s \'$bam\' \'${i}\' &&\n+            #if $bam.is_of_type(\'bam\'):\n+                #if str( $bam.metadata.bam_index ) != "None":\n+                    ln -s \'${bam.metadata.bam_index}\' \'${i}.bai\' &&\n+                #else:\n+                    samtools index \'${i}\' \'${i}.bai\' &&\n+                #end if\n+            #elif $bam.is_of_type(\'cram\'):\n+                #if str( $bam.metadata.cram_index ) != "None":\n+                    ln -s \'${bam.metadata.cram_index}\' \'${i}.crai\' &&\n+                #else:\n+                    samtools index \'${i}\' \'${i}.crai\' &&\n+               
 #end if\n+            #end if\n+        #end for\n+    ]]></token>\n+    <token name="@PREPARE_FASTA_IDX@"><![CDATA[\n+        ## Make the user-selected reference genome, if any, accessible through\n+        ## a shell variable $reffa, index the reference if necessary, and make\n+        ## the fai-index file available through a shell variable $reffai.\n+\n+        ## For a cached genome simply sets the shell variables to point to the\n+        ## genome file and its precalculated index.\n+        ## For a genome from the user\'s history, if that genome is a plain\n+        ## fasta file, the code creates a symlink in the pwd, creates the fai\n+        ## index file next to it, then sets the shell variables to point to the\n+        ## symlink and its index.\n+        ## For a fasta.gz dataset from the user\'s history, it tries the same,\n+        ## but this will only succeed if the file got compressed with bgzip.\n+        ## For a regular gzipped file samtools faidx will fail, in which case\n+        ## the code falls back to decompressing to plain fasta before\n+        ## reattempting the indexing.\n+        ## Indexing of a bgzipped file produces a regular fai index file *and*\n+        ## a compressed gzi file. 
The former is identical to the fai index of\n+        ## the uncompressed fasta.\n+\n+        ## If the user has not selected a reference (it\'s an optional parameter\n+        ## in some samtools wrappers), a cheetah boolean use_ref is set to\n+        ## False to encode that fact.\n+\n+        #set use_ref=True\n+        #if $addref_cond.addref_select == "history":\n+            #if $addref_cond.ref.is_of_type(\'fasta\'):\n+                reffa="reference.fa" &&\n+                ln -s \'${addref_cond.ref}\' \\$reffa &&\n+                samtools faidx \\$reffa &&\n+            #else:\n+                reffa="reference.fa.gz" &&\n+                ln -s \'${addref_cond.ref}\' \\$reffa &&\n+                {\n+                    samtools faidx \\$reffa ||\n+                    {\n+                        echo "Failed to index compressed reference. Trying decompressed ..." 1>&2 &&\n+   '..b'ed="@S1024@">Read is a PCR or optical duplicate</option>\n+        <option value="2048" selected="@S2048@">Alignment is supplementary</option>\n+    </xml>\n+\n+    <!-- region specification macros and tokens for tools that allow the specification\n+         of region by bed file / space separated list of regions -->\n+    <token name="@REGIONS_FILE@"><![CDATA[\n+        #if $cond_region.select_region == \'tab\':\n+            -t \'$cond_region.targetregions\'\n+        #end if\n+    ]]></token>\n+    <token name="@REGIONS_MANUAL@"><![CDATA[\n+        #if $cond_region.select_region == \'text\':\n+            #for $i, $x in enumerate($cond_region.regions_repeat):\n+               \'${x.region}\'\n+            #end for\n+        #end if\n+    ]]></token>\n+    <xml name="regions_macro">\n+        <conditional name="cond_region">\n+            <param name="select_region" type="select" label="Filter by regions" help="restricts output to only those alignments which overlap the specified region(s)">\n+                <option value="no" selected="True">No</option>\n+                
<option value="text">Manualy specify regions</option>\n+                <option value="tab">Regions from tabular file</option>\n+            </param>\n+            <when value="no"/>\n+            <when value="text">\n+                <repeat name="regions_repeat" min="1" default="1" title="Regions">\n+                    <param name="region" type="text" label="region" help="format chr:from-to">\n+                        <validator type="regex" message="Required format: CHR[:FROM[-TO]]; where CHR: string containing any character except quotes, whitespace and colon; FROM and TO: any integer">^[^\\s\'\\":]+(:\\d+(-\\d+){0,1}){0,1}$</validator>\n+                    </param>\n+                </repeat>\n+            </when>\n+            <when value="tab">\n+                <param name="targetregions" argument="-t/--target-regions" type="data" format="tabular" label="Target regions file" help="Do stats in these regions only. Tab-delimited file chr,from,to (1-based, inclusive)" />\n+            </when>\n+        </conditional>\n+    </xml>\n+\n+    <xml name="citations">\n+        <citations>\n+            <citation type="bibtex">\n+                @misc{SAM_def,\n+                title={Definition of SAM/BAM format},\n+                url = {https://samtools.github.io/hts-specs/},}\n+            </citation>\n+            <citation type="doi">10.1093/bioinformatics/btp352</citation>\n+            <citation type="doi">10.1093/bioinformatics/btr076</citation>\n+            <citation type="doi">10.1093/bioinformatics/btr509</citation>\n+            <citation type="bibtex">\n+                @misc{Danecek_et_al,\n+                Author={Danecek, P., Schiffels, S., Durbin, R.},\n+                title={Multiallelic calling model in bcftools (-m)},\n+                url = {http://samtools.github.io/bcftools/call-m.pdf},}\n+            </citation>\n+            <citation type="bibtex">\n+                @misc{Durbin_VCQC,\n+                Author={Durbin, R.},\n+             
   title={Segregation based metric for variant call QC},\n+                url = {http://samtools.github.io/bcftools/rd-SegBias.pdf},}\n+            </citation>\n+            <citation type="bibtex">\n+                @misc{Li_SamMath,\n+                Author={Li, H.},\n+                title={Mathematical Notes on SAMtools Algorithms},\n+                url = {http://www.broadinstitute.org/gatk/media/docs/Samtools.pdf},}\n+            </citation>\n+            <citation type="bibtex">\n+                @misc{SamTools_github,\n+                title={SAMTools GitHub page},\n+                url = {https://github.com/samtools/samtools},}\n+            </citation>\n+        </citations>\n+    </xml>\n+    <xml name="version_command">\n+        <version_command><![CDATA[samtools 2>&1 | grep Version]]></version_command>\n+    </xml>\n+    <xml name="stdio">\n+        <stdio>\n+            <exit_code range="1:" level="fatal" description="Error" />\n+        </stdio>\n+    </xml>\n+</macros>\n'
b
diff -r 000000000000 -r a941babb9268 samtools_ampliconclip.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/samtools_ampliconclip.xml Mon Jun 27 20:07:59 2022 +0000
[
@@ -0,0 +1,92 @@
+<tool id="samtools_ampliconclip" name="Samtools ampliconclip" version="@TOOL_VERSION@" profile="@PROFILE@">
+    <description>clip primer bases from bam files</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <expand macro="stdio"/>
+    <expand macro="version_command"/>
+    <command><![CDATA[
+        ## The ADDTHREADS token (expected to come from macros.xml, not shown here) sets the
+        ## addthreads shell variable that every samtools call below uses as its thread count.
+        @ADDTHREADS@
+        ## Clip, then pipe through collate, fixmate and sort before writing the output BAM.
+        samtools ampliconclip
+            $hard_clip_mode
+            ## Test the string form so that an explicit 0 is still passed on;
+            ## a plain truthiness check would drop it like an unset value.
+            #if str($min_length) != '':
+                --fail-len $min_length
+            #end if
+            --tolerance $tolerance
+            $strand
+            -b '${input_bed}'
+            -u
+            $both_ends
+            $no_excluded
+            -@ \$addthreads
+            '${input_bam}'
+            | samtools collate -@ \$addthreads -O -u -
+            | samtools fixmate -@ \$addthreads -u - -
+            ## NOTE(review): sort -m is documented as a per-thread limit; confirm GALAXY_MEMORY_MB is meant per thread.
+            | samtools sort -@ \$addthreads -m \${GALAXY_MEMORY_MB:-768}M -T "\${TMPDIR:-.}" -o '${output_bam}'
+    ]]></command>
+    <inputs>
+        <param name="input_bed" type="data" format="bed" label="Genetic intervals (in BED format)" />
+        <param name="input_bam" type="data" format="bam" label="BAM file" />
+        <param name="hard_clip_mode" argument="--hard-clip" type="boolean" checked="false" truevalue="--hard-clip" falsevalue="--soft-clip" label="hard clip" help="hard clip (remove bases), unchecked = default soft-clipping" />
+        <param name="strand" argument="--strand" type="boolean" checked="false" truevalue="--strand" falsevalue="" label="only clip reads that match bed file strand annotation" />
+        <param name="both_ends" argument="--both-ends" type="boolean" checked="false" truevalue="--both-ends" falsevalue="" label="clip both ends of reads (false = 5' only)" />
+        <param name="no_excluded" argument="--no-excluded" type="boolean" checked="false" truevalue="--no-excluded" falsevalue="" label="don't write excluded reads to output (default = write all)" />
+        <param name="min_length" argument="--fail-len" type="integer" min="0" optional="true" label="Min Read length" help="mark reads QCFAIL at this length or shorter after clipping" />
+        <param name="tolerance" argument="--tolerance" type="integer" value="5" min="0"  label="Tolerance" help="match region within this number of bases, default 5." />
+    </inputs>
+    <outputs>
+        <data name="output_bam" format="bam" />
+    </outputs>
+    <tests>
+        <!-- 1) all options at their defaults (soft clip, tolerance 5) -->
+        <test>
+            <param name="input_bed" value="eboVir3.1.bed" ftype="bed" />
+            <param name="input_bam" value="eboVir3.bam" ftype="bam" />
+            <output name="output_bam" file="eboVir3.clipped.bam" ftype="bam" lines_diff="22" />
+        </test>
+        <!-- 2) testing strand -->
+        <test>
+            <param name="input_bed" value="eboVir3.1.bed" ftype="bed" />
+            <param name="input_bam" value="eboVir3.bam" ftype="bam" />
+            <param name="strand" value="--strand" />
+            <output name="output_bam" file="eboVir3.clipped.strand.bam" ftype="bam" lines_diff="16" />
+        </test>
+        <!-- 3) testing hard clip -->
+        <test>
+            <param name="input_bed" value="eboVir3.1.bed" ftype="bed" />
+            <param name="input_bam" value="eboVir3.bam" ftype="bam" />
+            <param name="hard_clip_mode" value="--hard-clip" />
+            <output name="output_bam" file="eboVir3.hardclipped.bam" ftype="bam" lines_diff="14" />
+        </test>
+        <!-- 4) testing strand, min length, tolerance, both ends and no-excluded together -->
+        <test>
+            <param name="input_bed" value="eboVir3.1.bed" ftype="bed" />
+            <param name="input_bam" value="eboVir3.bam" ftype="bam" />
+            <param name="min_length" value="30" />
+            <param name="strand" value="--strand" />
+            <param name="tolerance" value="6" />
+            <param name="both_ends" value="--both-ends" />
+            <param name="no_excluded" value="--no-excluded" />
+            <output name="output_bam" file="eboVir3.clipped.strand_gt30.bam" ftype="bam" lines_diff="13" />
+        </test>
+    </tests>
+    <help><![CDATA[
+**What it does**
+
+Clips read alignments where they match BED file defined regions (e.g. for amplicon sequencing).
+
+::
+
+    samtools ampliconclip -b [INPUT BED] [INPUT BAM1] -o [OUTPUT]
+
+The clipped alignments are then passed through ``samtools collate``, ``samtools fixmate`` and ``samtools sort`` before being written to the output BAM.
+    ]]></help>
+    <expand macro="citations"/>
+</tool>
b
diff -r 000000000000 -r a941babb9268 test-data/eboVir3.1.bed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/eboVir3.1.bed Mon Jun 27 20:07:59 2022 +0000
b
@@ -0,0 +1,3 @@
+eboVir3 500 1500 1 0 -
+eboVir3 1500 2000 2 0 +
+eboVir3 1500 3000 3 0 -
b
diff -r 000000000000 -r a941babb9268 test-data/eboVir3.bam
b
Binary file test-data/eboVir3.bam has changed
b
diff -r 000000000000 -r a941babb9268 test-data/eboVir3.clipped.bam
b
Binary file test-data/eboVir3.clipped.bam has changed
b
diff -r 000000000000 -r a941babb9268 test-data/eboVir3.clipped.strand.bam
b
Binary file test-data/eboVir3.clipped.strand.bam has changed
b
diff -r 000000000000 -r a941babb9268 test-data/eboVir3.clipped.strand_gt30.bam
b
Binary file test-data/eboVir3.clipped.strand_gt30.bam has changed
b
diff -r 000000000000 -r a941babb9268 test-data/eboVir3.hardclipped.bam
b
Binary file test-data/eboVir3.hardclipped.bam has changed
b
diff -r 000000000000 -r a941babb9268 test-data/rebuild_output_bams.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/rebuild_output_bams.sh Mon Jun 27 20:07:59 2022 +0000
b
@@ -0,0 +1,4 @@
+samtools ampliconclip -b eboVir3.1.bed eboVir3.bam | samtools collate -@ 0 -O -u - | samtools fixmate -@ 0 -u - - | samtools sort -o eboVir3.clipped.bam
+samtools ampliconclip --strand -b eboVir3.1.bed eboVir3.bam | samtools collate -@ 0 -O -u - | samtools fixmate -@ 0 -u - - | samtools sort -o eboVir3.clipped.strand.bam
+samtools ampliconclip --hard-clip -b eboVir3.1.bed eboVir3.bam | samtools collate -@ 0 -O -u - | samtools fixmate -@ 0 -u - - | samtools sort -o eboVir3.hardclipped.bam
+samtools ampliconclip --both-ends --no-excluded --strand --fail-len 30 -b eboVir3.1.bed eboVir3.bam | samtools collate -@ 0 -O -u - | samtools fixmate -@ 0 -u - - | samtools sort -o eboVir3.clipped.strand_gt30.bam