diff umi-tools_extract.xml @ 0:418b961e0576 draft

planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
author iuc
date Thu, 10 Aug 2017 06:37:09 -0400
parents
children 79436b3019e9
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/umi-tools_extract.xml	Thu Aug 10 06:37:09 2017 -0400
@@ -0,0 +1,255 @@
+<tool id="umi_tools_extract" name="UMI-tools extract" version="0.4.4.0">
+    <description>Extract UMI from fastq files</description>
+    <requirements>
+        <requirement type="package" version="0.4.4">umi_tools</requirement>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+        ## Compression is decided per file: every input is symlinked under a name
+        ## ending in its own suffix (.gz only for the gzipped datatypes) and every
+        ## output is written to a name with the suffix of its matching input, so the
+        ## datatype an output inherits via format_source agrees with what is written.
+        ## The .gz file name is the same mechanism this wrapper already relied on to
+        ## get compressed input and output handled by umi_tools.
+        #set $ext1 = ''
+        #set $ext2 = ''
+        #if $input_type.type == 'single':
+            #if $input_type.input_single.is_of_type("fastq.gz", "fastqsanger.gz"):
+                #set $ext1 = '.gz'
+            #end if
+            ln -s '$input_type.input_single' 'input_single.fastq${ext1}' &&
+        #else
+            #if $input_type.input_read1.is_of_type("fastq.gz", "fastqsanger.gz"):
+                #set $ext1 = '.gz'
+            #end if
+            #if $input_type.input_read2.is_of_type("fastq.gz", "fastqsanger.gz"):
+                #set $ext2 = '.gz'
+            #end if
+            ln -s '$input_type.input_read1' 'input_read1.fastq${ext1}' &&
+            ln -s '$input_type.input_read2' 'input_read2.fastq${ext2}' &&
+        #end if
+        umi_tools extract
+            --bc-pattern='$bc_pattern'
+            #if $input_type.type == 'single':
+                --stdin='input_single.fastq${ext1}'
+                --stdout 'out.fastq${ext1}'
+            #else
+                --stdin='input_read1.fastq${ext1}'
+                --read2-in='input_read2.fastq${ext2}'
+                --stdout 'out1.fastq${ext1}'
+                --read2-out='out2.fastq${ext2}'
+                #if $input_type.barcode.split == "1":
+                    --split-barcode
+                    --bc-pattern2='$input_type.barcode.bc_pattern2'
+                #end if
+            #end if
+            ## The checkbox asks whether the barcode is at the 5-prime end, so the
+            ## flag is emitted only when the box is NOT checked.
+            #if not $prime3:
+                --3prime
+            #end if
+            #if $quality.quality_selector == 'true':
+                --quality-filter-threshold '$quality.quality_filter_threshold'
+                --quality-encoding '$quality.quality_encoding'
+            #end if
+            ## The out_log dataset is only created when print_log is checked.
+            #if $print_log == "1":
+                --log='$out_log'
+            #else
+                --supress-stats
+            #end if
+        ## Move the results onto the dataset paths Galaxy allocated for the outputs.
+        #if $input_type.type == 'single':
+            && mv 'out.fastq${ext1}' '$out'
+        #else
+            && mv 'out1.fastq${ext1}' '$out1'
+            && mv 'out2.fastq${ext2}' '$out2'
+        #end if
+    ]]></command>
+    <inputs>
+        <!-- Library layout; the paired branch adds the second read and the split-barcode options. -->
+        <conditional name="input_type">
+            <param name="type" type="select" label="Library type">
+                <option value="single">Single-end</option>
+                <option value="paired">Paired-end</option>
+            </param>
+            <when value="single">
+                <param name="input_single" type="data" format="fastq,fastq.gz" label="Reads in FASTQ format" />
+            </when>
+            <when value="paired">
+                <param name="input_read1" type="data" format="fastq,fastq.gz" label="Read 1 in FASTQ format" />
+                <param name="input_read2" type="data" format="fastq,fastq.gz" label="Read 2 in FASTQ format" />
+                <conditional name="barcode">
+                    <param name="split" argument="--split-barcode" type="select" label="Barcode on both reads?">
+                        <option value="0">Barcode on first read only</option>
+                        <option value="1">Barcode on both reads</option>
+                    </param>
+                    <when value="0" />
+                    <when value="1">
+                        <param name="bc_pattern2" argument="--bc-pattern2" type="text" value="" label="Barcode pattern for second read"
+                            help="Use this option to specify the format of the UMI/barcode for
+                                  the second read of the pair if required." />
+                    </when>
+                </conditional>
+            </when>
+        </conditional>
+        <param name="bc_pattern" argument="--bc-pattern" type="text" label="Barcode pattern for first read"
+            help="Use this option to specify the format of the UMI/barcode. Use Ns to
+                    represent the random positions and Xs to indicate the bc positions.
+                    Bases with Ns will be extracted and added to the read name. Remaining
+                    bases, marked with an X will be reattached to the read.">
+            <validator type="empty_field" message="A barcode pattern is required." />
+        </param>
+        <!-- Checked (default) means the barcode is at the 5' end; the command adds the 3prime flag only when unchecked. -->
+        <param name="prime3" argument="--3prime" type="boolean" label="Is the barcode at the 5' end?"
+            truevalue="1" falsevalue="0" checked="true"
+            help="Leave checked when the barcode is on the 5' end of the read (the default).
+                Uncheck to specify that it is on the 3' end instead." />
+        <param name="print_log" argument="-L" type="boolean" label="Output log?"
+            truevalue="1" falsevalue="0" checked="true"
+            help="Choose if you want to generate a text file containing logging information." />
+        <conditional name="quality">
+            <param name="quality_selector" type="select" label="Enable quality filter?">
+                <option value="false">No</option>
+                <option value="true">Yes</option>
+            </param>
+            <when value="false" />
+            <when value="true">
+                <param name="quality_filter_threshold" label="Phred score threshold"
+                    type="integer" value="20" argument="--quality-filter-threshold"
+                    help="Remove reads where any UMI base quality score falls below this threshold." />
+                <param name="quality_encoding" argument="--quality-encoding" type="select" label="Quality score encoding"
+                    help="Quality score encoding. Choose from phred33 [33-77], phred64 [64-106] or solexa [59-106].">
+                    <option value="phred33">phred33 [33-77]</option>
+                    <option value="phred64">phred64 [64-106]</option>
+                    <option value="solexa">solexa [59-106]</option>
+                </param>
+            </when>
+        </conditional>
+    </inputs>
+    <outputs> <!-- FASTQ outputs take their datatype from the matching input; the labels tell the datasets apart in the history. -->
+        <data name="out" format_source="input_single" label="${tool.name} on ${on_string}: Reads">
+            <filter>input_type['type'] == "single"</filter>
+        </data>
+        <data name="out1" format_source="input_read1" label="${tool.name} on ${on_string}: Reads1">
+            <filter>input_type['type'] == "paired"</filter>
+        </data>
+        <data name="out2" format_source="input_read2" label="${tool.name} on ${on_string}: Reads2">
+            <filter>input_type['type'] == "paired"</filter>
+        </data>
+        <data name="out_log" format="txt" label="${tool.name} on ${on_string}: Log">
+            <filter>print_log == True</filter>
+        </data>
+    </outputs>
+    <tests>
+        <test> <!-- Single-end, uncompressed FASTQ; prime3 unchecked and the quality filter enabled. -->
+            <param name="type" value="single" />
+            <param name="input_single" value="t_R1.fastq" ftype="fastq" />
+            <param name="bc_pattern" value="XXXNNN" />
+            <param name="prime3" value="0" />
+            <param name="quality_selector" value="true" />
+            <param name="quality_filter_threshold" value="10" />
+            <param name="quality_encoding" value="phred33" />
+            <output name="out" file="out_SE.fastq" />
+            <output name="out_log" file="out_single.log" lines_diff="15"/>
+        </test>
+        <test> <!-- Paired-end, gzipped FASTQ; every other parameter left at its default. -->
+            <param name="type" value="paired" />
+            <param name="input_read1" value="t_R1.fastq.gz" ftype="fastq.gz" />
+            <param name="input_read2" value="t_R2.fastq.gz" ftype="fastq.gz" />
+            <param name="bc_pattern" value="NNNXXX" />
+            <output name="out1" file="out_R1.fastq.gz" decompress="true" />
+            <output name="out2" file="out_R2.fastq.gz" decompress="true" />
+            <output name="out_log" file="out_paired.log" lines_diff="10"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+
+
+UMI-tools extract.py - Extract UMI from fastq
+=============================================
+
+Purpose
+-------
+
+Extract UMI barcode from a read and add it to the read name, leaving
+any sample barcode in place. Can deal with paired end reads and UMIs
+split across the paired ends.
+
+Options
+-------
+
+--split-barcode
+       By default the UMI is assumed to be on the first read. Use this
+       option if the UMI is contained on both reads and specify the
+       pattern of the barcode/UMI on the second read using the option
+       ``--bc-pattern2``
+
+--bc-pattern
+       Use this option to specify the format of the UMI/barcode. Use Ns to
+       represent the random positions and Xs to indicate the bc positions.
+       Bases with Ns will be extracted and added to the read name. Remaining
+       bases, marked with an X will be reattached to the read.
+
+       E.g. If the pattern is NNXXNN,
+       Then the read:
+
+       @HISEQ:87:00000000 read1
+       AAGGTTGCTGATTGGATGGGCTAG
+       +
+       DA1AEBFGGCG01DFH00B1FF0B
+
+       will become:
+       @HISEQ:87:00000000_AATT read1
+       GGGCTGATTGGATGGGCTAG
+       +
+       1AFGGCG01DFH00B1FF0B
+
+--bc-pattern2
+       Use this option to specify the format of the UMI/barcode for
+       the second read pair if required. If --bc-pattern2 is not
+       supplied, this defaults to the same pattern as --bc-pattern
+
+--3prime
+       By default the barcode is assumed to be on the 5' end of the read, but
+       use this option to specify that it is on the 3' end instead
+
+-L
+       Specify a log file to retain logging information and final statistics
+
+--split-barcode
+       barcode is split across read pair
+
+--quality-filter-threshold=QUALITY_FILTER_THRESHOLD
+       Remove reads where any UMI base quality score falls
+       below this threshold
+--quality-encoding=QUALITY_ENCODING
+       Quality score encoding. Choose from phred33 [33-77],
+       phred64 [64-106] or solexa [59-106]
+
+Usage:
+------
+
+For single ended reads:
+        umi_tools extract --bc-pattern=[PATTERN] -L extract.log [OPTIONS]
+
+reads from stdin and outputs to stdout.
+
+For paired end reads:
+        umi_tools extract --bc-pattern=[PATTERN] --read2-in=[FASTQIN] --read2-out=[FASTQOUT] -L extract.log [OPTIONS]
+
+reads end one from stdin and end two from FASTQIN and outputs end one to stdout
+and end two to FASTQOUT.
+
+    ]]></help>
+    <citations> <!-- One DOI reference plus a BibTeX entry for the UMI-tools GitHub repository. -->
+        <citation type="doi">10.1101/gr.209601.116</citation>
+        <citation type="bibtex">
+            @misc{githubUMI-tools,
+            title = {UMI-tools},
+            publisher = {GitHub},
+            journal = {GitHub repository},
+            url = {https://github.com/CGATOxford/UMI-tools},
+            }
+        </citation>
+    </citations>
+</tool>