Mercurial > repos > iuc > umi_tools_extract
diff umi-tools_extract.xml @ 0:418b961e0576 draft
planemo upload commit 453bb3b44d9f27908cbe2677378da88b9f77b5cf
author | iuc |
---|---|
date | Thu, 10 Aug 2017 06:37:09 -0400 |
parents | |
children | 79436b3019e9 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/umi-tools_extract.xml Thu Aug 10 06:37:09 2017 -0400 @@ -0,0 +1,255 @@ +<tool id="umi_tools_extract" name="UMI-tools extract" version="0.4.4.0"> + <description>Extract UMI from fastq files</description> + <requirements> + <requirement type="package" version="0.4.4">umi_tools</requirement> + </requirements> + <command detect_errors="exit_code"><![CDATA[ + #set $gz = False + #if $input_type.type == 'single': + #if $input_type.input_single.is_of_type("fastq.gz", "fastqsanger.gz"): + ln -s '$input_type.input_single' input_single.gz && + #set $gz = True + #end if + #else + #if $input_type.input_read1.is_of_type("fastq.gz", "fastqsanger.gz"): + ln -s '$input_type.input_read1' input_read1.gz && + ln -s '$input_type.input_read2' input_read2.gz && + #set $gz = True + #end if + #end if + umi_tools extract + --bc-pattern='$bc_pattern' + #if $input_type.type == 'single': + #if $gz: + --stdin=input_single.gz + --stdout out.gz + #else + --stdin='$input_type.input_single' + --stdout '$out' + #end if + #else: + #if $gz: + --stdin=input_read1.gz + --read2-in=input_read2.gz + --stdout out1.gz + --read2-out=out2.gz + #else: + --stdin='$input_type.input_read1' + --read2-in='$input_type.input_read2' + --stdout '$out1' + --read2-out='$out2' + #end if + #if $input_type.barcode.split == "1": + --split-barcode + --bc-pattern2='$input_type.barcode.bc_pattern2' + #end if + #end if + #if not $prime3: + --3prime + #end if + #if $quality.quality_selector =='true': + --quality-filter-threshold '$quality.quality_filter_threshold' + --quality-encoding '$quality.quality_encoding' + #end if + #if $print_log == "1": + --log='$out_log' + #else + --supress-stats + #end if + #if $gz: + #if $input_type.type == 'single': + && mv out.gz '$out' + #else + && mv out1.gz '$out1' + && mv out2.gz '$out2' + #end if + #end if + ]]></command> + <inputs> + <conditional name="input_type"> + <param name="type" type="select" label="Library type"> + <option value="single">Single-end</option> + <option value="paired">Paired-end</option> + </param> + <when value="single"> + <param name="input_single" type="data" format="fastq,fastq.gz" label="Reads in FASTQ format" /> + </when> + <when value="paired"> + <param name="input_read1" type="data" format="fastq,fastq.gz" label="Reads in FASTQ format" /> + <param name="input_read2" type="data" format="fastq,fastq.gz" label="Reads in FASTQ format" /> + <conditional name="barcode"> + <param name="split" argument="--split-barcode" type="select" label="Barcode on both reads?"> + <option value="0">Barcode on first read only</option> + <option value="1">Barcode on both reads</option> + </param> + <when value="0"> + </when> + <when value="1"> + <param name="bc_pattern2" argument="--bc-pattern2" type="text" value="" label="Barcode pattern for second read" + help="Use this option to specify the format of the UMI/barcode for + the second read pair if required."> + </param> + </when> + </conditional> + </when> + </conditional> + <param name="bc_pattern" argument="--bc-pattern" type="text" label="Barcode pattern for first read" + help="Use this option to specify the format of the UMI/barcode. Use Ns to + represent the random positions and Xs to indicate the bc positions. + Bases with Ns will be extracted and added to the read name. Remaining + bases, marked with an X will be reattached to the read."> + </param> + <param name="prime3" argument="--3prime" type="boolean" label="Is the barcode at the 5' end?" + truevalue="1" falsevalue="0" checked="true" + help="By default the barcode is assumed to be on the 5' end of the read, but + use this option to sepecify that it is on the 3' end instead." /> + <param name="print_log" argument="-L" type="boolean" label="Output log?" + truevalue="1" falsevalue="0" checked="true" + help="Choose if you want to generate a text file containing logging information." /> + <conditional name="quality"> + <param name="quality_selector" type="select" label="Enable quality filter?" > + <option value="false">No</option> + <option value="true">Yes</option> + </param> + <when value="false"> + </when> + <when value="true"> + <param name="quality_filter_threshold" label="Phred score threshold" + type="integer" value="20" argument="--quality-filter-threshold" + help="Remove reads where any UMI base quality score falls below this threshold." /> + <param name="quality_encoding" argument="--quality-encoding" type="select" label="Library type" + help="Quality score encoding. Choose from phred33 [33-77], phred64 [64-106] or solexa [59-106]."> + <option value="phred33">phred33 [33-77]</option> + <option value="phred64">phred64 [64-106]</option> + <option value="solexa">solexa [59-106]</option> + </param> + </when> + </conditional> + </inputs> + <outputs> + <data name="out" format_source="input_single"> + <filter>input_type['type'] == "single"</filter> + </data> + <data name="out1" format_source="input_read1"> + <filter>input_type['type'] == "paired"</filter> + </data> + <data name="out2" format_source="input_read2"> + <filter>input_type['type'] == "paired"</filter> + </data> + <data name="out_log" format="txt"> + <filter>print_log == True</filter> + </data> + </outputs> + <tests> + <test> + <param name="type" value="single" /> + <param name="input_single" value="t_R1.fastq" ftype="fastq" /> + <param name="bc_pattern" value="XXXNNN" /> + <param name="prime3" value="0" /> + <param name="quality_selector" value="true" /> + <param name="quality_filter_threshold" value="10" /> + <param name="quality_encoding" value="phred33" /> + <output name="out" file="out_SE.fastq" /> + <output name="out_log" file="out_single.log" lines_diff="15"/> + </test> + <test> + <param name="type" value="paired" /> + <param name="input_read1" value="t_R1.fastq.gz" ftype="fastq.gz" /> + <param name="input_read2" value="t_R2.fastq.gz" ftype="fastq.gz" /> + <param name="bc_pattern" value="NNNXXX" /> + <output name="out1" file="out_R1.fastq.gz" decompress="true" /> + <output name="out2" file="out_R2.fastq.gz" decompress="true" /> + <output name="out_log" file="out_paired.log" lines_diff="10"/> + </test> + </tests> + <help><![CDATA[ + + +UMI-tools extract.py - Extract UMI from fastq +============================================= + +Purpose +------- + +Extract UMI barcode from a read and add it to the read name, leaving +any sample barcode in place. Can deal with paired end reads and UMIs +split across the paired ends + +Options +------- + +--split-barcode + By default the UMI is assumed to be on the first read. Use this + option if the UMI is contained on both reads and specify the + pattern of the barcode/UMI on the second read using the option + ``--bc-pattern2`` + +--bc-pattern + Use this option to specify the format of the UMI/barcode. Use Ns to + represent the random positions and Xs to indicate the bc positions. + Bases with Ns will be extracted and added to the read name. Remaining + bases, marked with an X will be reattached to the read. + + E.g. If the pattern is NNXXNN, + Then the read: + + @HISEQ:87:00000000 read1 + AAGGTTGCTGATTGGATGGGCTAG + DA1AEBFGGCG01DFH00B1FF0B + + + + will become: + @HISEQ:87:00000000_AATT read1 + GGGCTGATTGGATGGGCTAG + 1AFGGCG01DFH00B1FF0B + + + +--bc-pattern2 + Use this option to specify the format of the UMI/barcode for + the second read pair if required. If --bc-pattern2 is not + supplied, this defaults to the same pattern as --bc-pattern + +--3prime + By default the barcode is assumed to be on the 5' end of the read, but + use this option to sepecify that it is on the 3' end instead + +-L + Specify a log file to retain logging information and final statistics + +--split-barcode + barcode is split across read pair + +--quality-filter-threshold=QUALITY_FILTER_THRESHOLD + Remove reads where any UMI base quality score falls + below this threshold +--quality-encoding=QUALITY_ENCODING + Quality score encoding. Choose from phred33[33-77] + phred64 [64-106] or solexa [59-106] + +Usage: +------ + +For single ended reads: + umi_tools extract --bc-pattern=[PATTERN] -L extract.log [OPTIONS] + +reads from stdin and outputs to stdout. + +For paired end reads: + umi_tools extract --bc-pattern=[PATTERN] --read2-in=[FASTQIN] --read2-out=[FASTQOUT] -L extract.log [OPTIONS] + +reads end one from stdin and end two from FASTQIN and outputs end one to stdin +and end two to FASTQOUT. + + ]]></help> + <citations> + <citation type="doi">10.1101/gr.209601.116</citation> + <citation type="bibtex"> + @misc{githubUMI-tools, + title = {UMI-tools}, + publisher = {GitHub}, + journal = {GitHub repository}, + url = {https://github.com/CGATOxford/UMI-tools}, + } + </citation> + </citations> +</tool>