umi_tools_count: umi-tools_counts.xml comparison

comparison umi-tools_counts.xml @ 0:8db56d2f8b72 draft

planemo upload commit c79a5f4a05156bb2a6035a844aa9ad8f0e59ecb5

author	iuc
date	Thu, 21 Jun 2018 15:20:14 -0400
parents
children	3c932ad4a174

comparison

equal deleted inserted replaced

--1:000000000000
+:8db56d2f8b72
+<tool id="umi_tools_count" name="UMI-tools count" version="@VERSION@.0">
+<description>Count UMIs from BAM files</description>
+<!-- Shared macros: imports macros.xml and defines the sanitizer expanded by the tag/delimiter text parameters in this tool. -->
+<macros>
+<import>macros.xml</import>
+<!-- sanitize_tag: whitelist of letters and digits only; every other character is replaced by the empty string (invalid_char=""). -->
+<xml name="sanitize_tag" >
+<sanitizer invalid_char="">
+<valid initial="string.letters,string.digits" />
+</sanitizer>
+</xml>
+</macros>
+<expand macro="requirements" />
+<command detect_errors="exit_code"><![CDATA[
+## Link the BAM and its index metadata file into the working directory under fixed names.
+ln -s '${input_bam}' 'input.bam' &&
+ln -s '${input_bam.metadata.bam_index}' 'input.bam.bai' &&
+umi_tools count
+-I input.bam
+## Boolean flags render as either the option or an empty string, so they are left unquoted;
+## quoting them would pass a literal empty-string argument to umi_tools whenever the box is unchecked.
+$bam_paired
+--extract-umi-method='$barcodes.extract_umi_method.value'
+#if $barcodes.extract_umi_method == 'read_id':
+--umi-separator='$barcodes.delimiter'
+#else if $barcodes.extract_umi_method == 'tag':
+--umi-tag='$barcodes.umi_tag'
+--cell-tag='$barcodes.cell_tag'
+#end if
+--method='$grouping_method.value'
+--edit-distance-threshold='$hamming_distance'
+--mapping-quality='$advanced.mapping_quality'
+## Counting is always per gene (hard-coded); see the note in the advanced section about combining it with a gene tag or per-contig.
+--per-gene
+$wide_format_cell_counts
+$advanced.per_contig
+$advanced.per_cell
+#if $advanced.gene_tag:
+--gene-tag='$advanced.gene_tag'
+#end if
+#if $advanced.skip_tags_regex.value:
+--skip-tags-regex='$advanced.skip_tags_regex'
+#end if
+## A seed of 0 means "no explicit seed". Compare as a string so the check does not depend on
+## how the parameter wrapper implements != against an int.
+#if str($advanced.random_seed) != '0':
+--random-seed='$advanced.random_seed'
+#end if
+-S '$out_counts'
+-L '$out_log'
+]]></command>
+<!-- Tool form: the BAM input, how UMIs/cell barcodes are encoded, the grouping method, and an "advanced" section of optional filters. -->
+<inputs>
+<param name="input_bam" type="data" format="bam" label="Sorted BAM file" help="Please use the samtools sort tool to ensure a correct BAM input" />
+<param name="bam_paired" type="boolean" truevalue="--paired" falsevalue="" checked="false"
+label="Bam is paired-end"
+help="both read pairs will be output. This will also force the use of the template length to determine
+reads with the same mapping coordinates." />
+<conditional name="barcodes" >
+<param name="extract_umi_method" type="select" label="Umi Extract Method" help="How are the barcodes encoded in the read?" >
+<option value="read_id" selected="true">Barcodes are contained at the end of the read separated by a delimiter</option>
+<option value="tag" >Barcodes are contained in tags</option>
+<option value="umis" >Barcodes were extracted using umis</option>
+</param>
+<when value="read_id" >
+<!-- The delimiter gets its own whitelist: the shared letters/digits-only sanitizer would strip the default "_" (and ":"), leaving an empty separator. -->
+<param name="delimiter" type="text" label="Delimiter between read id and the UMI" value="_" >
+<sanitizer invalid_char="">
+<valid initial="string.letters,string.digits">
+<add value="_"/>
+<add value=":"/>
+<add value="-"/>
+<add value="."/>
+</valid>
+</sanitizer>
+</param>
+</when>
+<when value="tag" >
+<param name="umi_tag" type="text" label="Tag which contains the UMI" >
+<expand macro="sanitize_tag" />
+</param>
+<param name="cell_tag" type="text" label="Tag which contains the cell barcode" >
+<expand macro="sanitize_tag" />
+</param>
+</when>
+<when value="umis"></when>
+</conditional>
+<param name="grouping_method" type="select" label="Method to identify group of reads" help="UMIs with the same (or similar) codes can be grouped together. The simplest methods 'unique' and 'percentile' group identical
+UMIs, however 'cluster', 'adjacency', and 'directional' can group similar umis with edit distances less than some threshold. Unique: Reads group share the exact same UMI. Percentile: Reads group share the same UMI, and UMIs with
+counts &lt; 1% of the median counts for UMIs at the same position are ignored. Cluster: Identify clusters of connected UMIs (based on hamming distance threshold). Adjacency: Same as cluster, but considers only directly adjacent
+UMIs in the cluster. Directional: Identify cluster of connected UMIs based on hamming distance and umi." >
+<option value="unique" >Unique</option>
+<option value="percentile">Percentile</option>
+<option value="cluster">Cluster</option>
+<option value="adjacency">Adjacency</option>
+<option value="directional" selected="true" >Directional</option>
+</param>
+<param name="hamming_distance" type="integer" label="Edit distance threshold" min="0" value="1" />
+<param name="wide_format_cell_counts" type="boolean" truevalue="--wide-format-cell-counts" falsevalue="" checked="false" label="Output a matrix of genes and cells, instead of a flat file" />
+<section name="advanced" title="Extra parameters" >
+<param name="mapping_quality" type="integer" min="0" value="0" label="Minimum mapping quality" />
+<!-- Currently hard-coded parameter. Leave here if useful to future wrapper  -->
+<!-- <param argument="-\-per-gene" name="per_gene" type="text" label="Group reads together if they have the same gene" help="Reads will be grouped together if they have the same gene. This is useful if your library
+prep generates PCR duplicates with non-identical alignment positions such as CEL-Seq. Note this option is hardcoded to be on with the count command. I.e counting is always performed per-gene. Must be combined with either
+-\-gene-tag or -\-per-contig option" /> -->
+<param name="gene_tag" type="text" label="Deduplicate per gene." help="The gene information is encoded in the bam read tag." value="" >
+<expand macro="sanitize_tag" />
+</param>
+<!-- Regex whitelist: letters and digits plus the metacharacters listed below; anything else is dropped before it reaches the command line. -->
+<param name="skip_tags_regex" type="text" label="Skip any reads where the gene matches this tag" value="" >
+<sanitizer invalid_char="">
+<valid initial="string.letters,string.digits">
+<add value="!="/>
+<add value="-"/>
+<add value="_"/>
+<add value="."/>
+<add value="?"/>
+<add value="|"/><!-- pipe / alternation, used by the tool's own test regex -->
+<add value="&lt;"/><!-- left triangle bracket -->
+<add value="&gt;"/><!-- right triangle bracket -->
+<add value="&#91;"/> <!-- left square bracket -->
+<add value="&#93;"/> <!-- right square bracket -->
+<add value="&#94;"/> <!-- caret -->
+<add value="&#123;"/> <!-- left curly -->
+<add value="&#125;"/> <!-- right curly -->
+<add value="&#40;"/> <!-- left parenthesis -->
+<add value="&#41;"/> <!-- right parenthesis -->
+</valid>
+</sanitizer>
+</param>
+<param name="per_contig" type="boolean" truevalue="--per-contig" falsevalue="" checked="false"
+label="Deduplicate per contig (field 3 in BAM; RNAME)"
+help="All reads with the same contig will be considered to have the same alignment position. This is useful if you have aligned to a reference transcriptome with one transcript per gene." />
+<param name="per_cell" type="boolean" truevalue="--per-cell" falsevalue="" checked="false"
+label="Group reads only if they have the same cell barcode." />
+<param name="random_seed" type="integer" min="0" value="0" label="Random Seed" />
+</section>
+</inputs>
+<!-- Outputs: out_counts is the file passed to -S and out_log the file passed to -L in the command template. -->
+<outputs>
+<data name="out_counts" format="tsv" />
+<data name="out_log" format="txt" />
+</outputs>
+<!-- All three tests share the same BAM, seed, grouping method, gene tag, skip regex and "umis" extraction method; they differ only in per_cell and wide_format_cell_counts. -->
+<tests>
+<test><!-- count_single_gene_tag: per_cell and wide_format_cell_counts left at their defaults -->
+<param name="input_bam" value="chr19_gene_tags.bam" />
+<param name="random_seed" value="123456789" />
+<param name="grouping_method" value="directional" />
+<param name="gene_tag" value="XF" />
+<param name="skip_tags_regex" value="^[__|Unassigned]" />
+<param name="extract_umi_method" value="umis" />
+<output name="out_counts" value="count_single_gene_tag.tsv" />
+</test>
+<test><!-- count_single_cells_gene_tag: same as the first test with per_cell enabled -->
+<param name="input_bam" value="chr19_gene_tags.bam" />
+<param name="random_seed" value="123456789" />
+<param name="grouping_method" value="directional" />
+<param name="gene_tag" value="XF" />
+<param name="skip_tags_regex" value="^[__|Unassigned]" />
+<param name="per_cell" value="true" /><!-- new -->
+<param name="extract_umi_method" value="umis" />
+<output name="out_counts" value="count_single_cells_gene_tag.tsv" />
+</test>
+<test><!-- count_single_cells_wide_gene_tag: per_cell enabled and the wide (matrix) output format requested -->
+<param name="input_bam" value="chr19_gene_tags.bam" />
+<param name="random_seed" value="123456789" />
+<param name="grouping_method" value="directional" />
+<param name="gene_tag" value="XF" />
+<param name="skip_tags_regex" value="^[__|Unassigned]" />
+<param name="per_cell" value="true" /><!-- new -->
+<param name="extract_umi_method" value="umis" />
+<param name="wide_format_cell_counts" value="true" />
+<output name="out_counts" value="count_single_cells_gene_tag_wide.tsv" />
+</test>
+</tests>
+<help><![CDATA[
+UMI Tools count - Count reads per gene from BAM using UMIs
+----------------------------------------------------------
+Purpose
+-------
+The purpose of this command is to count the number of reads per gene based
+on the mapping co-ordinate and the UMI attached to the read.
+It is assumed that the FASTQ files were processed with extract_umi.py
+before mapping and thus the UMI is the last word of the read name. e.g:
+@HISEQ:87:00000000_AATT
+where AATT is the UMI sequence.
+If you have used an alternative method which does not separate the
+read id and UMI with a "_", such as bcl2fastq which uses ":", you can
+specify the separator, or if your UMIs are encoded in a tag you can also specify this.
+]]></help>
+<expand macro="citations" />
+</tool>

Mercurial > repos > iuc > umi_tools_count

comparison umi-tools_counts.xml @ 0:8db56d2f8b72 draft