Mercurial > repos > iuc > umi_tools_count
changeset 1:3c932ad4a174 draft
planemo upload commit 9a3aeb2c588f9f67824ea5568923ce70b048499a
author | iuc |
---|---|
date | Sat, 14 Jul 2018 06:14:24 -0400 |
parents | 8db56d2f8b72 |
children | 2cf36d9ea571 |
files | test-data/fc.ENSDARG00000019692.bam test-data/fc.ENSDARG00000019692.counts umi-tools_counts.xml |
diffstat | 3 files changed, 66 insertions(+), 69 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/fc.ENSDARG00000019692.counts Sat Jul 14 06:14:24 2018 -0400 @@ -0,0 +1,2 @@ +gene ACCAGA ACGTTG ACTCTG AGACAG AGTGTC ATGTCG CTAGGA GAAGAC GGTAAC TGGTGA +ENSDARG00000019692 2 1 1 1 1 1 1 1 2 1
--- a/umi-tools_counts.xml Thu Jun 21 15:20:14 2018 -0400 +++ b/umi-tools_counts.xml Sat Jul 14 06:14:24 2018 -0400 @@ -1,5 +1,5 @@ -<tool id="umi_tools_count" name="UMI-tools count" version="@VERSION@.0"> - <description>Count UMIs from BAM files</description> +<tool id="umi_tools_count" name="UMI-tools count" version="@VERSION@.1"> + <description>performs quantification of UMIs from BAM files</description> <macros> <import>macros.xml</import> <xml name="sanitize_tag" > @@ -10,93 +10,84 @@ </macros> <expand macro="requirements" /> <command detect_errors="exit_code"><![CDATA[ - ln -s '${input_bam}' 'input.bam' && ln -s '${input_bam.metadata.bam_index}' 'input.bam.bai' && - + umi_tools count - -I input.bam - '$bam_paired' - --extract-umi-method='$barcodes.extract_umi_method.value' - #if $barcodes.extract_umi_method == 'read_id': - --umi-separator='$barcodes.delimiter' - #else if $barcodes.extract_umi_method == 'tag': - --umi-tag='$barcodes.umi_tag' - --cell-tag='$barcodes.cell_tag' - #end if - --method='$grouping_method.value' - --edit-distance-threshold='$hamming_distance' - --mapping-quality='$advanced.mapping_quality' - --per-gene - $wide_format_cell_counts - $advanced.per_contig - '$advanced.per_cell' - #if $advanced.gene_tag: - --gene-tag='$advanced.gene_tag' - #end if - #if $advanced.skip_tags_regex.value: - --skip-tags-regex='$advanced.skip_tags_regex' - #end if - #if $advanced.random_seed != 0: + -I input.bam + '$paired' + --extract-umi-method='$barcodes.extract_umi_method.value' + #if str($barcodes.extract_umi_method) == 'read_id': + --umi-separator='$barcodes.umi_separator.value' + #else if str($barcodes.extract_umi_method) == 'tag': + --umi-tag='$barcodes.umi_tag.value' + --cell-tag='$barcodes.cell_tag.value' + #end if + --method='$method.value' + --edit-distance-threshold='$edit_distance_threshold' + --mapping-quality='$advanced.mapping_quality' + --per-gene + '$wide_format_cell_counts' + '$advanced.per_contig' + '$advanced.per_cell' + #if str($advanced.gene_tag) != "": + --gene-tag='$advanced.gene_tag.value' + #end if + #if str($advanced.skip_tags_regex) != "": + --skip-tags-regex='$advanced.skip_tags_regex.value' + #end if + #if '$advanced.random_seed' != 0: --random-seed='$advanced.random_seed' - #end if - -S '$out_counts' - -L '$out_log' + #end if + -S '$out_counts' ]]></command> <inputs> <param name="input_bam" type="data" format="bam" label="Sorted BAM file" help="Please use the samtools sort tool to ensure a correct BAM input" /> - - <param name="bam_paired" type="boolean" truevalue="--paired" falsevalue="" checked="false" - label="Bam is paired-end" - help="both read pairs will be output. This will also force the use of the template length to determine -reads with the same mapping coordinates." /> - + <param argument="--paired" type="boolean" truevalue="--paired" falsevalue="" checked="false" label="Bam is paired-end" help="both read pairs will be output. This will also force the use of the template length to determine reads with the same mapping coordinates." /> <conditional name="barcodes" > - <param name="extract_umi_method" type="select" label="Umi Extract Method" help="How are the barcodes encoded in the read?" > + <param argument="--extract-umi-method" name="extract_umi_method" type="select" label="Umi Extract Method" help="How are the barcodes encoded in the read?" > <option value="read_id" selected="true">Barcodes are contained at the end of the read seperated by a delimiter</option> <option value="tag" >Barcodes are contained in tags</option> <option value="umis" >Barcodes were extracted using umis</option> </param> <when value="read_id" > - <param name="delimiter" type="text" label="Delimiter between read id and the UMI" value="_" > - <expand macro="sanitize_tag" /> + <param argument="--umi-separator" name="umi_separator" type="text" label="Delimiter between read id and the UMI" value="_" > + <sanitizer invalid_char="" > + <valid initial="string.punctuation" /> + </sanitizer> </param> </when> <when value="tag" > - <param name="umi_tag" type="text" label="Tag which contains the UMI" > + <param argument="--umi-tag" name="umi_tag" type="text" label="Tag which contains the UMI" > <expand macro="sanitize_tag" /> </param> - <param name="cell_tag" type="text" label="Tag which contains the cell barcode" > + <param argument="--cell-tag" name="cell_tag" type="text" label="Tag which contains the cell barcode" > <expand macro="sanitize_tag" /> </param> </when> <when value="umis"></when> </conditional> - - <param name="grouping_method" type="select" label="Method to identify group of reads" help="UMIs with the same (or similar) codes can be grouped together. The simplest methods 'unique' and 'percentile' group identical -UMIs, however 'cluster', 'adjacency', and 'directional' can group similar umis with edit distances less than some threshold. Unique: Reads group share the exact same UMI. Percentile: Reads group share the same UMI, and UMIs with -counts < 1% of the median counts for UMIs at the same position are ignored. Cluster: Identify clusters of connected UMIs (based on hamming distance threshold). Adjacency: Same as cluster, but considers only directly ajacent -UMIs in the cluster. Directional: Identify cluster of connected UMIs based on hamming distance and umi." > + <param argument="--method" type="select" label="Method to identify group of reads" help="UMIs with the same (or similar) codes can be grouped together. The simplest methods 'unique' and 'percentile' group identical +UMIs, however 'cluster', 'adjacency', and 'directional' can group similar umis with edit distances less than some threshold. Unique: Reads group share the exact same UMI. Percentile: Reads group share the same UMI, and UMIs with +counts < 1% of the median counts for UMIs at the same position are ignored. Cluster: Identify clusters of connected UMIs (based on hamming distance threshold). Adjacency: Same as cluster, but considers only directly ajacent UMIs in the cluster. Directional: Identify cluster of connected UMIs based on hamming distance and umi." > <option value="unique" >Unique</option> <option value="percentile">Percentile</option> <option value="cluster">Cluster</option> <option value="adjacency">Adjacency</option> <option value="directional" selected="true" >Directional</option> </param> - - <param name="hamming_distance" type="integer" label="Edit distance threshold" min="0" value="1" /> - <param name="wide_format_cell_counts" type="boolean" truevalue="--wide-format-cell-counts" falsevalue="" checked="false" label="Output a mtrix of genes and cells, instead of a flat file" /> - + <param argument="--edit-distance-threshold" name="edit_distance_threshold" type="integer" label="Edit distance threshold" min="0" value="1" /> + <param argument="--wide-format-cell-counts" name="wide_format_cell_counts" type="boolean" truevalue="--wide-format-cell-counts" falsevalue="" checked="true" label="Output a matrix of genes and cells, instead of a flat file" /> <section name="advanced" title="Extra parameters" > - <param name="mapping_quality" type="integer" min="0" value="0" label="Minimum mapping quality" /> + <param argument="--mapping-quality" name="mapping_quality" type="integer" min="0" value="0" label="Minimum mapping quality" /> <!-- Currently hard-coded parameter. Leave here if useful to future wrapper --> - <!-- <param argument="-\-per-gene" name="per_gene" type="text" label="Group reads together if they have the same gene" help="Reads will be grouped together if they have the same gene. This is useful if your library -prep generates PCR duplicates with non-identical alignment positions such as CEL-Seq. Note this option is hardcoded to be on with the count command. I.e counting is always performed per-gene. Must be combined with either + <!-- <param argument="-\-per-gene" name="per_gene" type="text" label="Group reads together if they have the same gene" help="Reads will be grouped together if they have the same gene. This is useful if your library +prep generates PCR duplicates with non-identical alignment positions such as CEL-Seq. Note this option is hardcoded to be on with the count command. I.e counting is always performed per-gene. Must be combined with either -\-gene-tag or -\-per-contig option" /> --> - <param name="gene_tag" type="text" label="Deduplicate per gene." help="The gene information is encoded in the bam read tag." value="" > + <param argument="--gene-tag" name="gene_tag" type="text" label="Deduplicate per gene." help="The gene information is encoded in the bam read tag." value="XT" > <expand macro="sanitize_tag" /> </param> - <param name="skip_tags_regex" type="text" label="Skip any reads where the gene matches this tag" value="" > + <param argument="--skip-tags-regex" name="skip_tags_regex" type="text" label="Skip any reads where the gene matches this tag" value="" > <sanitizer invalid_char=""> <valid initial="string.letters,string.digits"> <add value="!="/> @@ -116,49 +107,53 @@ </valid> </sanitizer> </param> - <param name="per_contig" type="boolean" truevalue="--per-contig" falsevalue="" checked="false" - label="Deduplicate per contig (field 3 in BAM; RNAME)" - help="All reads with the same contig will be considered to have the same alignment position. This is useful if you have aligned to a reference transcriptome with one transcript per gene." /> - <param name="per_cell" type="boolean" truevalue="--per-cell" falsevalue="" checked="false" - label="Group reads only if they have the same cell barcode." /> - <param name="random_seed" type="integer" min="0" value="0" label="Random Seed" /> - </section> + <param argument="--per-contig" name="per_contig" type="boolean" truevalue="--per-contig" falsevalue="" checked="false" label="Deduplicate per contig (field 3 in BAM; RNAME)" help="All reads with the same contig will be considered to have the same alignment position. This is useful if you have aligned to a reference transcriptome with one transcript per gene." /> + <param argument="--per-cell" name="per_cell" type="boolean" truevalue="--per-cell" falsevalue="" checked="true" label="Group reads only if they have the same cell barcode." /> + <param argument="--random-seed" name="random_seed" type="integer" min="0" value="0" label="Random Seed" /> + </section> </inputs> <outputs> - <data name="out_counts" format="tsv" /> - <data name="out_log" format="txt" /> + <data name="out_counts" format="tabular" /> </outputs> <tests> <test><!--count_single_gene_tag:--> <param name="input_bam" value="chr19_gene_tags.bam" /> <param name="random_seed" value="123456789" /> - <param name="grouping_method" value="directional" /> + <param name="method" value="directional" /> <param name="gene_tag" value="XF" /> <param name="skip_tags_regex" value="^[__|Unassigned]" /> <param name="extract_umi_method" value="umis" /> + <param name="wide_format_cell_counts" value="false" /> + <param name="per_cell" value="false" /> <output name="out_counts" value="count_single_gene_tag.tsv" /> </test> <test><!--count_single_cells_gene_tag:--> <param name="input_bam" value="chr19_gene_tags.bam" /> <param name="random_seed" value="123456789" /> - <param name="grouping_method" value="directional" /> + <param name="method" value="directional" /> <param name="gene_tag" value="XF" /> <param name="skip_tags_regex" value="^[__|Unassigned]" /> - <param name="per_cell" value="true" /><!-- new --> + <param name="per_cell" value="true" /> <param name="extract_umi_method" value="umis" /> + <param name="wide_format_cell_counts" value="false" /> <output name="out_counts" value="count_single_cells_gene_tag.tsv" /> </test> <test><!--count_single_cells_wide_gene_tag:--> <param name="input_bam" value="chr19_gene_tags.bam" /> <param name="random_seed" value="123456789" /> - <param name="grouping_method" value="directional" /> + <param name="method" value="directional" /> <param name="gene_tag" value="XF" /> <param name="skip_tags_regex" value="^[__|Unassigned]" /> - <param name="per_cell" value="true" /><!-- new --> + <param name="per_cell" value="true" /> <param name="extract_umi_method" value="umis" /> <param name="wide_format_cell_counts" value="true" /> <output name="out_counts" value="count_single_cells_gene_tag_wide.tsv" /> </test> + <test><!-- count ENSDARG00000019692, with defaults --> + <param name="input_bam" value="fc.ENSDARG00000019692.bam" /> + <param name="method" value="unique" /> + <output name="out_counts" value="fc.ENSDARG00000019692.counts" /> + </test> </tests> <help><![CDATA[