Mercurial > repos > iuc > umi_tools_count
changeset 3:b557acca0b56 draft
planemo upload commit a7a086ce7d7d84f53d4a022fa1da25ef7b9a5b9a
author | iuc |
---|---|
date | Fri, 20 Jul 2018 03:50:03 -0400 |
parents | 2cf36d9ea571 |
children | 70cb5527defb |
files | test-data/fc.ENSDARG00000019692.counts.name test-data/fc.ENSDARG00000019692.counts.test umi-tools_counts.xml |
diffstat | 3 files changed, 86 insertions(+), 31 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/fc.ENSDARG00000019692.counts.name Fri Jul 20 03:50:03 2018 -0400 @@ -0,0 +1,2 @@ +gene fc_ENSDARG00000019692_ACCAGA fc_ENSDARG00000019692_ACGTTG fc_ENSDARG00000019692_ACTCTG fc_ENSDARG00000019692_AGACAG fc_ENSDARG00000019692_AGTGTC fc_ENSDARG00000019692_ATGTCG fc_ENSDARG00000019692_CTAGGA fc_ENSDARG00000019692_GAAGAC fc_ENSDARG00000019692_GGTAAC fc_ENSDARG00000019692_TGGTGA +ENSDARG00000019692 2 1 1 1 1 1 1 1 2 1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/fc.ENSDARG00000019692.counts.test Fri Jul 20 03:50:03 2018 -0400 @@ -0,0 +1,2 @@ +gene test_ACCAGA test_ACGTTG test_ACTCTG test_AGACAG test_AGTGTC test_ATGTCG test_CTAGGA test_GAAGAC test_GGTAAC test_TGGTGA +ENSDARG00000019692 2 1 1 1 1 1 1 1 2 1
--- a/umi-tools_counts.xml Mon Jul 16 17:35:28 2018 -0400 +++ b/umi-tools_counts.xml Fri Jul 20 03:50:03 2018 -0400 @@ -1,4 +1,4 @@ -<tool id="umi_tools_count" name="UMI-tools count" version="@VERSION@.1"> +<tool id="umi_tools_count" name="UMI-tools count" version="@VERSION@.2"> <description>performs quantification of UMIs from BAM files</description> <macros> <import>macros.xml</import> @@ -10,36 +10,50 @@ </macros> <expand macro="requirements" /> <command detect_errors="exit_code"><![CDATA[ - ln -s '${input_bam}' 'input.bam' && - ln -s '${input_bam.metadata.bam_index}' 'input.bam.bai' && +#import re + +ln -s '${input_bam}' 'input.bam' && +ln -s '${input_bam.metadata.bam_index}' 'input.bam.bai' && - umi_tools count - -I input.bam - '$paired' - --extract-umi-method='$barcodes.extract_umi_method.value' - #if str($barcodes.extract_umi_method) == 'read_id': - --umi-separator='$barcodes.umi_separator.value' - #else if str($barcodes.extract_umi_method) == 'tag': - --umi-tag='$barcodes.umi_tag.value' - --cell-tag='$barcodes.cell_tag.value' - #end if - --method='$method.value' - --edit-distance-threshold='$edit_distance_threshold' - --mapping-quality='$advanced.mapping_quality' - --per-gene - '$wide_format_cell_counts' - '$advanced.per_contig' - '$advanced.per_cell' - #if str($advanced.gene_tag) != "": - --gene-tag='$advanced.gene_tag.value' - #end if - #if str($advanced.skip_tags_regex) != "": - --skip-tags-regex='$advanced.skip_tags_regex.value' - #end if - #if '$advanced.random_seed' != 0: - --random-seed='$advanced.random_seed' - #end if - -S '$out_counts' +umi_tools count + -I input.bam + '$paired' + --extract-umi-method='$barcodes.extract_umi_method.value' +#if str($barcodes.extract_umi_method) == 'read_id': + --umi-separator='$barcodes.umi_separator.value' +#else if str($barcodes.extract_umi_method) == 'tag': + --umi-tag='$barcodes.umi_tag.value' + --cell-tag='$barcodes.cell_tag.value' +#end if + --method='$method.value' + --edit-distance-threshold='$edit_distance_threshold' + --mapping-quality='$advanced.mapping_quality' + --per-gene + '$wide_format_cell_counts' + '$advanced.per_contig' + '$advanced.per_cell' + +#if str($advanced.gene_tag) != "": + --gene-tag='$advanced.gene_tag.value' +#end if +#if str($advanced.skip_tags_regex) != "": + --skip-tags-regex='$advanced.skip_tags_regex.value' +#end if +#if '$advanced.random_seed' != 0: + --random-seed='$advanced.random_seed' +#end if + -S '$out_counts' + + +#if str($cond_extra.prepender) != "none": +#set $replacer = re.sub('[^\w\_]+', '_', str($input_bam.element_identifier.rsplit('.',1)[0])) + #if str($cond_extra.prepender) == "string": +#set $replacer = str($cond_extra.custom_label) + #end if + +&& sed -i -r '1s|\b([ACGT]+)\b|'"$replacer"'_\1|g' '$out_counts' +#end if + ]]></command> <inputs> <param name="input_bam" type="data" format="bam" label="Sorted BAM file" help="Please use the samtools sort tool to ensure a correct BAM input" /> @@ -84,7 +98,7 @@ <!-- <param argument="-\-per-gene" name="per_gene" type="text" label="Group reads together if they have the same gene" help="Reads will be grouped together if they have the same gene. This is useful if your library prep generates PCR duplicates with non-identical alignment positions such as CEL-Seq. Note this option is hardcoded to be on with the count command. I.e counting is always performed per-gene. Must be combined with either -\-gene-tag or -\-per-contig option" /> --> - <param argument="--gene-tag" name="gene_tag" type="text" label="Deduplicate per gene." help="The gene information is encoded in the bam read tag." value="XT" > + <param argument="--gene-tag" name="gene_tag" type="text" label="Deduplicate per gene." value="XT" help="The gene information is encoded in the bam read tag." > <expand macro="sanitize_tag" /> </param> <param argument="--skip-tags-regex" name="skip_tags_regex" type="text" label="Skip any reads where the gene matches this tag" value="" > @@ -111,6 +125,26 @@ <param argument="--per-cell" name="per_cell" type="boolean" truevalue="--per-cell" falsevalue="" checked="true" label="Group reads only if they have the same cell barcode." /> <param argument="--random-seed" name="random_seed" type="integer" min="0" value="0" label="Random Seed" /> </section> + <conditional name="cond_extra" > + <param name="prepender" type="select" label="Prepend a label to all column headers" help="This preserves uniqueness when merging with other files with the same headers. Note: filename must not contain a '.' character" > + <option value="none" selected="true" >No modifications</option> + <option value="string">Custom Label</option> + <option value="dataset name">Dataset Name</option> + </param> + <when value="none"></when> + <when value="dataset name"></when> + <when value="string"> + <param name="custom_label" type="text" label="Label to Prepend" > + <sanitizer invalid_char=""> + <valid initial="string.letters,string.digits"> + <add value="-"/> + <add value="_"/> + <add value="."/> + </valid> + </sanitizer> + </param> + </when> + </conditional> </inputs> <outputs> <data name="out_counts" format="tabular" /> @@ -154,6 +188,23 @@ <param name="method" value="unique" /> <output name="out_counts" value="fc.ENSDARG00000019692.counts" /> </test> + <test><!-- count ENSDARG00000019692, relabel string --> + <param name="input_bam" value="fc.ENSDARG00000019692.bam" /> + <param name="method" value="unique" /> + <conditional name="cond_extra" > + <param name="prepender" value="string" /> + <param name="custom_label" value="test" /> + </conditional> + <output name="out_counts" value="fc.ENSDARG00000019692.counts.test" /> + </test> + <test><!-- count ENSDARG00000019692, relabel filename --> + <param name="input_bam" value="fc.ENSDARG00000019692.bam" /> + <param name="method" value="unique" /> + <conditional name="cond_extra" > + <param name="prepender" value="dataset name" /> + </conditional> + <output name="out_counts" value="fc.ENSDARG00000019692.counts.name" /> + </test> </tests> <help><![CDATA[