Mercurial > repos > jjohnson > fgbio_group_reads_by_umi
changeset 1:568816308859 draft
"planemo upload commit 692ea558cbbefee93859dc2b005fab5ac4970eb8"
author | jjohnson |
---|---|
date | Wed, 24 Feb 2021 13:02:09 +0000 |
parents | 900cd2865768 |
children | 9efbd847e5d1 |
files | fgbio_group_reads_by_umi.xml macros.xml macros.xml.bak |
diffstat | 3 files changed, 80 insertions(+), 7 deletions(-) [+] |
line wrap: on
line diff
--- a/fgbio_group_reads_by_umi.xml Sun Feb 21 23:40:34 2021 +0000 +++ b/fgbio_group_reads_by_umi.xml Wed Feb 24 13:02:09 2021 +0000 @@ -51,10 +51,10 @@ </help> </param> <param argument="--min-map-q" type="integer" value="" optional="true" label="Minimum mapping quality" help="Default: 30"/> - <param argument="--raw-tag" type="text" value="" label="The tag containing the raw UMI" help="Default: RX"> + <param argument="--raw-tag" type="text" value="" optional="true" label="The tag containing the raw UMI" help="Default: RX"> <expand macro="sam_tag_validator"/> </param> - <param argument="--assign-tag" type="text" value="" label="The output tag for UMI grouping" help="Default: MI"> + <param argument="--assign-tag" type="text" value="" optional="true" label="The output tag for UMI grouping" help="Default: MI"> <expand macro="sam_tag_validator"/> </param> <param argument="--include-non-pf-reads" type="select" value="true" optional="true" label="Include non-PF reads"> @@ -68,8 +68,14 @@ <outputs> <data name="family_size_histogram" format="tabular" > <filter>output_counts == True</filter> + <actions> + <action name="comment_lines" type="metadata" default="1" /> + <action name="column_names" type="metadata" default="family_size,count,fraction,fraction_gt_or_eq_family_size" /> + </actions> </data> - <data name="output" format="bam" /> + <data name="output" format="unsorted.bam" > + <expand macro="sort_order_change_format" /> + </data> </outputs> <help><![CDATA[ **fgbio GroupReadsByUmi**
--- a/macros.xml Sun Feb 21 23:40:34 2021 +0000 +++ b/macros.xml Wed Feb 24 13:02:09 2021 +0000 @@ -17,22 +17,33 @@ </xml> <xml name="sam_sort_order"> <param argument="--sort-order" type="select" optional="true" label="Sort BAM by"> + <option value="TemplateCoordinate">TemplateCoordinate</option> <option value="Coordinate">Coordinate</option> <option value="Queryname">Queryname</option> <option value="Random">Random</option> <option value="RandomQuery">RandomQuery</option> </param> </xml> + + <xml name="sort_order_change_format"> + <change_format> + <when input="sort_order" value="Coordinate" format="bam" /> + <when input="sort_order" value="TemplateCoordinate" format="bam" /> + <when input="sort_order" value="QueryName" format="unsorted.bam" /> + <when input="sort_order" value="Random" format="unsorted.bam" /> + <when input="sort_order" value="RandomQuery" format="unsorted.bam" /> + </change_format> + </xml> <token name="@READ_STRUCTURES_HELP@"><![CDATA[ **Read Structures** Read structures are made up of <number><operator> pairs much like the CIGAR string in BAM files. Four kinds of operators are recognized: - T identifies a template read - B identifies a sample barcode read - M identifies a unique molecular index read - S identifies a set of bases that should be skipped or ignored + - T identifies a template read + - B identifies a sample barcode read + - M identifies a unique molecular index read + - S identifies a set of bases that should be skipped or ignored The last <number><operator> pair may be specified using a + sign instead of number to denote “all remaining bases”. This is useful if, e.g., fastqs have been trimmed and contain reads of varying length. For example to convert a paired-end run with an index read and where the first 5 bases of R1 are a UMI and the second five bases are monotemplate you might specify:
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml.bak Wed Feb 24 13:02:09 2021 +0000 @@ -0,0 +1,56 @@ +<macros> + <token name="@TOOL_VERSION@">1.3.0</token> + <token name="@VERSION_SUFFIX@">0</token> + <xml name="requirements"> + <requirements> + <requirement type="package" version="@TOOL_VERSION@">fgbio</requirement> + <yield/> + </requirements> + </xml> + <token name="@READ_STRUCTURE_PATTERN@">(([1-9][0-9]*[TBMS])*([+]|[1-9][0-9]*)[TBMS])</token> + <token name="@READ_STRUCTURES_PATTERN@">@READ_STRUCTURE_PATTERN@(\s@READ_STRUCTURE_PATTERN@)*</token> + <xml name="read_structures_validator"> + <validator type="regex" message="">^@READ_STRUCTURES_PATTERN@$</validator> + </xml> + <xml name="sam_tag_validator"> + <validator type="regex" message="">^[A-Za-z][A-Za-z]$</validator> + </xml> + <xml name="sam_sort_order"> + <param argument="--sort-order" type="select" optional="true" label="Sort BAM by"> + <option value="Coordinate">Coordinate</option> + <option value="Queryname">Queryname</option> + <option value="Random">Random</option> + <option value="RandomQuery">RandomQuery</option> + </param> + </xml> + + <token name="@READ_STRUCTURES_HELP@"><![CDATA[ +**Read Structures** + +Read structures are made up of <number><operator> pairs much like the CIGAR string in BAM files. Four kinds of operators are recognized: + + - T identifies a template read + - B identifies a sample barcode read + - M identifies a unique molecular index read + - S identifies a set of bases that should be skipped or ignored + +The last <number><operator> pair may be specified using a + sign instead of number to denote “all remaining bases”. This is useful if, e.g., fastqs have been trimmed and contain reads of varying length. For example to convert a paired-end run with an index read and where the first 5 bases of R1 are a UMI and the second five bases are monotemplate you might specify: + +:: + + --input r1.fq r2.fq i1.fq --read-structures 5M5S+T +T +B + +Alternative if you know your reads are of fixed length you could specify: + +:: + + --input r1.fq r2.fq i1.fq --read-structures 5M5S65T 75T 8B + + +]]></token> + <xml name="citations"> + <citations> + <yield /> + </citations> + </xml> +</macros>