Mercurial > repos > jjohnson > fgbio_fastq_to_bam
changeset 0:ee774248788f draft
"planemo upload commit 61f6c8e7f32f170ad7e66e46dd74e8c5d361a722"
author | jjohnson |
---|---|
date | Sun, 21 Feb 2021 23:40:09 +0000 |
parents | |
children | 4635a93ebd91 |
files | fgbio_fastq_to_bam.xml macros.xml |
diffstat | 2 files changed, 147 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
<!-- b/fgbio_fastq_to_bam.xml: Galaxy tool wrapper for fgbio FastqToBam. New file in changeset 0:ee774248788f (Sun Feb 21 23:40:09 2021 +0000). -->
<tool id="fgbio_fastq_to_bam" name="fgbio FastqToBam" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" python_template_version="3.5">
    <description>Generates an unmapped BAM file from fastq files</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <version_command>fgbio --version</version_command>
    <command detect_errors="exit_code"><![CDATA[
        fgbio FastqToBam
            ## One FASTQ path per selected dataset, in the order Galaxy supplies them.
            --input
            #for $input in $inputs
                '$input'
            #end for
            --sample='$sample'
            --library='$library'
            ## Left unquoted so that space-separated read structures reach fgbio as
            ## separate arguments; the parameter's regex validator constrains the value.
            #if $read_structures
                --read-structures $read_structures
            #end if
            --sort='$sort'
            --output '$output'
            ## Optional BAM header content: each option is emitted once, and only
            ## when the corresponding field in the "BAM Header" section is filled in.
            #if $bam_header.umi_tag
                --umi-tag='$bam_header.umi_tag'
            #end if
            #if $bam_header.predicted_insert_size
                --predicted-insert-size='$bam_header.predicted_insert_size'
            #end if
            #if $bam_header.read_group
                --read-group='$bam_header.read_group'
            #end if
            #if $bam_header.description
                --description='$bam_header.description'
            #end if
            #if $bam_header.platform
                --platform='$bam_header.platform'
            #end if
            #if $bam_header.platform_model
                --platform-model='$bam_header.platform_model'
            #end if
            #if $bam_header.platform_unit
                --platform-unit='$bam_header.platform_unit'
            #end if
            #if $bam_header.sequencing_center
                --sequencing-center='$bam_header.sequencing_center'
            #end if
            #if $bam_header.comment
                --comment='$bam_header.comment'
            #end if
    ]]></command>
    <inputs>
        <param name="inputs" type="data" format="fastq" multiple="true" label="Fastq files corresponding to each sequencing read"/>
        <param argument="--sample" type="text" value="" label="The name of the sequenced sample">
        </param>
        <param argument="--library" type="text" value="" label="The name/ID of the sequenced library">
        </param>
        <param argument="--read-structures" type="text" value="" optional="true" label="Read structures, one for each of the FASTQ">
            <expand macro="read_structures_validator" />
        </param>
        <param argument="--sort" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Sort bam by queryname"
            help="If true, queryname sort the BAM file, otherwise preserve input order."/>
        <!-- Every field in this section is optional; blank fields are not passed to fgbio. -->
        <section name="bam_header" title="BAM Header" expanded="false">
            <param argument="--umi-tag" type="text" value="" optional="true" label="Tag in which to store molecular barcodes/UMIs" help="Default: RX">
                <expand macro="sam_tag_validator" />
            </param>
            <param argument="--predicted-insert-size" type="integer" value="" optional="true" label="Predicted median insert size, to insert into the read group header"/>
            <param argument="--read-group" type="text" value="" optional="true" label="Read group ID to use in the file header" help="Default: A"/>
            <param argument="--description" type="text" value="" optional="true" label="Description of the read group"/>
            <param argument="--platform" type="text" value="" optional="true" label="Sequencing Platform" help="Default: illumina"/>
            <param argument="--platform-model" type="text" value="" optional="true" label="Platform model to insert into the group header (ex. miseq, hiseq2500, hiseqX)"/>
            <param argument="--platform-unit" type="text" value="" optional="true" label="Platform unit (e.g. 'flowcell-barcode.lane.sample-barcode')"/>
            <param argument="--sequencing-center" type="text" value="" optional="true" label="The sequencing center from which the data originated"/>
            <param argument="--comment" type="text" value="" optional="true" label="Comment to include in the output header"/>
        </section>
    </inputs>
    <outputs>
        <data name="output" format="unsorted.bam" />
    </outputs>
    <help><![CDATA[
**fgbio FastqToBam**

Generates an unmapped BAM (or SAM or CRAM) file from fastq files.
Takes in one or more fastq files (optionally gzipped), each representing a different sequencing read (e.g. R1, R2, I1 or I2) and can use a set of read structures to allocate bases in those reads to template reads, sample indices, unique molecular indices, or to designate bases to be skipped over.

@READ_STRUCTURES_HELP@

http://fulcrumgenomics.github.io/fgbio/tools/latest/FastqToBam.html
    ]]></help>
    <expand macro="citations" />
</tool>
<!-- b/macros.xml: tokens and XML macros imported by the fgbio tool wrapper. New file in changeset 0:ee774248788f (Sun Feb 21 23:40:09 2021 +0000). -->
<macros>
    <!-- Version of the fgbio package required; also the first part of the tool version string. -->
    <token name="@TOOL_VERSION@">1.3.0</token>
    <token name="@VERSION_SUFFIX@">0</token>
    <xml name="requirements">
        <requirements>
            <requirement type="package" version="@TOOL_VERSION@">fgbio</requirement>
            <yield/>
        </requirements>
    </xml>
    <!-- One read structure: zero or more <number><operator> pairs followed by a final pair whose length is a number or "+". -->
    <token name="@READ_STRUCTURE_PATTERN@">(([1-9][0-9]*[TBMS])*([+]|[1-9][0-9]*)[TBMS])</token>
    <!-- One or more read structures, each separated by a single whitespace character. -->
    <token name="@READ_STRUCTURES_PATTERN@">@READ_STRUCTURE_PATTERN@(\s@READ_STRUCTURE_PATTERN@)*</token>
    <xml name="read_structures_validator">
        <validator type="regex" message="Enter one or more read structures separated by single spaces, e.g. 5M5S+T +T +B">^@READ_STRUCTURES_PATTERN@$</validator>
    </xml>
    <!-- A SAM tag is two characters: a letter followed by a letter or a digit. -->
    <xml name="sam_tag_validator">
        <validator type="regex" message="A SAM tag must be exactly two characters: a letter followed by a letter or digit, e.g. RX">^[A-Za-z][A-Za-z0-9]$</validator>
    </xml>
    <xml name="sam_sort_order">
        <param argument="--sort-order" type="select" optional="true" label="Sort BAM by">
            <option value="Coordinate">Coordinate</option>
            <option value="Queryname">Queryname</option>
            <option value="Random">Random</option>
            <option value="RandomQuery">RandomQuery</option>
        </param>
    </xml>

    <!-- Shared help text (reStructuredText) describing the read structure syntax. -->
    <token name="@READ_STRUCTURES_HELP@"><![CDATA[
**Read Structures**

Read structures are made up of <number><operator> pairs much like the CIGAR string in BAM files. Four kinds of operators are recognized:

    T identifies a template read
    B identifies a sample barcode read
    M identifies a unique molecular index read
    S identifies a set of bases that should be skipped or ignored

The last <number><operator> pair may be specified using a + sign instead of number to denote “all remaining bases”. This is useful if, e.g., fastqs have been trimmed and contain reads of varying length.
For example to convert a paired-end run with an index read and where the first 5 bases of R1 are a UMI and the second five bases are monotemplate you might specify:

::

    --input r1.fq r2.fq i1.fq --read-structures 5M5S+T +T +B

Alternative if you know your reads are of fixed length you could specify:

::

    --input r1.fq r2.fq i1.fq --read-structures 5M5S65T 75T 8B


]]></token>
    <!-- Empty citations container; callers may supply citation elements through the yield. -->
    <xml name="citations">
        <citations>
            <yield />
        </citations>
    </xml>
</macros>