Mercurial > repos > jjohnson > fgbio_fastq_to_bam

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fgbio_fastq_to_bam.xml	Sun Feb 21 23:40:09 2021 +0000
@@ -0,0 +1,91 @@
+<tool id="fgbio_fastq_to_bam" name="fgbio FastqToBam" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" python_template_version="3.5">
+    <description>Generates an unmapped BAM file from fastq files</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <version_command>fgbio --version</version_command>
+    <command detect_errors="exit_code"><![CDATA[
+        ## Assemble the FastqToBam call; every selected FASTQ becomes one value of --input.
+        fgbio FastqToBam
+        --input
+        #for $input in $inputs
+            '$input'
+        #end for
+        --sample='$sample'
+        --library='$library'
+        ## Deliberately unquoted: several space-separated read structures must reach fgbio as
+        ## separate arguments; the regex validator on the parameter limits the allowed characters.
+        #if $read_structures
+            --read-structures $read_structures
+        #end if
+        --sort='$sort'
+        --output '$output'
+        ## Optional BAM header fields: each flag is emitted only when a value was supplied.
+        #if $bam_header.umi_tag
+            --umi-tag='$bam_header.umi_tag'
+        #end if
+        #if $bam_header.predicted_insert_size
+            --predicted-insert-size='$bam_header.predicted_insert_size'
+        #end if
+        #if $bam_header.read_group
+            --read-group='$bam_header.read_group'
+        #end if
+        #if $bam_header.description
+            --description='$bam_header.description'
+        #end if
+        #if $bam_header.platform
+            --platform='$bam_header.platform'
+        #end if
+        #if $bam_header.platform_model
+            --platform-model='$bam_header.platform_model'
+        #end if
+        #if $bam_header.platform_unit
+            --platform-unit='$bam_header.platform_unit'
+        #end if
+        #if $bam_header.sequencing_center
+            --sequencing-center='$bam_header.sequencing_center'
+        #end if
+        #if $bam_header.comment
+            --comment='$bam_header.comment'
+        #end if
+    ]]></command>
+    <inputs>
+        <param name="inputs" type="data" format="fastq" multiple="true" label="Fastq files corresponding to each sequencing read"/>
+        <param argument="--sample" type="text" value="" label="The name of the sequenced sample">
+        </param>
+        <param argument="--library" type="text" value="" label="The name/ID of the sequenced library">
+        </param>
+        <param argument="--read-structures" type="text" value="" optional="true" label="Read structures, one for each of the FASTQ">
+            <expand macro="read_structures_validator" />
+        </param>
+        <param argument="--sort" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Sort bam by queryname"
+               help="If true, queryname sort the BAM file, otherwise preserve input order."/>
+        <section name="bam_header" title="BAM Header" expanded="false">
+            <param argument="--umi-tag" type="text" value="" optional="true" label="Tag in which to store molecular barcodes/UMIs" help="Default: RX">
+                <expand macro="sam_tag_validator" />
+            </param>
+            <param argument="--predicted-insert-size" type="integer" value="" optional="true" label="Predicted median insert size, to insert into the read group header"/>
+            <param argument="--read-group" type="text" value="" optional="true" label="Read group ID to use in the file header" help="Default: A"/>
+            <param argument="--description" type="text" value="" optional="true" label="Description of the read group"/>
+            <param argument="--platform" type="text" value="" optional="true" label="Sequencing Platform" help="Default: illumina"/>
+            <param argument="--platform-model" type="text" value="" optional="true" label="Platform model to insert into the group header (ex. miseq, hiseq2500, hiseqX)"/>
+            <param argument="--platform-unit" type="text" value="" optional="true" label="Platform unit (e.g. 'flowcell-barcode.lane.sample-barcode')"/>
+            <param argument="--sequencing-center" type="text" value="" optional="true" label="The sequencing center from which the data originated"/>
+            <param argument="--comment" type="text" value="" optional="true" label="Comment to include in the output header"/>
+        </section>
+    </inputs>
+    <outputs>
+        <data name="output" format="unsorted.bam" />
+    </outputs>
+    <help><![CDATA[
+**fgbio FastqToBam**
+
+Generates an unmapped BAM (or SAM or CRAM) file from fastq files. Takes in one or more fastq files (optionally gzipped), each representing a different sequencing read (e.g. R1, R2, I1 or I2) and can use a set of read structures to allocate bases in those reads to template reads, sample indices, unique molecular indices, or to designate bases to be skipped over.
+
+@READ_STRUCTURES_HELP@
+
+http://fulcrumgenomics.github.io/fgbio/tools/latest/FastqToBam.html
+    ]]></help>
+    <expand macro="citations" />
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Sun Feb 21 23:40:09 2021 +0000
@@ -0,0 +1,60 @@
+<macros>
+    <!-- Version of the wrapped fgbio package; reused by the package requirement below. -->
+    <token name="@TOOL_VERSION@">1.3.0</token>
+    <token name="@VERSION_SUFFIX@">0</token>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@TOOL_VERSION@">fgbio</requirement>
+            <yield/>
+        </requirements>
+    </xml>
+    <!-- One read structure: number+operator pairs (T, B, M or S); only the last pair may use "+" as its length. -->
+    <token name="@READ_STRUCTURE_PATTERN@">(([1-9][0-9]*[TBMS])*([+]|[1-9][0-9]*)[TBMS])</token>
+    <!-- One or more read structures, each separated from the next by a single whitespace character. -->
+    <token name="@READ_STRUCTURES_PATTERN@">@READ_STRUCTURE_PATTERN@(\s@READ_STRUCTURE_PATTERN@)*</token>
+    <xml name="read_structures_validator">
+            <validator type="regex" message="Enter whitespace-separated read structures made of number/operator pairs (operators T, B, M, S), e.g. 5M5S+T +T +B">^@READ_STRUCTURES_PATTERN@$</validator>
+    </xml>
+    <!-- SAM tags are two characters: a letter followed by a letter or a digit. -->
+    <xml name="sam_tag_validator">
+            <validator type="regex" message="A SAM tag must be exactly two characters: a letter followed by a letter or digit, e.g. RX">^[A-Za-z][A-Za-z0-9]$</validator>
+    </xml>
+    <xml name="sam_sort_order">
+        <param argument="--sort-order" type="select" optional="true" label="Sort BAM by">
+            <option value="Coordinate">Coordinate</option>
+            <option value="Queryname">Queryname</option>
+            <option value="Random">Random</option>
+            <option value="RandomQuery">RandomQuery</option>
+        </param>
+    </xml>
+
+    <token name="@READ_STRUCTURES_HELP@"><![CDATA[
+**Read Structures**
+
+Read structures are made up of <number><operator> pairs much like the CIGAR string in BAM files. Four kinds of operators are recognized:
+
+    T identifies a template read
+    B identifies a sample barcode read
+    M identifies a unique molecular index read
+    S identifies a set of bases that should be skipped or ignored
+
+The last <number><operator> pair may be specified using a + sign instead of number to denote “all remaining bases”. This is useful if, e.g., fastqs have been trimmed and contain reads of varying length. For example to convert a paired-end run with an index read and where the first 5 bases of R1 are a UMI and the second five bases are monotemplate you might specify:
+
+::
+
+    --input r1.fq r2.fq i1.fq --read-structures 5M5S+T +T +B
+
+Alternative if you know your reads are of fixed length you could specify:
+
+::
+
+    --input r1.fq r2.fq i1.fq --read-structures 5M5S65T 75T 8B
+
+
+]]></token>
+    <xml name="citations">
+        <citations>
+            <yield />
+        </citations>
+    </xml>
+</macros>