view mitobim.xml @ 0:ea46115c7f1e draft default tip

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/mitobim commit 28d5ff6fe8faf9d3716a79c88b093dabd4aa82a8"
author iuc
date Sun, 03 Jan 2021 18:21:12 +0000
parents
children
line wrap: on
line source

<tool id="mitobim" name="MITObim" version="@TOOL_VERSION@" profile="20.01">
    <!-- One-line summary of what the tool does. -->
    <description>mitochondrial baiting and iterative mapping</description>
    <macros>
        <!-- Single place where the version is defined; it is substituted into both the tool version attribute and the package requirement below. -->
        <token name="@TOOL_VERSION@">1.9.1</token>
    </macros>
    <requirements>
        <!-- The package providing the MITObim.pl executable called in the command section. -->
        <requirement type="package" version="@TOOL_VERSION@">mitobim</requirement>
    </requirements>
    <!-- Command used to query the version of the underlying program. -->
    <version_command>MITObim.pl --version</version_command>
    <command detect_errors="exit_code"><![CDATA[
    ## Stage both inputs under predictable names in the job working directory.
    ## Real copies are made (as before) so MITObim sees plain files with
    ## meaningful extensions rather than Galaxy dataset paths.
    export LC_ALL=C &&

    ## Keep the .gz suffix for compressed read pools so the staged file name
    ## matches its content (the input accepts both fastq and fastq.gz).
    ## NOTE(review): presumably MITObim/MIRA recognise gzipped read pools by
    ## their extension - confirm against the MITObim usage text.
    #if $readpool.ext.endswith(".gz")
        #set $readpool_fn = "readpool.fastq.gz"
    #else
        #set $readpool_fn = "readpool.fastq"
    #end if
    cp '$readpool' $readpool_fn &&
    cp '$reference' reference.$reference.ext &&

    MITObim.pl
    ## Optional iteration bounds: only passed when the user supplied a value.
    #if str($start) != ""
        --start $start
    #end if
    #if str($end) != ""
        --end $end
    #end if
    --sample '$sample'
    --ref '$ref'
    --readpool $readpool_fn
    ## A FASTA reference is used directly as bait (--quick); any other accepted
    ## format (MAF) is passed as a previous assembly (--maf). The staged
    ## reference file is the argument of whichever option is chosen.
    #if $reference.ext == "fasta"
        --quick
    #else
        --maf
    #end if
    reference.$reference.ext

    --kbait $kbait
    --platform $platform
    ## Boolean flags expand to the option name or to an empty string.
    $denovo
    $pair
    $split
    $trimreads
    $trimoverhang
    #if str($mismatch) != ""
        --mismatch $mismatch
    #end if
    #if str($min_cov) != ""
        --min_cov $min_cov
    #end if
    #if str($min_len) != ""
        --min_len $min_len
    #end if
    ## does not work with 3rd test --redirect_tmp \${TEMP:-\$_GALAXY_JOB_TMP_DIR}
    ## The exit status of this pipeline is the one of tee (no pipefail is set
    ## here), so success is decided below by inspecting the captured log.
    --NFS_warn_only | tee log.txt &&

    ## MITObim announces the location of the final assembly in its log; copy
    ## that file to the output dataset. The command substitution is left
    ## unquoted on purpose: the text after the colon presumably starts with a
    ## space, which word splitting removes.
    if grep -q "^Final assembly result will be written to file:" log.txt; then
        cp \$(grep "^Final assembly result will be written to file:" log.txt | cut -d":" -f 2) '$assembly';
    else
        echo "MITObim did not report a final assembly result; check the tool standard output for details." >&2;
        exit 1;
    fi
    ]]></command>
    <inputs>
        <!-- Starting reference: a FASTA bait sequence or a MAF file from a previous assembly. The command section picks the matching MITObim option from the dataset extension. -->
        <param name="reference" type="data" format="fasta,maf" label="Reference" help="If a FASTA sequence is given it will be used as bait (using the --quick option). If a MAF alignment is given the reference is extracted from the alignment (using the --maf option)"/>
        <!-- The reference and sample IDs are always passed on the command line (single-quoted), so they must be non-empty and free of quotes and slashes. -->
        <param argument="--ref" type="text" value="" label="Reference ID" help="If resuming, use the same as in previous iteration/initial MIRA assembly">
            <validator type="empty_field" message="A reference ID is required"/>
            <validator type="regex" message="&quot;, &apos;, and / are not allowed in IDs">^[^'\"/]*$</validator>
        </param>
        <param argument="--readpool" type="data" format="fastq,fastq.gz" label="Read pool" help="Read pairs need to be interleaved for full functionality of the '--pair' option below"/>
        <param argument="--sample" type="text" value="" label="Sample ID" help="If resuming, the sample ID needs to be identical to that of the previous iteration / MIRA assembly">
            <validator type="empty_field" message="A sample ID is required"/>
            <validator type="regex" message="&quot;, &apos;, and / are not allowed in IDs">^[^'\"/]*$</validator>
        </param>
        <!-- Optional iteration bounds; when left empty the corresponding option is omitted from the command line. -->
        <param argument="--start" type="integer" value="" optional="true" label="Iteration to start with" help="Defaults to 0 when using '--quick' reference"/>
        <param argument="--end" type="integer" value="" optional="true" label="Iteration to end with" help="Default=startiteration, i.e. if not specified otherwise stop after 1 iteration"/>
        <param argument="--kbait" type="integer" value="31" min="1" max="256" label="kmer for baiting stringency"/>
        <param argument="--platform" type="select" label="Sequencing platform">
            <option value="solexa">Illumina (Solexa)</option>
            <option value="iontor">Ion Torrent</option>
            <option value="454">454</option>
            <option value="pacbio">PacBio</option>
        </param>
        <!-- Boolean switches: each expands to its option name when checked and to nothing otherwise. -->
        <param argument="--denovo" type="boolean" checked="false" truevalue="--denovo" falsevalue="" label="Run MIRA in denovo mode"/>
        <param argument="--pair" type="boolean" checked="false" truevalue="--pair" falsevalue="" label="Extend readpool to contain full read pairs, even if only one member was baited" help="Relies on /1 and /2 header convention for read pairs"/>
        <param argument="--split" type="boolean" checked="false" truevalue="--split" falsevalue="" label="Split reference at positions with more than 5N"/>
        <param argument="--trimreads" type="boolean" checked="false" truevalue="--trimreads" falsevalue="" label="Trim data" help="It is recommended to trim beforehand and feed MITObim with pre trimmed data"/>
        <param argument="--trimoverhang" type="boolean" checked="false" truevalue="--trimoverhang" falsevalue="" label="Trim overhang" help="Trim overhang up- and downstream of the reference, i.e. do not extend the bait"/>
        <!-- Optional numeric filters; omitted from the command line when left empty. -->
        <param argument="--mismatch" type="integer" value="" optional="true" label="Number of allowed mismatches in mapping" help="Only for Illumina data (default: 15% of avg. read length)"/>
        <param argument="--min_cov" type="integer" value="" optional="true" label="Minimum average coverage of contigs to be retained"/>
        <param argument="--min_len" type="integer" value="" optional="true" label="Minimum length of contig to be retained as backbone"/>
    </inputs>
    <outputs>
        <!-- Assembly in FASTA format. The command section fills this dataset by copying the file that MITObim names in its log as the final assembly result. -->
        <data format="fasta" name="assembly"/>
    </outputs>
    <tests>
        <!-- Test 1: resume from an existing MAF assembly and iterate from 1 to 10.
             Upstream example:
             MITObim.pl -start 1 -end 10 -sample testpool -ref Salpinus_mt_genome -readpool reads.fastq -maf initial-mapping-testpool-to-Salpinus-mt_assembly/initial-mapping-testpool-to-Salpinus-mt_d_results/initial-mapping-testpool-to-Salpinus-mt_out.maf &> log -->
        <test>
            <param name="readpool" ftype="fastqsanger" value="Tthymallus-150bp-300sd50-interleaved.fastq"/>
            <param name="sample" value="testpool"/>
            <param name="reference" ftype="maf" value="initial-mapping-testpool-to-Salpinus-mt_out.maf"/>
            <param name="ref" value="Salpinus_mt_genome"/>
            <param name="start" value="1"/>
            <param name="end" value="10"/>
            <output name="assembly" value="ex1.fasta" ftype="fasta" compare="diff"/>
            <assert_command>
                <has_text text="--maf"/>
                <has_text text="--start 1"/>
                <has_text text="--end 10"/>
            </assert_command>
        </test>
        <!-- Test 2: FASTA reference used as bait (quick mode), iterations 1 to 30.
             Upstream example:
             MITObim.pl -start 1 -end 30 -sample testpool -ref Salpinus_mt_genome -readpool ~/PATH/TO/testdata1/Tthymallus-150bp-300sd50-interleaved.fastq \-\-quick ~/PATH/TO/testdata1/Salpinus-mt-genome-NC_000861.fasta &> log -->
        <test>
            <param name="readpool" ftype="fastqsanger" value="Tthymallus-150bp-300sd50-interleaved.fastq"/>
            <param name="sample" value="testpool"/>
            <param name="reference" ftype="fasta" value="Salpinus-mt-genome-NC_000861.fasta"/>
            <param name="ref" value="Salpinus_mt_genome"/>
            <param name="start" value="1"/>
            <param name="end" value="30"/>
            <output name="assembly" value="ex2.fasta" ftype="fasta" compare="diff"/>
            <assert_command>
                <has_text text="--quick"/>
                <has_text text="--start 1"/>
                <has_text text="--end 30"/>
            </assert_command>
        </test>
        <!-- Test 3: short barcode seed as bait with read-pair extension, no explicit start iteration.
             Of the extra switches in the upstream example only the pair option is enabled here.
             Upstream example:
             ~/PATH/TO/MITObim.pl -sample testpool -ref Tthymallus-COI -readpool ~/PATH/TO/testdata1/Tthymallus-150bp-300sd50-interleaved.fastq \-\-quick ~/PATH/TO/testdata1/Tthymallus-COI-partial-HQ961018.fasta -end 50 \-\-denovo \-\-paired \-\-clean &> log -->
        <test>
            <param name="readpool" ftype="fastq" value="Tthymallus-150bp-300sd50-interleaved.fastq"/>
            <param name="sample" value="testpool"/>
            <param name="reference" ftype="fasta" value="Tthymallus-COI-partial-HQ961018.fasta"/>
            <param name="ref" value="Tthymallus-COI"/>
            <param name="end" value="50"/>
            <param name="pair" value="true"/>
            <output name="assembly" value="ex3.fasta" ftype="fasta" compare="diff"/>
            <assert_command>
                <has_text text="--quick"/>
                <has_text text="--end 50"/>
                <has_text text="--pair"/>
            </assert_command>
        </test>
    </tests>
    <help><![CDATA[
The MITObim procedure (mitochondrial baiting and iterative mapping) represents a highly efficient approach to assembling novel mitochondrial genomes of non-model organisms directly from NGS reads derived from total genomic DNA. Labor-intensive long-range PCR steps prior to sequencing are no longer required. MITObim is capable of reconstructing mitochondrial genomes without the need for a reference genome of the targeted species by relying solely on (a) mitochondrial genome information of more distantly related taxa or (b) short mitochondrial barcoding sequences (seeds), such as the commonly used cytochrome-oxidase subunit 1 (COI), as a starting reference.

The script performs three steps and repeats them iteratively: (i) deriving a reference sequence from the previous mapping assembly, (ii) in silico baiting of reads using the newly derived reference, and (iii) mapping the baited reads to the newly derived reference, which leads to an extension of the reference sequence.

For more details please refer to Hahn et al. 2013. Detailed examples are given in the TUTORIALS section at https://github.com/chrishah/MITObim
    ]]></help>
    <citations>
        <!-- Presumably the Hahn et al. 2013 publication referred to in the help text. -->
        <citation type="doi">10.1093/nar/gkt371</citation>
    </citations>
</tool>