view BlastParser_and_hits.xml @ 2:36103afa0934 draft

planemo upload for repository commit 22ac2287a510708784dec78647afea4eff658f02
author artbio
date Tue, 19 Jun 2018 05:18:31 -0400
parents 9beb85dba280
children b4c9c085d709
line wrap: on
line source

<!-- Root element of the tool definition: declares the tool id, its display
     name and its version. The short description follows as the first child.
     The element stays open here; the remaining sections come after it. -->
<tool id="BlastParser_and_hits"
      name="Parse blast output and compile hits"
      version="2.6.0">
    <description>for virus discovery</description>
    <command><![CDATA[
        ## Cheetah-templated command line. The template is wrapped in CDATA so
        ## that quotes and template directives need no XML escaping.
        ## NOTE(review): no script file name follows '$__tool_directory__'/ on
        ## the next line -- confirm which script of the tool directory is meant.
        python '$__tool_directory__'/
            --sequences '$sequences'
            --blast '$blast'
            --tabularOutput '$tabularOutput'
            --fastaOutput '$fastaOutput'
            --flanking $flanking
            --mode $mode
            ## Additional parameters: only passed when the user enabled the
            ## optional filters in the "additional_filters" conditional.
            #if $additional_filters.use_filters == "yes":
                --filter_relativeCov $additional_filters.filter_relativeCov
                --filter_maxScore $additional_filters.filter_maxScore
                --filter_meanScore $additional_filters.filter_meanScore
                ## Free-text values are single-quoted so the shell does not
                ## expand characters such as $ or backticks inside them.
                --filter_term_in '$additional_filters.filter_term_in'
                --filter_term_out '$additional_filters.filter_term_out'
            #end if
            --al_sequences '$al_sequences'
            --un_sequences '$un_sequences'
            ## The dataset name is user-controlled text: single-quote it too.
            --dataset_name '$blast.element_identifier'
    ]]></command>

	<inputs>
		<!-- Input section of the tool form. Every element opened here is
		     closed explicitly so that the document is well-formed. -->
		<!-- FASTA dataset holding the sequences that were blasted. -->
		<param name="sequences" type="data" format="fasta" label="fasta sequences that have been blasted"/>
		<!-- Tabular blast output. The validator accepts a column count of 13,
		     or 0 (presumably "column metadata not set"; confirm). -->
		<param name="blast" type="data" format="tabular" label="The blast output you wish to parse">
			<validator type="expression" message="Blast file must have 13 columns">value.metadata.columns == 13 or value.metadata.columns == 0</validator>
		</param>
		<!-- Integer forwarded to the flanking option of the command line. -->
		<param name="flanking" type="integer" value="5" label="Number of flanking nucleotides to add to hits for CAP3 assembly"/>
		<!-- Reporting mode; "verbose" (shown as "extensive") is preselected. -->
		<param name="mode" type="select" label="Extensive or compact  reporting mode" help="display (extensive)  or not (compact) the oases contigs">
			<option value="verbose" selected="true">extensive</option>
			<option value="short">compact</option>
		</param>
		<!-- Optional filters: the "yes" branch exposes five extra parameters,
		     the "no" branch exposes none. -->
		<conditional name="additional_filters">
			<param name="use_filters" type="select" label="Use Additional Filters?">
				<option value="no">No</option>
				<option value="yes">Yes</option>
			</param>
			<when value="no"/>
			<when value="yes">
				<param name="filter_relativeCov" type="float" value="0" max="1" label="Minimum Relative Subject Coverage" help=""/>
				<param name="filter_maxScore" type="float" value="0" label="Minimum maximum BitScore" help=""/>
				<param name="filter_meanScore" type="float" value="0" label="Minimum mean BitScore" help=""/>
				<param name="filter_term_in" type="text" value="" label="filter the subject list with a keyword" help=""/>
				<param name="filter_term_out" type="text" value="" label="filter the subject list excluding a keyword" help=""/>
			</when>
		</conditional>
	</inputs>
	<outputs>
		<!-- The four datasets produced by the tool: one tabular report and
		     three FASTA files. Each name matches a variable that the command
		     line passes to the script as an output path. -->
		<data name="tabularOutput" format="tabular" label="blast analysis, by subjects"/>
		<data name="fastaOutput" format="fasta" label="hits"/>
		<data name="al_sequences" format="fasta" label="Blast aligned sequences"/>
		<data name="un_sequences" format="fasta" label="Blast unaligned sequences"/>
	</outputs>

        <tests>
            <!-- Single functional test: default flanking (5), filters
                 disabled, verbose reporting; all four outputs are compared
                 against reference files. -->
            <test>
                <param ftype="fasta" name="sequences" value="input.fa" />
                <!-- NOTE(review): value is empty; the blast test input file
                     name seems to be missing and must be confirmed. -->
                <param ftype="tabular" name="blast" value="" />
                <param name="flanking" value="5" />
                <param name="use_filters" value="no" />
                <param name="mode" value="verbose" />
                <!-- NOTE(review): file is empty; the expected tabular output
                     file name seems to be missing and must be confirmed. -->
                <output name="tabularOutput" ftype="tabular" file="" />
                <output name="fastaOutput" ftype="fasta" file="output.fa" />
                <output name="al_sequences" ftype="fasta" file="al_sequences.fa" />
                <output name="un_sequences" ftype="fasta" file="un_sequences.fa" />
            </test>
        </tests>


**What it does**

Parses blast output for virus genome assembly.

Takes as inputs

 - 1. the fasta sequences that have been submitted to blast
 - 2. a blast alignment in a tabular format. **Importantly**, this tabular output must contain the 12 standard columns (see blast documentation), **plus a column 13** that will report the length of the subject sequence (slen). When you use blast tools prior to using this tool, remember to **check the appropriate box** to get the 13th column in the blast tabular output.
 - 3. the number of flanking nucleotides to be recovered at the ends of blast hit sequences

The tool returns 4 datasets

 - 1. the fasta input sequences that produced significant blast hits
 - 2. the fasta sequences that did not produce significant blast hits
 - 3. the sequences of the blast hits, plus the flanking sequences (as specified in the tool form). This dataset may be further used in metavisitor workflows to produce contigs of hits.
 - 4. and the parsing of the blast alignments which summarizes the blast results by "subject" sequences (blast analysis, by subjects)

This latter parsing dataset may be customized by tuning the reporting mode and/or using filters