view BlastParser_and_hits.xml @ 4:d04fc71fb1b3 draft default tip

"planemo upload for repository commit d4b806c1a39ded32ac3aa0d929c5022cca936bd6"
author artbio
date Wed, 10 Mar 2021 16:32:18 +0000
parents b4c9c085d709
line wrap: on
line source

<tool id="BlastParser_and_hits" name="Parse blast output and compile hits" version="2.7.0">
<description>for virus discovery</description>
    <!-- Software dependencies resolved by Galaxy before the command runs. -->
    <requirements>
        <requirement type="package" version="3.7.6">python</requirement>
    </requirements>
    <!-- Cheetah command template: builds the parser invocation from the tool form values. -->
    <command><![CDATA[
        ## NOTE(review): nothing follows the trailing slash after the tool directory,
        ## so python is handed a directory path instead of a script. Confirm the script file name.
        python '$__tool_directory__'/
            --sequences '$sequences'
            --blast '$blast'
            --tabularOutput '$tabularOutput'
            --fastaOutput '$fastaOutput'
            --flanking $flanking
            --mode $mode
            ## Additional parameters, only passed when the user enabled the extra filters.
            #if $additional_filters.use_filters == "yes":
                --filter_relativeCov $additional_filters.filter_relativeCov
                --filter_maxScore $additional_filters.filter_maxScore
                --filter_meanScore $additional_filters.filter_meanScore
                ## Free-text values are single-quoted so the shell does not expand them.
                --filter_term_in '$additional_filters.filter_term_in'
                --filter_term_out '$additional_filters.filter_term_out'
            #end if
            --al_sequences '$al_sequences'
            --un_sequences '$un_sequences'
            ## The dataset name is user-controlled text; single quotes prevent shell expansion.
            --dataset_name '$blast.element_identifier'
    ]]></command>

	<!-- Tool form: input datasets, reporting options and the optional filter section. -->
	<inputs>
		<param name="sequences" type="data" format="fasta" label="fasta sequences that have been blasted"/>
		<!-- Accepts datasets whose column metadata is 13, or 0 (presumably unset metadata; confirm). -->
		<param name="blast" type="data" format="tabular" label="The blast output you wish to parse">
			<validator type="expression" message="Blast file must have 13 columns">value.metadata.columns == 13 or value.metadata.columns == 0</validator>
		</param>
		<param name="flanking" type="integer" value="5" label="Number of flanking nucleotides to add to hits for CAP3 assembly"/>
		<param name="mode" type="select" label="Extensive or compact  reporting mode" help="display (extensive)  or not (compact) the oases contigs">
			<option value="verbose" selected="true">extensive</option>
			<option value="short">compact</option>
		</param>
		<!-- The filter parameters are only shown, and only passed to the command, when use_filters is "yes". -->
		<conditional name="additional_filters">
			<param name="use_filters" type="select" label="Use Additional Filters?">
				<option value="no">No</option>
				<option value="yes">Yes</option>
			</param>
			<when value="no"/>
			<when value="yes">
				<param name="filter_relativeCov" type="float" value="0" max="1" label="Minimum Relative Subject Coverage" help=""/>
				<param name="filter_maxScore" type="float" value="0" label="Minimum maximum BitScore" help=""/>
				<param name="filter_meanScore" type="float" value="0" label="Minimum mean BitScore" help=""/>
				<param name="filter_term_in" type="text" value="" label="filter the subject list with a keyword" help=""/>
				<param name="filter_term_out" type="text" value="" label="filter the subject list excluding a keyword" help=""/>
			</when>
		</conditional>
	</inputs>
	<!-- Datasets written by the command: one tabular summary and three fasta files. -->
	<outputs>
		<data name="tabularOutput" format="tabular" label="blast analysis, by subjects"/>
		<data name="fastaOutput" format="fasta" label="hits"/>
		<data name="al_sequences" format="fasta" label="Blast aligned sequences"/>
		<data name="un_sequences" format="fasta" label="Blast unaligned sequences"/>
	</outputs>

        <!-- Functional test: default verbose mode, additional filters disabled. -->
        <tests>
            <test>
                <param ftype="fasta" name="sequences" value="input.fa" />
                <!-- NOTE(review): value is empty; a test-data file name is expected for this data input. Confirm the intended file. -->
                <param ftype="tabular" name="blast" value="" />
                <param name="flanking" value="5" />
                <param name="use_filters" value="no" />
                <param name="mode" value="verbose" />
                <!-- NOTE(review): file is empty; the expected tabular result file name appears to be missing. Confirm the intended file. -->
                <output name="tabularOutput" ftype="tabular" file="" />
                <output name="fastaOutput" ftype="fasta" file="output.fa" />
                <output name="al_sequences" ftype="fasta" file="al_sequences.fa" />
                <output name="un_sequences" ftype="fasta" file="un_sequences.fa" />
            </test>
        </tests>


**What it does**

Parses blast output for virus genome assembly.

Takes as inputs

 - 1. the fasta sequences that have been submitted to blast
 - 2. a blast alignment in a tabular format. **Importantly**, this tabular output must contain the 12 standard columns (see blast documentation), **plus a column 13** that reports the length of the subject sequence (slen). When you run blast tools prior to using this tool, remember to **check the appropriate box** to get the 13th column in the blast tabular output.
 - 3. the number of flanking nucleotides to be recovered at the ends of blast hit sequences

The tool returns 4 datasets

 - 1. the fasta input sequences that produced significant blast hits
 - 2. the fasta sequences that did not produce significant blast hits
 - 3. the sequences of the blast hits, plus the flanking sequences (as specified in the tool form). This dataset may be further used in metavisitor workflows to produce contigs of hits.
 - 4. and the parsing of the blast alignments which summarizes the blast results by "subject" sequences (blast analysis, by subjects)

This latter parsing dataset may be customized by tuning the reporting mode and/or using filters.