view cpt_intron_detect/intron_detection.xml @ 2:e0e294a1ccdd draft

Uploaded
author cpt
date Fri, 20 May 2022 08:07:39 +0000
parents 1a19092729be
children 41117b5ba508
line wrap: on
line source

<?xml version="1.0" encoding="UTF-8"?>
<tool id="edu.tamu.cpt2.phage.intron_detection" name="Interrupted gene detection tool" version="19.1.0.0">
  <description>based on nearby protein blast results</description>
  <!-- Shared macro files. The macros expanded in this tool (gff3_input, citations-2020)
       are not defined in this file, so they presumably come from these imports. -->
  <macros>
    <import>macros.xml</import>
    <import>cpt-macros.xml</import>
  </macros>
  <!-- Packages the tool depends on, each pinned to an exact version. -->
  <requirements>
    <requirement type="package" version="3.8.13">python</requirement>
    <requirement type="package" version="1.77">biopython</requirement>
    <requirement type="package" version="1.2.2">cpt_gffparser</requirement>
  </requirements>
  <!-- Runs intron_detection.py from the tool directory with the GFF3 and BLAST XML
       inputs as positional arguments, passes the three thresholds as options, and
       redirects standard output into the output dataset.
       Every substituted value is single-quoted so that paths containing spaces or
       shell metacharacters reach the script as one argument each. -->
  <command detect_errors="aggressive"><![CDATA[
python '$__tool_directory__/intron_detection.py'
'$gff3_data'
'$blast'
--minimum '$minimum'
--maximum '$maximum'
--idThresh '$idThresh'
> '$output'
]]></command>
  <!-- User-facing parameters. The gff3_input macro presumably defines the gff3_data
       parameter that the command and the test refer to; the remaining parameters are
       passed to intron_detection.py by name.
       NOTE(review): the help texts give the minimum in amino acids but the maximum in
       bases; confirm the intended units against the script. -->
  <inputs>
    <expand macro="gff3_input" />
    <param label="Blastp Results" name="blast" type="data" format="blastxml"/>
    <!-- Integer rather than free text so non-numeric input is rejected by the form;
         negative values remain allowed, as the help text describes. -->
    <param label="Separation Minimum" name="minimum" type="integer" value="-1"
        help="Minimum number of amino acids a CDS pair must be separated by to detect an intron. Set to a negative number to instead allow CDS to overlap by up to that many AAs."/>
    <param label="Separation Maximum" name="maximum" type="integer" value="10000"
        help="Maximum number of bases a CDS pair may be separated by to detect an intron (eg, distance from end of 1 cds to the start of another)."/>
    <!-- Bounds enforce the 0.00 to 1.00 range stated in the help text. -->
    <param label="Percent Identity Threshold" name="idThresh" type="float" value="0.30" min="0.0" max="1.0"
        help="Minimum Identity Percentage for HSP to be considered for intron comparison (0.00 to 1.00)"/>
  </inputs>
  <!-- Single GFF3 dataset; the command redirects the script's standard output into it. -->
  <outputs>
    <data name="output" format="gff3" label="Potential interrupted genes"/>
  </outputs>
  <!-- Regression test: runs the tool on the T7 fixtures with the minimum and maximum
       separation overridden, and compares the result with T7_INTRON.gff3. -->
  <tests>
    <test>
      <param name="gff3_data" value="T7_CLEAN.gff3"/>
      <param name="blast" value="T7_NR.blastxml"/>
      <param name="minimum" value="10"/>
      <param name="maximum" value="2000"/>
      <param name="idThresh" value="0.3"/>
      <output name="output" file="T7_INTRON.gff3"/>
    </test>
  </tests>
  <help><![CDATA[
**What it does**

This tool parses BLAST XML results and returns potential intron regions by
identifying query proteins with BLAST hits to the same target protein within
the maximum number of bases set above. This distance also wraps around the
ends of the genome. The list of hits is returned as a GFF3 file.

(Tool formerly named Intron Detection, updated Nov 2020)

      ]]></help>
  <!-- Expands the citations-2020 macro, which is not defined in this file
       (presumably supplied by one of the imported macro files). -->
  <expand macro="citations-2020" />
</tool>