view MISA/clean_fasta_header.xml @ 0:3006582bfc76

Uploaded V1.0 MISA tools and helper scripts
author john-mccallum
date Wed, 14 Sep 2011 23:57:57 -0400
parents
children
line wrap: on
line source

<?xml version="1.0"?>
<tool id="clean_fasta_header_1" name="Clean fasta header">
  <description>Removes fasta description fields in header </description>
  <!-- Keep only the sequence identifier on each FASTA header line: the
       leading ">" plus every character up to the first whitespace; the rest
       of that line is dropped. Lines that do not start with ">" (sequence
       data) pass through unchanged because the pattern is anchored with "^".
       POSIX bracket classes are used instead of the GNU-only \w and \s
       escapes so identifiers containing "|", "." or "-" are preserved whole
       and the expression also works with non-GNU sed. Dataset paths are
       quoted so paths containing spaces do not break the shell command. -->
  <command>sed 's/^\(>[^[:space:]]*\).*/\1/' '$inputFastaFile' > '$fasta_outputfile'</command>
  <!-- Single input: the FASTA dataset that the command reads as
       $inputFastaFile. -->
  <inputs>
    <param format="fasta" name="inputFastaFile" type="data" label="fasta File"/>
  </inputs>
  <!-- Single output: a FASTA dataset that receives the command's redirected
       standard output via $fasta_outputfile. -->
  <outputs>
     <data format="fasta"  name="fasta_outputfile" />
  </outputs>
<help>
.. class:: infomark

**TIP**

This tool requires *fasta* format.

It simply removes any additional description strings from each header line, so the output can be used with downstream tools that don't handle them.

----

**Example**

Query sequence

::

 &gt;contig00001  gene=isogroup00001  length=2159
 tttAaGCATTTAACACTGCATATTGATTGATATAGTTGTTCAGTACAAGCCAATTACATT
 GTAGACATAAAACAAAGCATTCGAAACAGTTGAAATTTTGATTCCTCTATACTGGATCAG
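 GCGGTAATCAGGGGAAGGAAACCATGGTGTAAGGCTGCATCCCATACTTTATCTATGTCA
 &gt;contig00003  gene=isogroup00001  length=2206
 ggTGGCTGCTTTCTCAAATCCACCCCTTCCCAAGGAAACCCTAAACTCGCAGATAAATTT
 GTAGGGTTTCTATGTCGACCGAGCGCCGTCGGAAAGTGAGCCTTTTCGACGTAGTTGAC
 GAGACCTCAGTCTCTG...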



Output

::

 &gt;contig00001
 tttAaGCATTTAACACTGCATATTGATTGATATAGTTGTTCAGTACAAGCCAATTACATT
 GTAGACATAAAACAAAGCATTCGAAACAGTTGAAATTTTGATTCCTCTATACTGGATCAG
 GCGGTAATCAGGGGAAGGAAACCATGGTGTAAGGCTGCATCCCATACTTTATCTATGTCA
 &gt;contig00003
 ggTGGCTGCTTTCTCAAATCCACCCCTTCCCAAGGAAACCCTAAACTCGCAGATAAATTT
 GTAGGGTTTCTATGTCGACCGAGCGCCGTCGGAAAGTGAGCCTTTTCGACGTAGTTGAC
 GAGACCTCAGTCTCTG...


</help>

</tool>