Mercurial > repos > cpt > cpt_gff_remove_fasta
changeset 0:4c354f12adce draft default tip
Uploaded
author | cpt |
---|---|
date | Fri, 17 Jun 2022 12:53:38 +0000 |
parents | |
children | |
files | cpt_gff_rem_fasta/cpt-macros.xml cpt_gff_rem_fasta/gff3_remove_fasta_sequences.py cpt_gff_rem_fasta/gff3_remove_fasta_sequences.xml cpt_gff_rem_fasta/macros.xml |
diffstat | 4 files changed, 265 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_gff_rem_fasta/cpt-macros.xml Fri Jun 17 12:53:38 2022 +0000 @@ -0,0 +1,115 @@ +<?xml version="1.0"?> +<macros> + <xml name="gff_requirements"> + <requirements> + <requirement type="package" version="2.7">python</requirement> + <requirement type="package" version="1.65">biopython</requirement> + <requirement type="package" version="2.12.1">requests</requirement> + <yield/> + </requirements> + <version_command> + <![CDATA[ + cd $__tool_directory__ && git rev-parse HEAD + ]]> + </version_command> + </xml> + <xml name="citation/mijalisrasche"> + <citation type="doi">10.1371/journal.pcbi.1008214</citation> + <citation type="bibtex">@unpublished{galaxyTools, + author = {E. Mijalis, H. Rasche}, + title = {CPT Galaxy Tools}, + year = {2013-2017}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + </citation> + </xml> + <xml name="citations"> + <citations> + <citation type="doi">10.1371/journal.pcbi.1008214</citation> + <citation type="bibtex"> + @unpublished{galaxyTools, + author = {E. Mijalis, H. Rasche}, + title = {CPT Galaxy Tools}, + year = {2013-2017}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + </citation> + <yield/> + </citations> + </xml> + <xml name="citations-crr"> + <citations> + <citation type="doi">10.1371/journal.pcbi.1008214</citation> + <citation type="bibtex"> + @unpublished{galaxyTools, + author = {C. Ross}, + title = {CPT Galaxy Tools}, + year = {2020-}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + </citation> + <yield/> + </citations> + </xml> + <xml name="citations-2020"> + <citations> + <citation type="doi">10.1371/journal.pcbi.1008214</citation> + <citation type="bibtex"> + @unpublished{galaxyTools, + author = {E. Mijalis, H. Rasche}, + title = {CPT Galaxy Tools}, + year = {2013-2017}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + </citation> + <citation type="bibtex"> + @unpublished{galaxyTools, + author = {A. Criscione}, + title = {CPT Galaxy Tools}, + year = {2019-2021}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + </citation> + <yield/> + </citations> + </xml> + <xml name="citations-2020-AJC-solo"> + <citations> + <citation type="doi">10.1371/journal.pcbi.1008214</citation> + <citation type="bibtex"> + @unpublished{galaxyTools, + author = {A. Criscione}, + title = {CPT Galaxy Tools}, + year = {2019-2021}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + </citation> + <yield/> + </citations> + </xml> + <xml name="citations-clm"> + <citations> + <citation type="doi">10.1371/journal.pcbi.1008214</citation> + <citation type="bibtex"> + @unpublished{galaxyTools, + author = {C. Maughmer}, + title = {CPT Galaxy Tools}, + year = {2017-2020}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + </citation> + <yield/> + </citations> + </xml> + <xml name="sl-citations-clm"> + <citation type="bibtex"> + @unpublished{galaxyTools, + author = {C. Maughmer}, + title = {CPT Galaxy Tools}, + year = {2017-2020}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + </citation> + <yield/> + </xml> +</macros>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_gff_rem_fasta/gff3_remove_fasta_sequences.py Fri Jun 17 12:53:38 2022 +0000 @@ -0,0 +1,28 @@ +#!/usr/bin/env python +""" +This program remove the fasta sequences from the end of a .gff3 file +""" + +import argparse + + +def remove_fasta_seq(gff3, ogff3): + # iterates line by line through input + for line in gff3: + # writes lines to output until ##FASTA + if line.startswith("##FASTA"): + return + else: + ogff3.write(line) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Identify lysis gene candidates next to possible endolysin or holin genes", + epilog="", + ) + parser.add_argument("gff3", type=argparse.FileType("r"), help="original gff3 file") + parser.add_argument("--ogff3", type=argparse.FileType("w"), default="output.gff3") + args = parser.parse_args() + + remove_fasta_seq(args.gff3, args.ogff3)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_gff_rem_fasta/gff3_remove_fasta_sequences.xml Fri Jun 17 12:53:38 2022 +0000 @@ -0,0 +1,37 @@ +<?xml version="1.1"?> +<tool id="edu.tamu.cpt2.gff3.remove_fasta" name="Remove FASTA Sequences from .gff3 File" version="1.1"> + <description>Removes FASTA sequences at the end of .gff3 file. All lines after '##FASTA' deleted</description> + <macros> + <import>macros.xml</import> + <import>cpt-macros.xml</import> + </macros> + <expand macro="requirements"/> + <command detect_errors="aggressive"><![CDATA[ +python $__tool_directory__/gff3_remove_fasta_sequences.py +$gff3 +--ogff3 $ogff3 + +]]></command> + <inputs> + <param label="Input .gff3 file" name="gff3" type="data" format="gff3" /> + </inputs> + <outputs> + <data format="gff3" name="ogff3" label="Output file"/> + </outputs> + <help><![CDATA[ +**What it does** +This program removes the lines after '##FASTA' in a .gff3 to be compliant with standard .gff3 format + + ]]></help> + <citations> + <citation type="doi">10.1371/journal.pcbi.1008214</citation> + <citation type="bibtex"> + @unpublished{galaxyTools, + author = {A. Holt}, + title = {CPT Galaxy Tools}, + year = {2020}, + note = {https://github.com/tamu-cpt/galaxy-tools/} + } + </citation> + </citations> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_gff_rem_fasta/macros.xml Fri Jun 17 12:53:38 2022 +0000 @@ -0,0 +1,85 @@ +<?xml version="1.0"?> +<macros> + <xml name="requirements"> + <requirements> + <requirement type="package" version="3.8.13">python</requirement> + <requirement type="package" version="1.79">biopython</requirement> + <requirement type="package" version="1.2.2">cpt_gffparser</requirement> + <yield/> + </requirements> + </xml> + <token name="@BLAST_TSV@"> + "$blast_tsv" + </token> + <xml name="blast_tsv"> + <param label="Blast Results" help="TSV/tabular (25 Column)" + name="blast_tsv" type="data" format="tabular" /> + </xml> + + <token name="@BLAST_XML@"> + "$blast_xml" + </token> + <xml name="blast_xml"> + <param label="Blast Results" help="XML format" + name="blast_xml" type="data" format="blastxml" /> + </xml> + <xml name="gff3_with_fasta"> + <param label="Genome Sequences" name="fasta" type="data" format="fasta" /> + <param label="Genome Annotations" name="gff3" type="data" format="gff3" /> + </xml> + <xml name="genome_selector"> + <conditional name="reference_genome"> + <param name="reference_genome_source" type="select" label="Reference Genome"> + <option value="history" selected="True">From History</option> + <option value="cached">Locally Cached</option> + </param> + <when value="cached"> + <param name="fasta_indexes" type="select" label="Source FASTA Sequence"> + <options from_data_table="all_fasta"/> + </param> + </when> + <when value="history"> + <param name="genome_fasta" type="data" format="fasta" label="Source FASTA Sequence"/> + </when> + </conditional> + </xml> + <xml name="gff3_input"> + <param label="GFF3 Annotations" name="gff3_data" type="data" format="gff3"/> + </xml> + <xml name="input/gff3+fasta"> + <expand macro="gff3_input" /> + <expand macro="genome_selector" /> + </xml> + <token name="@INPUT_GFF@"> + "$gff3_data" + </token> + <token name="@INPUT_FASTA@"> +#if str($reference_genome.reference_genome_source) == 'cached': + "${reference_genome.fasta_indexes.fields.path}" +#else if str($reference_genome.reference_genome_source) == 'history': + genomeref.fa +#end if + </token> + <token name="@GENOME_SELECTOR_PRE@"> +#if $reference_genome.reference_genome_source == 'history': + ln -s $reference_genome.genome_fasta genomeref.fa; +#end if + </token> + <token name="@GENOME_SELECTOR@"> +#if str($reference_genome.reference_genome_source) == 'cached': + "${reference_genome.fasta_indexes.fields.path}" +#else if str($reference_genome.reference_genome_source) == 'history': + genomeref.fa +#end if + </token> + <xml name="input/fasta"> + <param label="Fasta file" name="sequences" type="data" format="fasta"/> + </xml> + + <token name="@SEQUENCE@"> + "$sequences" + </token> + <xml name="input/fasta/protein"> + <param label="Protein fasta file" name="sequences" type="data" format="fasta"/> + </xml> +</macros>