changeset 0:4c354f12adce draft default tip

Uploaded
author cpt
date Fri, 17 Jun 2022 12:53:38 +0000
parents
children
files cpt_gff_rem_fasta/cpt-macros.xml cpt_gff_rem_fasta/gff3_remove_fasta_sequences.py cpt_gff_rem_fasta/gff3_remove_fasta_sequences.xml cpt_gff_rem_fasta/macros.xml
diffstat 4 files changed, 265 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cpt_gff_rem_fasta/cpt-macros.xml	Fri Jun 17 12:53:38 2022 +0000
@@ -0,0 +1,115 @@
+<?xml version="1.0"?>
+<macros>
+	<xml name="gff_requirements">
+		<requirements>
+			<requirement type="package" version="2.7">python</requirement>
+			<requirement type="package" version="1.65">biopython</requirement>
+			<requirement type="package" version="2.12.1">requests</requirement>
+			<yield/>
+		</requirements>
+		<version_command>
+		<![CDATA[
+			cd $__tool_directory__ && git rev-parse HEAD
+		]]>
+		</version_command>
+	</xml>
+	<xml name="citation/mijalisrasche">
+		<citation type="doi">10.1371/journal.pcbi.1008214</citation>
+		<citation type="bibtex">@unpublished{galaxyTools,
+		author = {E. Mijalis, H. Rasche},
+		title = {CPT Galaxy Tools},
+		year = {2013-2017},
+		note = {https://github.com/tamu-cpt/galaxy-tools/}
+		}
+		</citation>
+	</xml>
+	<xml name="citations">
+		<citations>
+			<citation type="doi">10.1371/journal.pcbi.1008214</citation>
+			<citation type="bibtex">
+			@unpublished{galaxyTools,
+				author = {E. Mijalis, H. Rasche},
+				title = {CPT Galaxy Tools},
+				year = {2013-2017},
+				note = {https://github.com/tamu-cpt/galaxy-tools/}
+			}
+			</citation>
+			<yield/>
+		</citations>
+	</xml>
+	<xml name="citations-crr">
+		<citations>
+			<citation type="doi">10.1371/journal.pcbi.1008214</citation>
+			<citation type="bibtex">
+			@unpublished{galaxyTools,
+				author = {C. Ross},
+				title = {CPT Galaxy Tools},
+				year = {2020-},
+				note = {https://github.com/tamu-cpt/galaxy-tools/}
+			}
+			</citation>
+			<yield/>
+		</citations>
+	</xml>
+	<xml name="citations-2020">
+		<citations>
+			<citation type="doi">10.1371/journal.pcbi.1008214</citation>
+			<citation type="bibtex">
+			@unpublished{galaxyTools,
+				author = {E. Mijalis, H. Rasche},
+				title = {CPT Galaxy Tools},
+				year = {2013-2017},
+				note = {https://github.com/tamu-cpt/galaxy-tools/}
+			}
+			</citation>
+			<citation type="bibtex">
+			@unpublished{galaxyTools,
+				author = {A. Criscione},
+				title = {CPT Galaxy Tools},
+				year = {2019-2021},
+				note = {https://github.com/tamu-cpt/galaxy-tools/}
+			}
+			</citation>
+			<yield/>
+		</citations>
+	</xml>
+	<xml name="citations-2020-AJC-solo">
+		<citations>
+			<citation type="doi">10.1371/journal.pcbi.1008214</citation>
+			<citation type="bibtex">
+			@unpublished{galaxyTools,
+				author = {A. Criscione},
+				title = {CPT Galaxy Tools},
+				year = {2019-2021},
+				note = {https://github.com/tamu-cpt/galaxy-tools/}
+			}
+			</citation>
+			<yield/>
+		</citations>
+	</xml>
+	<xml name="citations-clm">
+		<citations>
+			<citation type="doi">10.1371/journal.pcbi.1008214</citation>
+			<citation type="bibtex">
+			@unpublished{galaxyTools,
+				author = {C. Maughmer},
+				title = {CPT Galaxy Tools},
+				year = {2017-2020},
+				note = {https://github.com/tamu-cpt/galaxy-tools/}
+			}
+			</citation>
+			<yield/>
+		</citations>
+	</xml>
+	<xml name="sl-citations-clm">
+		<citation type="bibtex">
+			@unpublished{galaxyTools,
+				author = {C. Maughmer},
+				title = {CPT Galaxy Tools},
+				year = {2017-2020},
+				note = {https://github.com/tamu-cpt/galaxy-tools/}
+			}
+		</citation>
+		<yield/>
+	</xml>
+</macros>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cpt_gff_rem_fasta/gff3_remove_fasta_sequences.py	Fri Jun 17 12:53:38 2022 +0000
@@ -0,0 +1,28 @@
+#!/usr/bin/env python
+"""
+This program removes the FASTA section (the ##FASTA line and everything after it) from the end of a .gff3 file
+"""
+
+import argparse
+
+
+def remove_fasta_seq(gff3, ogff3):
+    """Copy lines from gff3 to ogff3, stopping at the first line that starts with ##FASTA."""
+    for record in gff3:
+        # the ##FASTA line itself and everything after it are never written
+        if record.startswith("##FASTA"):
+            break
+        # still before the FASTA section: pass the line through unchanged
+        ogff3.write(record)
+
+
+if __name__ == "__main__":
+    # CLI entry point: argparse opens both files, then the input is streamed through remove_fasta_seq.
+    parser = argparse.ArgumentParser(
+        description="Remove the ##FASTA line and the sequences that follow it from a .gff3 file",
+        epilog="",
+    )
+    parser.add_argument("gff3", type=argparse.FileType("r"), help="original gff3 file")
+    parser.add_argument("--ogff3", type=argparse.FileType("w"), default="output.gff3", help="output gff3 file without the FASTA section")
+    args = parser.parse_args()
+    remove_fasta_seq(args.gff3, args.ogff3)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cpt_gff_rem_fasta/gff3_remove_fasta_sequences.xml	Fri Jun 17 12:53:38 2022 +0000
@@ -0,0 +1,37 @@
+<?xml version="1.0"?>
+<tool id="edu.tamu.cpt2.gff3.remove_fasta" name="Remove FASTA Sequences from .gff3 File" version="1.1">
+    <description>Removes FASTA sequences at the end of .gff3 file. All lines after '##FASTA' deleted</description>
+    <macros>
+        <import>macros.xml</import>
+        <import>cpt-macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <!-- Every substituted path is single-quoted so that spaces or shell metacharacters cannot split the argument -->
+    <command detect_errors="aggressive"><![CDATA[
+python '$__tool_directory__/gff3_remove_fasta_sequences.py'
+'$gff3'
+--ogff3 '$ogff3'
+]]></command>
+    <inputs>
+        <param label="Input .gff3 file" name="gff3" type="data" format="gff3" />
+    </inputs>
+    <outputs>
+        <data format="gff3" name="ogff3" label="Output file"/>
+    </outputs>
+    <help><![CDATA[
+**What it does**
+
+This program removes the '##FASTA' line and every line after it from a .gff3 file, so that only the annotation section remains
+    ]]></help>
+    <citations>
+        <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+        <citation type="bibtex">
+        @unpublished{galaxyTools,
+            author = {A. Holt},
+            title = {CPT Galaxy Tools},
+            year = {2020},
+            note = {https://github.com/tamu-cpt/galaxy-tools/}
+        }
+        </citation>
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cpt_gff_rem_fasta/macros.xml	Fri Jun 17 12:53:38 2022 +0000
@@ -0,0 +1,85 @@
+<?xml version="1.0"?>
+<macros>
+	<xml name="requirements">
+		<requirements>
+			<requirement type="package" version="3.8.13">python</requirement>
+			<requirement type="package" version="1.79">biopython</requirement>
+			<requirement type="package" version="1.2.2">cpt_gffparser</requirement>
+			<yield/>
+		</requirements>
+	</xml>
+	<token name="@BLAST_TSV@">
+		"$blast_tsv"
+	</token>
+	<xml name="blast_tsv">
+		<param label="Blast Results" help="TSV/tabular (25 Column)" name="blast_tsv" type="data" format="tabular" />
+	</xml>
+
+	<token name="@BLAST_XML@">
+		"$blast_xml"
+	</token>
+	<xml name="blast_xml">
+		<param label="Blast Results" help="XML format" name="blast_xml" type="data" format="blastxml" />
+	</xml>
+	<xml name="gff3_with_fasta">
+		<param label="Genome Sequences" name="fasta" type="data" format="fasta" />
+		<param label="Genome Annotations" name="gff3" type="data" format="gff3" />
+	</xml>
+	<!-- Reference genome chooser: either a FASTA dataset from the history or an entry of the all_fasta data table -->
+	<xml name="genome_selector">
+		<conditional name="reference_genome">
+			<param name="reference_genome_source" type="select" label="Reference Genome">
+				<option value="history" selected="True">From History</option>
+				<option value="cached">Locally Cached</option>
+			</param>
+			<when value="cached">
+				<param name="fasta_indexes" type="select" label="Source FASTA Sequence">
+					<options from_data_table="all_fasta"/>
+				</param>
+			</when>
+			<when value="history">
+				<param name="genome_fasta" type="data" format="fasta" label="Source FASTA Sequence"/>
+			</when>
+		</conditional>
+	</xml>
+	<xml name="gff3_input">
+		<param label="GFF3 Annotations" name="gff3_data" type="data" format="gff3"/>
+	</xml>
+	<xml name="input/gff3+fasta">
+		<expand macro="gff3_input" />
+		<expand macro="genome_selector" />
+	</xml>
+	<token name="@INPUT_GFF@">
+	"$gff3_data"
+	</token>
+	<token name="@INPUT_FASTA@">
+#if str($reference_genome.reference_genome_source) == 'cached':
+		"${reference_genome.fasta_indexes.fields.path}"
+#else if str($reference_genome.reference_genome_source) == 'history':
+		genomeref.fa
+#end if
+	</token>
+	<!-- Emitted before the main command: links the history FASTA to genomeref.fa, the name @INPUT_FASTA@ and @GENOME_SELECTOR@ use for that case -->
+	<token name="@GENOME_SELECTOR_PRE@">
+#if str($reference_genome.reference_genome_source) == 'history':
+		ln -s '$reference_genome.genome_fasta' genomeref.fa;
+#end if
+	</token>
+	<token name="@GENOME_SELECTOR@">
+#if str($reference_genome.reference_genome_source) == 'cached':
+		"${reference_genome.fasta_indexes.fields.path}"
+#else if str($reference_genome.reference_genome_source) == 'history':
+		genomeref.fa
+#end if
+	</token>
+	<xml name="input/fasta">
+		<param label="Fasta file" name="sequences" type="data" format="fasta"/>
+	</xml>
+
+	<token name="@SEQUENCE@">
+		"$sequences"
+	</token>
+	<xml name="input/fasta/protein">
+		<param label="Protein fasta file" name="sequences" type="data" format="fasta"/>
+	</xml>
+</macros>