Galaxy |

Changeset 1:bb6332a85aa6 (2023-06-05)

Previous changeset 0:a68f32350196 (2022-06-17) Next changeset 2:a921d6148d88 (2024-01-05)

Commit message:
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c

added:
cpt-macros.xml
cpt_gbkToGff3.xml
gbk_to_gff3.py
macros.xml

removed:
cpt_gbk_to_gff/cpt-macros.xml
cpt_gbk_to_gff/cpt_gbkToGff3.xml
cpt_gbk_to_gff/gbk_to_gff3.py
cpt_gbk_to_gff/macros.xml

diff -r a68f32350196 -r bb6332a85aa6 cpt-macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cpt-macros.xml Mon Jun 05 02:43:04 2023 +0000

[

@@ -0,0 +1,115 @@
+<macros><!-- Shared CPT macro library: one requirements/version_command macro followed by several citation macros. -->
+    <!-- Declares the python, biopython, requests and cpt_gffparser packages plus a version_command that prints the git HEAD of the tool directory; yield marks where an expanding tool can add children. NOTE(review): git rev-parse presumably needs the tool directory to be a git checkout; confirm for Tool Shed installs. --><xml name="gff_requirements">
+        <requirements>
+            <requirement type="package" version="2.7">python</requirement>
+            <requirement type="package" version="1.65">biopython</requirement>
+            <requirement type="package" version="2.12.1">requests</requirement>
+ <requirement type="package" version="1.2.2">cpt_gffparser</requirement>
+            <yield/>
+        </requirements>
+        <version_command>
+ <![CDATA[
+ cd '$__tool_directory__' && git rev-parse HEAD
+ ]]>
+ </version_command>
+    </xml>
+    <!-- Two bare citation elements (DOI and the Mijalis/Rasche BibTeX entry) with no citations wrapper and no yield. --><xml name="citation/mijalisrasche">
+        <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+        <citation type="bibtex">@unpublished{galaxyTools,
+ author = {E. Mijalis, H. Rasche},
+ title = {CPT Galaxy Tools},
+ year = {2013-2017},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+ </citation>
+    </xml>
+    <!-- Complete citations section: DOI plus the Mijalis/Rasche BibTeX entry; yield accepts further citations. --><xml name="citations">
+        <citations>
+            <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+            <citation type="bibtex">
+ @unpublished{galaxyTools,
+ author = {E. Mijalis, H. Rasche},
+ title = {CPT Galaxy Tools},
+ year = {2013-2017},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+ </citation>
+            <yield/>
+        </citations>
+    </xml>
+    <!-- Complete citations section: DOI plus the C. Ross BibTeX entry; yield accepts further citations. --><xml name="citations-crr">
+        <citations>
+            <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+            <citation type="bibtex">
+ @unpublished{galaxyTools,
+ author = {C. Ross},
+ title = {CPT Galaxy Tools},
+ year = {2020-},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+ </citation>
+            <yield/>
+        </citations>
+    </xml>
+    <!-- Complete citations section: DOI plus the Mijalis/Rasche and A. Criscione BibTeX entries. NOTE(review): both entries use the key galaxyTools; confirm that consumers of the exported BibTeX accept duplicate keys. --><xml name="citations-2020">
+        <citations>
+            <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+            <citation type="bibtex">
+ @unpublished{galaxyTools,
+ author = {E. Mijalis, H. Rasche},
+ title = {CPT Galaxy Tools},
+ year = {2013-2017},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+ </citation>
+            <citation type="bibtex">
+ @unpublished{galaxyTools,
+ author = {A. Criscione},
+ title = {CPT Galaxy Tools},
+ year = {2019-2021},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+                        </citation>
+            <yield/>
+        </citations>
+    </xml>
+    <!-- Complete citations section: DOI plus the A. Criscione BibTeX entry only; yield accepts further citations. --><xml name="citations-2020-AJC-solo">
+        <citations>
+            <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+            <citation type="bibtex">
+ @unpublished{galaxyTools,
+ author = {A. Criscione},
+ title = {CPT Galaxy Tools},
+ year = {2019-2021},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+                        </citation>
+            <yield/>
+        </citations>
+    </xml>
+    <!-- Complete citations section: DOI plus the C. Maughmer BibTeX entry; yield accepts further citations. --><xml name="citations-clm">
+        <citations>
+            <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+            <citation type="bibtex">
+ @unpublished{galaxyTools,
+ author = {C. Maughmer},
+ title = {CPT Galaxy Tools},
+ year = {2017-2020},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+ </citation>
+            <yield/>
+        </citations>
+    </xml>
+    <!-- A single bare BibTeX citation (C. Maughmer) followed by yield, with no citations wrapper around it. --><xml name="sl-citations-clm">
+        <citation type="bibtex">
+ @unpublished{galaxyTools,
+ author = {C. Maughmer},
+ title = {CPT Galaxy Tools},
+ year = {2017-2020},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+ </citation>
+        <yield/>
+    </xml>
+</macros>

diff -r a68f32350196 -r bb6332a85aa6 cpt_gbkToGff3.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cpt_gbkToGff3.xml Mon Jun 05 02:43:04 2023 +0000

[

@@ -0,0 +1,46 @@
+<tool id="edu.tamu.cpt.gff3.customGbkToGff" name="(CPT) Genbank to GFF3: " version="20.1.0.0">
+  <description> CPT made Biopython-based solution</description>
+  <macros>
+    <import>macros.xml</import>
+    <import>cpt-macros.xml</import>
+  </macros>
+  <!-- Package requirements are taken from the macro named "requirements" in the imported macro files. --><expand macro="requirements"/>
+  <!-- Runs the converter through the python interpreter so the script does not need its executable bit. The two boolean flags are left unquoted on purpose: their falsevalue is empty, and a quoted empty string would reach the script as a stray positional argument. The GFF3 result is captured from stdout. --><command detect_errors="aggressive"><![CDATA[
+python '$__tool_directory__/gbk_to_gff3.py'
+'$gbkIn'
+$makeMRNA
+$makeGene
+--identifier '$qualID'
+--fastaFile '$fastaOut'
+> '$default']]></command>
+  <!-- gbkIn is the GenBank dataset; makeGene and makeMRNA emit their flag when checked and nothing when unchecked; qualID names the qualifier passed as the identifier option. --><inputs>
+    <param label="GenBank file" name="gbkIn" type="data" format="genbank"/>
+    <param checked="true" label="Automatically generate any missing Gene features if CDS/RBS has none" name="makeGene" type="boolean" truevalue="--makeGene" falsevalue=""/>
+    <param checked="true" label="Automatically generate missing mRNA features for genes" name="makeMRNA" type="boolean" truevalue="--makeMRNA" falsevalue=""/>
+    <param label="Qualifier to derive GFF ID from" name="qualID" type="text" value="locus_tag"/>
+  </inputs>
+  <!-- default receives the redirected stdout (GFF3); fastaOut is the file handed to the fastaFile option. --><outputs>
+    <data format="gff3" hidden="false" name="default"/>
+    <data format="fasta" hidden="false" name="fastaOut"/>
+  </outputs>
+  <!-- NOTE(review): no functional tests are defined for this tool. --><tests>
+  </tests>
+  <help><![CDATA[
+**What it does**
+
+A Biopython-based script to convert Genbank files to GFF3. Should resolve frame shift errors and other problems caused by the old Bioperl  solution.
+
+Will also attempt to automatically parent RBS, CDS, and Exon features without a locus tag to an appropriate gene feature.
+]]></help>
+  <citations>
+    <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+    <citation type="bibtex">
+ @unpublished{galaxyTools,
+ author = {A. Criscione},
+ title = {CPT Galaxy Tools},
+ year = {2019-2021},
+ note = {https://github.com/tamu-cpt/galaxy-tools/}
+ }
+                        </citation>
+  </citations>
+</tool>

diff -r a68f32350196 -r bb6332a85aa6 cpt_gbk_to_gff/cpt-macros.xml
--- a/cpt_gbk_to_gff/cpt-macros.xml Fri Jun 17 12:46:43 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000

[

@@ -1,115 +0,0 @@
-<?xml version="1.0"?>
-<macros>
- <xml name="gff_requirements">
- <requirements>
- <requirement type="package" version="2.7">python</requirement>
- <requirement type="package" version="1.65">biopython</requirement>
- <requirement type="package" version="2.12.1">requests</requirement>
- <yield/>
- </requirements>
- <version_command>
- <![CDATA[
- cd $__tool_directory__ && git rev-parse HEAD
- ]]>
- </version_command>
- </xml>
- <xml name="citation/mijalisrasche">
- <citation type="doi">10.1371/journal.pcbi.1008214</citation>
- <citation type="bibtex">@unpublished{galaxyTools,
- author = {E. Mijalis, H. Rasche},
- title = {CPT Galaxy Tools},
- year = {2013-2017},
- note = {https://github.com/tamu-cpt/galaxy-tools/}
- }
- </citation>
- </xml>
- <xml name="citations">
- <citations>
- <citation type="doi">10.1371/journal.pcbi.1008214</citation>
- <citation type="bibtex">
- @unpublished{galaxyTools,
- author = {E. Mijalis, H. Rasche},
- title = {CPT Galaxy Tools},
- year = {2013-2017},
- note = {https://github.com/tamu-cpt/galaxy-tools/}
- }
- </citation>
- <yield/>
- </citations>
- </xml>
-     <xml name="citations-crr">
- <citations>
- <citation type="doi">10.1371/journal.pcbi.1008214</citation>
- <citation type="bibtex">
- @unpublished{galaxyTools,
- author = {C. Ross},
- title = {CPT Galaxy Tools},
- year = {2020-},
- note = {https://github.com/tamu-cpt/galaxy-tools/}
- }
- </citation>
- <yield/>
- </citations>
- </xml>
-        <xml name="citations-2020">
- <citations>
- <citation type="doi">10.1371/journal.pcbi.1008214</citation>
- <citation type="bibtex">
- @unpublished{galaxyTools,
- author = {E. Mijalis, H. Rasche},
- title = {CPT Galaxy Tools},
- year = {2013-2017},
- note = {https://github.com/tamu-cpt/galaxy-tools/}
- }
- </citation>
-                        <citation type="bibtex">
- @unpublished{galaxyTools,
- author = {A. Criscione},
- title = {CPT Galaxy Tools},
- year = {2019-2021},
- note = {https://github.com/tamu-cpt/galaxy-tools/}
- }
-                        </citation>
-                        <yield/>
- </citations>
- </xml>
-        <xml name="citations-2020-AJC-solo">
- <citations>
- <citation type="doi">10.1371/journal.pcbi.1008214</citation>
-                        <citation type="bibtex">
- @unpublished{galaxyTools,
- author = {A. Criscione},
- title = {CPT Galaxy Tools},
- year = {2019-2021},
- note = {https://github.com/tamu-cpt/galaxy-tools/}
- }
-                        </citation>
-                        <yield/>
- </citations>
- </xml>
-        <xml name="citations-clm">
- <citations>
- <citation type="doi">10.1371/journal.pcbi.1008214</citation>
- <citation type="bibtex">
- @unpublished{galaxyTools,
- author = {C. Maughmer},
- title = {CPT Galaxy Tools},
- year = {2017-2020},
- note = {https://github.com/tamu-cpt/galaxy-tools/}
- }
- </citation>
-                        <yield/>
- </citations>
- </xml>
-        <xml name="sl-citations-clm">
- <citation type="bibtex">
- @unpublished{galaxyTools,
- author = {C. Maughmer},
- title = {CPT Galaxy Tools},
- year = {2017-2020},
- note = {https://github.com/tamu-cpt/galaxy-tools/}
- }
- </citation>
-                        <yield/>
- </xml>
-</macros>

diff -r a68f32350196 -r bb6332a85aa6 cpt_gbk_to_gff/cpt_gbkToGff3.xml
--- a/cpt_gbk_to_gff/cpt_gbkToGff3.xml Fri Jun 17 12:46:43 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000

[

@@ -1,49 +0,0 @@
-<?xml version="1.0"?>
-<tool id="edu.tamu.cpt.gff3.customGbkToGff" name="(CPT) Genbank to GFF3: " version="20.1.0.0">
-  <description> CPT made Biobython-based solution</description>
-  <macros>
-    <import>macros.xml</import>
- <import>cpt-macros.xml</import>
-  </macros>
-  <expand macro="requirements"/>
-  <command detect_errors="aggressive"><![CDATA[
-$__tool_directory__/gbk_to_gff3.py
-$gbkIn
-$makeMRNA
-$makeGene
---identifier "$qualID"
---fastaFile $fastaOut
-> $default]]></command>
-  <inputs>
-    <param label="GenBank file" name="gbkIn" type="data" format="genbank"/>
-    <param checked="true" label="Automatically generate any missing Gene features if CDS/RBS has none" name="makeGene"
-        type="boolean" truevalue="--makeGene" falsevalue=""/>
-    <param checked="true" label="Automatically generate missing mRNA features for genes" name="makeMRNA"
-        type="boolean" truevalue="--makeMRNA" falsevalue=""/>
-    <param label="Qualifier to derive GFF ID from" name="qualID" type="text" value="locus_tag"/>
-  </inputs>
-  <outputs>
-    <data format="gff3" hidden="false" name="default"/>
-    <data format="fasta" hidden="false" name="fastaOut"/>
-  </outputs>
-  <tests>
-  </tests>
-  <help><![CDATA[
-**What it does**
-
-A Biopython-based script to convert Genbank files to GFF3. Should resolve frame shift errors and other problems caused by the old Bioperl  solution.
-
-Will also attempt to automatically parent RBS, CDS, and Exon features without a locus tag to an appropriate gene feature.
-]]></help>
- <citations>
- <citation type="doi">10.1371/journal.pcbi.1008214</citation>
-                        <citation type="bibtex">
- @unpublished{galaxyTools,
- author = {A. Criscione},
- title = {CPT Galaxy Tools},
- year = {2019-2021},
- note = {https://github.com/tamu-cpt/galaxy-tools/}
- }
-                        </citation>
- </citations>
-</tool>

diff -r a68f32350196 -r bb6332a85aa6 cpt_gbk_to_gff/gbk_to_gff3.py
--- a/cpt_gbk_to_gff/gbk_to_gff3.py Fri Jun 17 12:46:43 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000

[

b'@@ -1,274 +0,0 @@\n-#!/usr/bin/env python\n-\n-import argparse\n-import sys\n-\n-from Bio import SeqIO\n-from Bio.SeqRecord import SeqRecord\n-from Bio.SeqFeature import FeatureLocation\n-from CPT_GFFParser import gffSeqFeature, gffWrite\n-\n-bottomFeatTypes = ["exon", "RBS", "CDS"]\n-\n-def makeGffFeat(inFeat, num, recName, identifier):\n- if inFeat.type == "RBS" or (inFeat.type == "regulatory" and "regulatory_class" in inFeat.qualifiers.keys() and inFeat.qualifiers["regulatory_class"][0] == "ribosome_binding_site"):\n- inFeat.type = "Shine_Dalgarno_sequence"\n- if "codon_start" in inFeat.qualifiers.keys():\n- shift = int(inFeat.qualifiers["codon_start"][0]) - 1\n- else:\n- shift = "."\n- if identifier in inFeat.qualifiers.keys():\n- name = inFeat.qualifiers[identifier][0] + "." + inFeat.type \n- if num > 0:\n- name += "." + str(num)\n- else:\n- name = recName + "." + inFeat.type + "." + str(num)\n- \n- outFeat = gffSeqFeature(inFeat.location, inFeat.type, \'\', inFeat.strand, name, inFeat.qualifiers, None, None, None, shift, 0, "GbkToGff")\n- outFeat.qualifiers["ID"] = [name] \n- return outFeat\n-\n-def main(inFile, makeMRNA, makeGene, identifier, fastaFile, outFile):\n-\n- ofh = sys.stdout\n- if outFile:\n- ofh = outFile\n-\n- outRec = []\n- failed = 0\n- for rec in SeqIO.parse(inFile, "genbank"):\n- recID = rec.name\n-\n- if len(str(rec.seq)) > 0:\n- seqs_pending_writes = True\n- outSeq = str(rec.seq)\n- seqLen = len(outSeq)\n-\n- locBucket = {}\n- outFeats = []\n- topTypeDict = {}\n- seekingParent = []\n- geneNum = 0\n- autoGeneNum = 0\n- for feat in rec.features:\n- if identifier not in feat.qualifiers.keys(): #Allow metadata features and other features with no ID (Output warning?) 
- AJC\n- if feat.type in bottomFeatTypes:\n- seekingParent.append([feat, [], []]) # [Feature, all parent candidates, strongest parent candidates]\n- continue\n- elif feat.type not in topTypeDict.keys():\n- topTypeDict[feat.type] = 1\n- else:\n- topTypeDict[feat.type] += 1\n- outFeats.append(makeGffFeat(feat, topTypeDict[feat.type], recID, identifier))\n- continue\n- elif feat.qualifiers[identifier][0] not in locBucket.keys():\n- locBucket[feat.qualifiers[identifier][0]] = []\n- locBucket[feat.qualifiers[identifier][0]].append(feat)\n-\n- for locus in locBucket.keys():\n- minLoc = locBucket[locus][0].location.start\n- maxLoc = locBucket[locus][0].location.end\n- for feat in locBucket[locus]:\n- minLoc = min(minLoc, feat.location.start)\n- maxLoc = max(maxLoc, feat.location.end)\n- for x in seekingParent:\n- if x[0].location.start >= minLoc and x[0].location.end <= maxLoc:\n- x[1].append(locus)\n- if x[0].location.start == minLoc or x[0].location.end == maxLoc:\n- x[2].append(locus)\n-\n- for x in seekingParent: #Reformat to [Feature, Locus, Unused/Free]\n- if len(x[2]) == 1:\n- finList = ""\n- if len(x[1]) > 1:\n- for loc in x[1]:\n- if loc != x[2][0]:\n- finList += loc + ", "\n- finList = str(x[0].type) + " had no locus tag set in .gbk file, automatically derived. Other, weaker candidate(s) were " + finList[0:-2] + "."\n- else:\n- finList = str(x[0].type) + " had no locus tag set in .gbk file, automatically derived."\n- if "Notes" not in x[0].qualifiers.keys():\n- x[0].qualifiers["Notes"] = []\n- x[0].qualifiers["Notes"].append(finList)\n- x[1] = x[2][0]\n- elif len(x[2]) > 1:\n- candidate = x[2][0] #Arbitrarily choose first one\n- finList = ""\n- strongList = ""\n- f'..b'peDict[x] = 1\n- \n- if not topFeat:\n- if makeGene:\n- if midFeat:\n- possibleStrand = midFeat.strand\n- else:\n- possibleStrand = bottomFeats[0].strand\n- tempName = recID + ".gene." 
+ str(geneNum)\n- tempQuals = {identifier : [locus], "ID" : [tempName], "Notes" : ["Gene feature automatically generated by Gbk to GFF conversion"]}\n- topFeat = gffSeqFeature(FeatureLocation(minLoc, maxLoc, possibleStrand), \'gene\', \'\', possibleStrand, tempName, tempQuals, None, None, None, ".", 0, "GbkToGff")\n- else:\n- sys.stderr.write("Unable to create a feature heirarchy at location [%d, %d] with features: \\n" % (minLoc, maxLoc))\n- for x in locBucket[locus]:\n- sys.stderr.write(str(x))\n- sys.stderr.write(\'\\n\')\n- failed = 1\n- continue\n-\n- outFeats.append(makeGffFeat(topFeat, 0, recID, identifier))\n- if not midFeat and topFeat.type == "gene" and makeMRNA:\n- if identifier in topFeat.qualifiers.keys():\n- tempName = topFeat.qualifiers[identifier][0] + ".mRNA"\n- tempQuals = {identifier : topFeat.qualifiers[identifier], "ID" : [tempName], "Notes" : ["mRNA feature automatically generated by Gbk to GFF conversion"]}\n- else:\n- tempName = outFeats[-1].ID + ".mRNA"\n- tempQuals = {identifier : topFeat.qualifiers[identifier], "ID" : [tempName], "Notes" : ["mRNA feature automatically generated by Gbk to GFF conversion"]}\n- midFeat = gffSeqFeature(FeatureLocation(minLoc, maxLoc, topFeat.strand), \'mRNA\', \'\', topFeat.strand, tempName, tempQuals, None, None, None, ".", 0, "GbkToGff")\n- \n- if midFeat: # Again, need a new if statement if we want to handle multiple mid-tier features\n- outFeats[-1].sub_features.append(makeGffFeat(midFeat, 0, recID, identifier))\n- outFeats[-1].sub_features[-1].qualifiers["Parent"] = [outFeats[-1].id]\n- for x in bottomFeats:\n- typeDict[x.type] += 1\n- outFeats[-1].sub_features[-1].sub_features.append(makeGffFeat(x, typeDict[x.type], recID, identifier))\n- outFeats[-1].sub_features[-1].sub_features[-1].qualifiers["Parent"] = [outFeats[-1].sub_features[-1].id]\n- else: # No midFeat, append bottom feats directly to top feats \n- for x in bottomFeats:\n- typeDict[x.type] += 1\n- 
outFeats[-1].sub_features.append(makeGffFeat(x, typeDict[x.type], recID, identifier))\n- outFeats[-1].sub_features[-1].qualifiers["Parent"] = [outFeats[-1].id]\n- \n- outRec.append(SeqRecord(rec.seq, recID, rec.name, rec.description, rec.dbxrefs, sorted(outFeats, key=lambda x: x.location.start), rec.annotations, rec.letter_annotations))\n- SeqIO.write([outRec[-1]], fastaFile, "fasta")\n- gffWrite(outRec, ofh) \n- exit(failed) # 0 if all features handled, 1 if unable to handle some\n-\n-\n-if __name__ == \'__main__\':\n- parser = argparse.ArgumentParser( description=\'Biopython solution to Gbk to GFF conversion\')\n-\n- parser.add_argument(\'inFile\', type=argparse.FileType("r"), help=\'Path to an input GBK file\' )\n- parser.add_argument(\'--makeMRNA\', action="store_true", required=False, help="Automatically create mRNA features")\n- parser.add_argument(\'--makeGene\', action="store_true", required=False, help="Automatically create missing Gene features")\n- parser.add_argument(\'--identifier\', type=str, default="locus_tag", required=False, help="Qualifier to derive ID property from")\n- parser.add_argument(\'--fastaFile\', type=argparse.FileType("w"), help=\'Fasta output for sequences\' )\n- parser.add_argument(\'--outFile\', type=argparse.FileType("w"), help=\'GFF feature output\' )\n- args = parser.parse_args()\n- main(**vars(args))\n-\n-\n-\n-\n-\n-\n-\n-\n'

diff -r a68f32350196 -r bb6332a85aa6 cpt_gbk_to_gff/macros.xml
--- a/cpt_gbk_to_gff/macros.xml Fri Jun 17 12:46:43 2022 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000

@@ -1,105 +0,0 @@
-<?xml version="1.0"?>
-<macros>
-  <xml name="requirements">
-    <requirements>
- <requirement type="package" version="3.8.13">python</requirement>
- <requirement type="package" version="1.79">biopython</requirement>
- <requirement type="package" version="1.2.2">cpt_gffparser</requirement>
- <yield/>
-    </requirements>
-  </xml>
-  <xml name="ldap_ref"
-    token_name="dn_ref"
-    token_label="Pick a DN"
-    token_fromfile="ldap_people.loc">
-        <repeat name="repeat_@NAME@" title="@LABEL@">
-          <param name="@NAME@" label="Select a @LABEL@" type="select">
-            <options from_file="@FROMFILE@">
-                <column name="name" index="0"/>
-                <column name="value" index="1"/>
-            </options>
-          </param>
-        </repeat>
-    </xml>
-  <xml name="ldap_ref_single"
-    token_name="dn_ref"
-    token_label="Pick a DN"
-    token_fromfile="ldap_people.loc">
-          <param name="@NAME@" label="Select a @LABEL@" type="select">
-            <options from_file="@FROMFILE@">
-                <column name="name" index="0"/>
-                <column name="value" index="1"/>
-            </options>
-          </param>
-    </xml>
- <xml name="gbk_feature_type"
- token_label="Feature type to remove"
- token_multiple="True"
- token_optional="False"
- token_name="positional_2">
-    <param label="@LABEL@" optional="@TOKEN_OPTIONAL" multiple="@MULTIPLE@" name="feature_type" type="select">
-      <option value="-10_signal">-10_signal</option>
-      <option value="-35_signal">-35_signal</option>
-      <option value="3'UTR">3'UTR</option>
-      <option value="5'UTR">5'UTR</option>
-      <option value="CAAT_signal">CAAT_signal</option>
-      <option selected="true" value="CDS">CDS</option>
-      <option value="C_region">C_region</option>
-      <option value="D-loop">D-loop</option>
-      <option value="D_segment">D_segment</option>
-      <option value="GC_signal">GC_signal</option>
-      <option value="J_segment">J_segment</option>
-      <option value="LTR">LTR</option>
-      <option value="N_region">N_region</option>
-      <option value="RBS">RBS</option>
-      <option value="STS">STS</option>
-      <option value="S_region">S_region</option>
-      <option value="TATA_signal">TATA_signal</option>
-      <option value="V_region">V_region</option>
-      <option value="V_segment">V_segment</option>
-      <option value="all">all</option>
-      <option value="assembly_gap">assembly_gap</option>
-      <option value="attenuator">attenuator</option>
-      <option value="enhancer">enhancer</option>
-      <option value="exon">exon</option>
-      <option value="gap">gap</option>
-      <option value="gene">gene</option>
-      <option value="iDNA">iDNA</option>
-      <option value="intron">intron</option>
-      <option value="mRNA">mRNA</option>
-      <option value="mat_peptide">mat_peptide</option>
-      <option value="misc_RNA">misc_RNA</option>
-      <option value="misc_binding">misc_binding</option>
-      <option value="misc_difference">misc_difference</option>
-      <option value="misc_feature">misc_feature</option>
-      <option value="misc_recomb">misc_recomb</option>
-      <option value="misc_signal">misc_signal</option>
-      <option value="misc_structure">misc_structure</option>
-      <option value="mobile_element">mobile_element</option>
-      <option value="modified_base">modified_base</option>
-      <option value="ncRNA">ncRNA</option>
-      <option value="old_sequence">old_sequence</option>
-      <option value="operon">operon</option>
-      <option value="oriT">oriT</option>
-      <option value="polyA_signal">polyA_signal</option>
-      <option value="polyA_site">polyA_site</option>
-      <option value="precursor_RNA">precursor_RNA</option>
-      <option value="prim_transcript">prim_transcript</option>
-      <option value="primer_bind">primer_bind</option>
-      <option value="promoter">promoter</option>
-      <option value="protein_bind">protein_bind</option>
-      <option value="rRNA">rRNA</option>
-      <option value="rep_origin">rep_origin</option>
-      <option value="repeat_region">repeat_region</option>
-      <option value="sig_peptide">sig_peptide</option>
-      <option value="source">source</option>
-      <option value="stem_loop">stem_loop</option>
-      <option value="tRNA">tRNA</option>
-      <option value="terminator">terminator</option>
-      <option value="tmRNA">tmRNA</option>
-      <option value="transit_peptide">transit_peptide</option>
-      <option value="unsure">unsure</option>
-      <option value="variation">variation</option>
-    </param>
- </xml>
-</macros>

diff -r a68f32350196 -r bb6332a85aa6 gbk_to_gff3.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gbk_to_gff3.py Mon Jun 05 02:43:04 2023 +0000

[

b'@@ -0,0 +1,477 @@\n+#!/usr/bin/env python\n+\n+import argparse\n+import sys\n+\n+from Bio import SeqIO\n+from Bio.SeqRecord import SeqRecord\n+from Bio.SeqFeature import FeatureLocation\n+from CPT_GFFParser import gffSeqFeature, gffWrite\n+\n+bottomFeatTypes = ["exon", "RBS", "CDS"]\n+\n+\n+def makeGffFeat(inFeat, num, recName, identifier):\n+ if inFeat.type == "RBS" or (\n+ inFeat.type == "regulatory"\n+ and "regulatory_class" in inFeat.qualifiers.keys()\n+ and inFeat.qualifiers["regulatory_class"][0] == "ribosome_binding_site"\n+ ):\n+ inFeat.type = "Shine_Dalgarno_sequence"\n+ if "codon_start" in inFeat.qualifiers.keys():\n+ shift = int(inFeat.qualifiers["codon_start"][0]) - 1\n+ else:\n+ shift = "."\n+ if identifier in inFeat.qualifiers.keys():\n+ name = inFeat.qualifiers[identifier][0] + "." + inFeat.type\n+ if num > 0:\n+ name += "." + str(num)\n+ else:\n+ name = recName + "." + inFeat.type + "." + str(num)\n+\n+ outFeat = gffSeqFeature(\n+ inFeat.location,\n+ inFeat.type,\n+ "",\n+ inFeat.strand,\n+ name,\n+ inFeat.qualifiers,\n+ None,\n+ None,\n+ None,\n+ shift,\n+ 0,\n+ "GbkToGff",\n+ )\n+ outFeat.qualifiers["ID"] = [name]\n+ return outFeat\n+\n+\n+def main(inFile, makeMRNA, makeGene, identifier, fastaFile, outFile):\n+\n+ ofh = sys.stdout\n+ if outFile:\n+ ofh = outFile\n+\n+ outRec = []\n+ failed = 0\n+ for rec in SeqIO.parse(inFile, "genbank"):\n+ recID = rec.name\n+\n+ if len(str(rec.seq)) > 0:\n+ seqs_pending_writes = True\n+ outSeq = str(rec.seq)\n+ seqLen = len(outSeq)\n+\n+ locBucket = {}\n+ outFeats = []\n+ topTypeDict = {}\n+ seekingParent = []\n+ geneNum = 0\n+ autoGeneNum = 0\n+ for feat in rec.features:\n+ if (\n+ identifier not in feat.qualifiers.keys()\n+ ): # Allow metadata features and other features with no ID (Output warning?) 
- AJC\n+ if feat.type in bottomFeatTypes:\n+ seekingParent.append(\n+ [feat, [], []]\n+ ) # [Feature, all parent candidates, strongest parent candidates]\n+ continue\n+ elif feat.type not in topTypeDict.keys():\n+ topTypeDict[feat.type] = 1\n+ else:\n+ topTypeDict[feat.type] += 1\n+ outFeats.append(\n+ makeGffFeat(feat, topTypeDict[feat.type], recID, identifier)\n+ )\n+ continue\n+ elif feat.qualifiers[identifier][0] not in locBucket.keys():\n+ locBucket[feat.qualifiers[identifier][0]] = []\n+ locBucket[feat.qualifiers[identifier][0]].append(feat)\n+\n+ for locus in locBucket.keys():\n+ minLoc = locBucket[locus][0].location.start\n+ maxLoc = locBucket[locus][0].location.end\n+ for feat in locBucket[locus]:\n+ minLoc = min(minLoc, feat.location.start)\n+ maxLoc = max(maxLoc, feat.location.end)\n+ for x in seekingParent:\n+ if x[0].location.start >= minLoc and x[0].location.end <= maxLoc:\n+ x[1].append(locus)\n+ if x[0].location.start == minLoc or x[0].location.end == maxLoc:\n+ x[2].append(locus)\n+\n+ for x in seekingParent: # Reformat to [Feature, Locus, Unused/Free]\n+ if len(x[2]) == 1:\n+ finList = ""\n+ if len(x[1]) > 1:\n+ for loc in x[1]:\n+ if loc != x[2][0]:\n+ finList += loc + ", "\n+ finList = (\n+ str(x[0].type)\n+ + " had no locus tag set in .gbk file, automatically derived. 
Other, weaker candidate(s) were "\n+ + finList[0:-2]\n+ '..b' tempQuals = {\n+ identifier: topFeat.qualifiers[identifier],\n+ "ID": [tempName],\n+ "Notes": [\n+ "mRNA feature automatically generated by Gbk to GFF conversion"\n+ ],\n+ }\n+ else:\n+ tempName = outFeats[-1].ID + ".mRNA"\n+ tempQuals = {\n+ identifier: topFeat.qualifiers[identifier],\n+ "ID": [tempName],\n+ "Notes": [\n+ "mRNA feature automatically generated by Gbk to GFF conversion"\n+ ],\n+ }\n+ midFeat = gffSeqFeature(\n+ FeatureLocation(minLoc, maxLoc, topFeat.strand),\n+ "mRNA",\n+ "",\n+ topFeat.strand,\n+ tempName,\n+ tempQuals,\n+ None,\n+ None,\n+ None,\n+ ".",\n+ 0,\n+ "GbkToGff",\n+ )\n+\n+ if (\n+ midFeat\n+ ): # Again, need a new if statement if we want to handle multiple mid-tier features\n+ outFeats[-1].sub_features.append(\n+ makeGffFeat(midFeat, 0, recID, identifier)\n+ )\n+ outFeats[-1].sub_features[-1].qualifiers["Parent"] = [outFeats[-1].id]\n+ for x in bottomFeats:\n+ typeDict[x.type] += 1\n+ outFeats[-1].sub_features[-1].sub_features.append(\n+ makeGffFeat(x, typeDict[x.type], recID, identifier)\n+ )\n+ outFeats[-1].sub_features[-1].sub_features[-1].qualifiers[\n+ "Parent"\n+ ] = [outFeats[-1].sub_features[-1].id]\n+ else: # No midFeat, append bottom feats directly to top feats\n+ for x in bottomFeats:\n+ typeDict[x.type] += 1\n+ outFeats[-1].sub_features.append(\n+ makeGffFeat(x, typeDict[x.type], recID, identifier)\n+ )\n+ outFeats[-1].sub_features[-1].qualifiers["Parent"] = [\n+ outFeats[-1].id\n+ ]\n+\n+ outRec.append(\n+ SeqRecord(\n+ rec.seq,\n+ recID,\n+ rec.name,\n+ rec.description,\n+ rec.dbxrefs,\n+ sorted(outFeats, key=lambda x: x.location.start),\n+ rec.annotations,\n+ rec.letter_annotations,\n+ )\n+ )\n+ SeqIO.write([outRec[-1]], fastaFile, "fasta")\n+ gffWrite(outRec, ofh)\n+ exit(failed) # 0 if all features handled, 1 if unable to handle some\n+\n+\n+if __name__ == "__main__":\n+ parser = argparse.ArgumentParser(\n+ description="Biopython solution to Gbk to GFF 
conversion"\n+ )\n+\n+ parser.add_argument(\n+ "inFile", type=argparse.FileType("r"), help="Path to an input GBK file"\n+ )\n+ parser.add_argument(\n+ "--makeMRNA",\n+ action="store_true",\n+ required=False,\n+ help="Automatically create mRNA features",\n+ )\n+ parser.add_argument(\n+ "--makeGene",\n+ action="store_true",\n+ required=False,\n+ help="Automatically create missing Gene features",\n+ )\n+ parser.add_argument(\n+ "--identifier",\n+ type=str,\n+ default="locus_tag",\n+ required=False,\n+ help="Qualifier to derive ID property from",\n+ )\n+ parser.add_argument(\n+ "--fastaFile", type=argparse.FileType("w"), help="Fasta output for sequences"\n+ )\n+ parser.add_argument(\n+ "--outFile", type=argparse.FileType("w"), help="GFF feature output"\n+ )\n+ args = parser.parse_args()\n+ main(**vars(args))\n'

diff -r a68f32350196 -r bb6332a85aa6 macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Mon Jun 05 02:43:04 2023 +0000

@@ -0,0 +1,74 @@
+<macros><!-- Tool-suite macros: the requirements used by the GenBank to GFF3 tool, plus citation, input and Cheetah token helpers. -->
+    <!-- Packages needed by gbk_to_gff3.py, which imports Bio and CPT_GFFParser; versions match the previous revision of this file. yield marks where an expanding tool can add requirements. --><xml name="requirements">
+        <requirements>
+            <requirement type="package" version="3.8.13">python</requirement>
+            <requirement type="package" version="1.79">biopython</requirement>
+            <requirement type="package" version="1.2.2">cpt_gffparser</requirement>
+            <yield/>
+        </requirements>
+    </xml>
+    <!-- Version string token available to wrappers that import this file. --><token name="@WRAPPER_VERSION@">2.4.0</token>
+    <!-- DOI citation for progressiveMauve. --><xml name="citation/progressive_mauve">
+        <citation type="doi">10.1371/journal.pone.0011147</citation>
+    </xml>
+    <!-- DOI citation for Gepard. --><xml name="citation/gepard">
+        <citation type="doi">10.1093/bioinformatics/btm039</citation>
+    </xml>
+    <!-- Command-line fragment: the quoted xmfa input path. --><token name="@XMFA_INPUT@">
+ '$xmfa'
+ </token>
+    <!-- Data input named xmfa; the accepted formats default to xmfa and can be overridden through the formats token. --><xml name="xmfa_input" token_formats="xmfa">
+        <param type="data" format="@FORMATS@" name="xmfa" label="XMFA MSA"/>
+    </xml>
+    <!-- Command-line fragment: the quoted sequences input path. --><token name="@XMFA_FA_INPUT@">
+ '$sequences'
+ </token>
+    <!-- FASTA data input named sequences. --><xml name="xmfa_fa_input">
+        <param type="data" format="fasta" name="sequences" label="Sequences in alignment" help="These sequences should be the SAME DATASET that was used in the progressiveMauve run. Failing that, they should be provided in the same order as in original progressiveMauve run"/>
+    </xml>
+    <!-- Conditional that lets the user take the reference genome from the history or from the all_fasta data table. --><xml name="genome_selector">
+        <conditional name="reference_genome">
+            <param name="reference_genome_source" type="select" label="Reference Genome">
+                <option value="history" selected="True">From History</option>
+                <option value="cached">Locally Cached</option>
+            </param>
+            <when value="cached">
+                <param name="fasta_indexes" type="select" label="Source FASTA Sequence">
+                    <options from_data_table="all_fasta"/>
+                </param>
+            </when>
+            <when value="history">
+                <param name="genome_fasta" type="data" format="fasta" label="Source FASTA Sequence"/>
+            </when>
+        </conditional>
+    </xml>
+    <!-- GFF3 data input named gff3_data. --><xml name="gff3_input">
+        <param label="GFF3 Annotations" name="gff3_data" type="data" format="gff3"/>
+    </xml>
+    <!-- Convenience macro combining the GFF3 input with the genome selector. --><xml name="input/gff3+fasta">
+        <expand macro="gff3_input"/>
+        <expand macro="genome_selector"/>
+    </xml>
+    <!-- Command-line fragment: the quoted gff3_data input path. --><token name="@INPUT_GFF@">
+     '$gff3_data'
+ </token>
+    <!-- Command-line fragment: path of the cached FASTA, or genomeref.fa when the genome comes from the history. --><token name="@INPUT_FASTA@">
+    #if str($reference_genome.reference_genome_source) == 'cached':
+            '${reference_genome.fasta_indexes.fields.path}'
+    #else if str($reference_genome.reference_genome_source) == 'history':
+            genomeref.fa
+    #end if
+ </token>
+    <!-- Setup fragment: symlinks a history genome to genomeref.fa; the source is compared through str() like the sibling tokens. --><token name="@GENOME_SELECTOR_PRE@">
+    #if str($reference_genome.reference_genome_source) == 'history':
+            ln -s '$reference_genome.genome_fasta' genomeref.fa;
+    #end if
+ </token>
+    <!-- Command-line fragment with the same content as the INPUT_FASTA token. --><token name="@GENOME_SELECTOR@">
+    #if str($reference_genome.reference_genome_source) == 'cached':
+            '${reference_genome.fasta_indexes.fields.path}'
+    #else if str($reference_genome.reference_genome_source) == 'history':
+            genomeref.fa
+    #end if
+ </token>
+</macros>