# HG changeset patch
# User cpt
# Date 1655471540 0
# Node ID 0d2226e1c5f69adbd34edd7cdb6b21247188788b
Uploaded
diff -r 000000000000 -r 0d2226e1c5f6 cpt_read_garnier/cpt-macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cpt_read_garnier/cpt-macros.xml Fri Jun 17 13:12:20 2022 +0000
@@ -0,0 +1,115 @@
+<?xml version="1.0"?>
+<macros>
+    <xml name="gff_requirements">
+        <requirements>
+            <requirement type="package" version="3.8.13">python</requirement>
+            <requirement type="package" version="1.79">biopython</requirement>
+            <requirement type="package" version="2.28.0">requests</requirement>
+            <yield/>
+        </requirements>
+        <version_command><![CDATA[cd '$__tool_directory__' && git rev-parse HEAD]]></version_command>
+    </xml>
+    <xml name="citation/mijalisrasche">
+        <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+        <citation type="bibtex">@unpublished{galaxyTools,
+        author = {E. Mijalis, H. Rasche},
+        title = {CPT Galaxy Tools},
+        year = {2013-2017},
+        note = {https://github.com/tamu-cpt/galaxy-tools/}
+        }
+        </citation>
+    </xml>
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+            <citation type="bibtex">
+            @unpublished{galaxyTools,
+                author = {E. Mijalis, H. Rasche},
+                title = {CPT Galaxy Tools},
+                year = {2013-2017},
+                note = {https://github.com/tamu-cpt/galaxy-tools/}
+            }
+            </citation>
+            <yield/>
+        </citations>
+    </xml>
+    <xml name="citations-crr">
+        <citations>
+            <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+            <citation type="bibtex">
+            @unpublished{galaxyTools,
+                author = {C. Ross},
+                title = {CPT Galaxy Tools},
+                year = {2020-},
+                note = {https://github.com/tamu-cpt/galaxy-tools/}
+            }
+            </citation>
+            <yield/>
+        </citations>
+    </xml>
+    <xml name="citations-2020">
+        <citations>
+            <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+            <citation type="bibtex">
+            @unpublished{galaxyTools,
+                author = {E. Mijalis, H. Rasche},
+                title = {CPT Galaxy Tools},
+                year = {2013-2017},
+                note = {https://github.com/tamu-cpt/galaxy-tools/}
+            }
+            </citation>
+            <citation type="bibtex">
+            @unpublished{galaxyTools,
+                author = {A. Criscione},
+                title = {CPT Galaxy Tools},
+                year = {2019-2021},
+                note = {https://github.com/tamu-cpt/galaxy-tools/}
+            }
+            </citation>
+            <yield/>
+        </citations>
+    </xml>
+    <xml name="citations-2020-AJC-solo">
+        <citations>
+            <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+            <citation type="bibtex">
+            @unpublished{galaxyTools,
+                author = {A. Criscione},
+                title = {CPT Galaxy Tools},
+                year = {2019-2021},
+                note = {https://github.com/tamu-cpt/galaxy-tools/}
+            }
+            </citation>
+            <yield/>
+        </citations>
+    </xml>
+    <xml name="citations-clm">
+        <citations>
+            <citation type="doi">10.1371/journal.pcbi.1008214</citation>
+            <citation type="bibtex">
+            @unpublished{galaxyTools,
+                author = {C. Maughmer},
+                title = {CPT Galaxy Tools},
+                year = {2017-2020},
+                note = {https://github.com/tamu-cpt/galaxy-tools/}
+            }
+            </citation>
+            <yield/>
+        </citations>
+    </xml>
+    <xml name="sl-citations-clm">
+        <citation type="bibtex">
+        @unpublished{galaxyTools,
+            author = {C. Maughmer},
+            title = {CPT Galaxy Tools},
+            year = {2017-2020},
+            note = {https://github.com/tamu-cpt/galaxy-tools/}
+        }
+        </citation>
+        <yield/>
+    </xml>
+</macros>
diff -r 000000000000 -r 0d2226e1c5f6 cpt_read_garnier/macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cpt_read_garnier/macros.xml Fri Jun 17 13:12:20 2022 +0000
@@ -0,0 +1,85 @@
+<?xml version="1.0"?>
+<macros>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="3.8.13">python</requirement>
+            <requirement type="package" version="1.79">biopython</requirement>
+            <requirement type="package" version="1.2.2">cpt_gffparser</requirement>
+            <yield/>
+        </requirements>
+    </xml>
+    <token name="@BLAST_TSV@">
+        "$blast_tsv"
+    </token>
+    <xml name="blast_tsv">
+        <param label="Blast Results" help="TSV/tabular (25 Column)" name="blast_tsv" type="data" format="tabular"/>
+    </xml>
+    <token name="@BLAST_XML@">
+        "$blast_xml"
+    </token>
+    <xml name="blast_xml">
+        <param label="Blast Results" help="XML format" name="blast_xml" type="data" format="blastxml"/>
+    </xml>
+    <xml name="genome_selector">
+        <conditional name="reference_genome">
+            <param name="reference_genome_source" type="select" label="Reference Genome">
+                <option value="history" selected="True">From History</option>
+                <option value="cached">Locally Cached</option>
+            </param>
+            <when value="cached">
+                <param name="fasta_indexes" type="select" label="Source FASTA Sequence">
+                    <options from_data_table="all_fasta"/>
+                </param>
+            </when>
+            <when value="history">
+                <param name="genome_fasta" type="data" format="fasta" label="Source FASTA Sequence"/>
+            </when>
+        </conditional>
+    </xml>
+    <xml name="gff3_input">
+        <param label="GFF3 Annotations" name="gff3_data" type="data" format="gff3"/>
+    </xml>
+    <xml name="input/gff3+fasta">
+        <expand macro="gff3_input"/>
+        <expand macro="genome_selector"/>
+    </xml>
+    <token name="@INPUT_GFF@">
+        "$gff3_data"
+    </token>
+    <token name="@INPUT_FASTA@">
+#if str($reference_genome.reference_genome_source) == 'cached':
+        "${reference_genome.fasta_indexes.fields.path}"
+#else if str($reference_genome.reference_genome_source) == 'history':
+        genomeref.fa
+#end if
+    </token>
+    <token name="@GENOME_SELECTOR_PRE@">
+#if $reference_genome.reference_genome_source == 'history':
+        ln -s $reference_genome.genome_fasta genomeref.fa;
+#end if
+    </token>
+    <token name="@GENOME_SELECTOR@">
+#if str($reference_genome.reference_genome_source) == 'cached':
+        "${reference_genome.fasta_indexes.fields.path}"
+#else if str($reference_genome.reference_genome_source) == 'history':
+        genomeref.fa
+#end if
+    </token>
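+    <!-- These tokens are meant to expand inside a tool's command block:
+         @GENOME_SELECTOR_PRE@ symlinks a history FASTA to genomeref.fa, and
+         @GENOME_SELECTOR@ / @INPUT_FASTA@ then resolve to either that file or
+         the locally cached index path, depending on reference_genome_source. -->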
+    <xml name="input/fasta">
+        <param label="Fasta file" name="sequences" type="data" format="fasta"/>
+    </xml>
+    <token name="@SEQUENCE@">
+        "$sequences"
+    </token>
+    <xml name="input/fasta/protein">
+        <param label="Protein fasta file" name="sequences" type="data" format="fasta"/>
+    </xml>
+</macros>
diff -r 000000000000 -r 0d2226e1c5f6 cpt_read_garnier/reading_garnier_output.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cpt_read_garnier/reading_garnier_output.py Fri Jun 17 13:12:20 2022 +0000
@@ -0,0 +1,133 @@
+#!/usr/bin/env python
+
+import argparse
+
+# This function reads through the tagseq file and returns a list of sequence
+# names and a list with the length of each sequence.
+def garnier_sequences(tagseq_file=None):
+    # rewind the already-open file handle and create blank lists
+    f = tagseq_file
+    f.seek(0)
+    sequence = []
+    lengths = []
+
+    # Each "# Sequence:" header line names a sequence and gives its end
+    # coordinate after "to:", which equals the sequence length. Record both.
+    for line in f:
+        words = line.split()
+        if line.startswith("# Sequence:"):
+            sequence += [words[words.index("Sequence:") + 1]]
+            if "to:" in words:
+                lengths += [int(words[words.index("to:") + 1])]
+    # return the sequence names and lengths
+    return sequence, lengths
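+
+# For illustration (hypothetical name and length; real values come from the
+# input file), a header line such as
+#     "# Sequence: MYSEQ     from: 1   to: 245"
+# yields sequence == ["MYSEQ"] and lengths == [245].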
+
+
+# This function extracts the helix, sheet, turn, and coil predictions from the
+# file. The predictions for each type of secondary structure are joined
+# together into one string per structure type.
+def garnier_secondary_struct(tagseq_file=None):
+    # rewind the already-open file handle and set up one string per prediction type
+    f = tagseq_file
+    f.seek(0)
+    helix = ""
+    turns = ""
+    coil = ""
+    sheet = ""
+
+    # if the first word in a line names a structural prediction, append the
+    # rest of the line to the matching prediction string
+    for line in f:
+        words = line.split()
+        if len(words) > 0:
+            if words[0] == "helix":
+                helix += str(line[6:]).rstrip("\n")
+            elif words[0] == "sheet":
+                sheet += str(line[6:]).rstrip("\n")
+            elif words[0] == "turns":
+                turns += str(line[6:]).rstrip("\n")
+            elif words[0] == "coil":
+                coil += str(line[6:]).rstrip("\n")
+    # return the four structural prediction strings
+    return helix, turns, coil, sheet
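+
+# For illustration, a hypothetical report line such as
+#     "helix HHHH  HH"
+# appends "HHHH  HH" (everything after the six-character label field) to the
+# helix string; blanks are kept so the tracks stay position-aligned for the
+# later merge.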
+
+
+# This function cuts the concatenated prediction strings into per-sequence
+# pieces based on the lengths of the original sequences. Lengths are given
+# as a list.
+def vector_cutter(vector, lengths_to_cut):
+    # set up iteration variables
+    start = 0
+    end = lengths_to_cut[0]
+    maximum = len(lengths_to_cut)
+    # create the output list
+    output = []
+    # loop once per sequence, based on the number of lengths given
+    for i in range(maximum):
+        # append this sequence's slice of the combined string
+        output += [str(vector[start:end])]
+        start = end
+        if i + 1 != maximum:
+            end += lengths_to_cut[i + 1]
+    # return the list of strings, one entry per sequence
+    return output
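+
+# For illustration: vector_cutter("HHHEETTC", [3, 5]) returns ["HHH", "EETTC"],
+# one slice per original sequence length.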
+
+
+# This function takes the helix, sheet, turn, and coil predictions for one
+# sequence and merges them into a single structural prediction string.
+def single_prediction(helix, sheet, turns, coil):
+    # set up the output list
+    secondary_structure = []
+    # check that each of the four strings is the same length
+    if len(helix) == len(sheet) == len(coil) == len(turns):
+        # walk the sequence position by position; whichever track is not blank
+        # at a position contributes its character to the output prediction
+        for j in range(len(helix)):
+            if helix[j] != " ":
+                secondary_structure += [str(helix[j])]
+            elif sheet[j] != " ":
+                secondary_structure += [str(sheet[j])]
+            elif coil[j] != " ":
+                secondary_structure += [str(coil[j])]
+            else:
+                secondary_structure += [str(turns[j])]
+    # return the merged prediction list for the sequence
+    return secondary_structure
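+
+# For illustration: single_prediction("H  ", " E ", "  T", "   ") returns
+# ["H", "E", "T"]; each position takes the first non-blank character in
+# helix, sheet, coil, turns order.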
+
+
+if __name__ == "__main__":
+    # parse the single positional argument: the garnier tagseq file
+    parser = argparse.ArgumentParser(
+        description="Read Garnier Secondary Structure Prediction"
+    )
+    parser.add_argument(
+        "tagseq_file", type=argparse.FileType("r"), help="Tagseq file input"
+    )
+    args = parser.parse_args()
+
+    # read the tagseq file for the helix, turn, coil, and sheet strings, and
+    # for the names and lengths of the sequences it summarizes
+    Hel, Tur, Coi, She = garnier_secondary_struct(**vars(args))
+    names, gives = garnier_sequences(**vars(args))
+
+    # cut each combined structural prediction string into individual sequences
+    Helix = vector_cutter(Hel, gives)
+    Sheet = vector_cutter(She, gives)
+    Turns = vector_cutter(Tur, gives)
+    Coil = vector_cutter(Coi, gives)
+
+    # for each sequence, compile the four structural predictions into a single
+    # prediction and print it to standard output as tab-separated text
+    for i in range(len(Helix)):
+        Final = single_prediction(Helix[i], Sheet[i], Turns[i], Coil[i])
+        print("Sequence Name: " + "\t" + names[i])
+        print("\t".join(Final))
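+
+# Example invocation (assuming a garnier tagseq report saved as input.tagseq):
+#     python reading_garnier_output.py input.tagseq > predictions.tsv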
diff -r 000000000000 -r 0d2226e1c5f6 cpt_read_garnier/reading_garnier_output.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cpt_read_garnier/reading_garnier_output.xml Fri Jun 17 13:12:20 2022 +0000
@@ -0,0 +1,28 @@
+<?xml version="1.0"?>
+<tool id="edu.tamu.cpt.garnier.reader" name="Garnier Reader" version="1.0">
+    <description>read garnier tool output</description>
+    <macros>
+        <import>macros.xml</import>
+        <import>cpt-macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <command detect_errors="aggressive"><![CDATA[
+python '$__tool_directory__/reading_garnier_output.py'
+'$tagseq_file'
+> $output]]></command>
+    <inputs>
+        <param label="Garnier tagseq report" name="tagseq_file" type="data" format="txt"/>
+    </inputs>
+    <outputs>
+        <data format="tabular" name="output"/>
+    </outputs>
+    <help><![CDATA[
+**What it does**
+
+Reads the tagseq report produced by the EMBOSS garnier secondary-structure
+tool and prints one combined helix/sheet/turn/coil prediction per sequence.
+]]></help>
+    <expand macro="citations-2020-AJC-solo"/>
+</tool>