# HG changeset patch
# User gianmarco_piccinno
# Date 1544542698 18000
# Node ID 8cc325f758b0b285cadb18131549ae6d2e2c586e
# Parent 55d338c493f40114839c0470acfeba3df17f289f
Uploaded
diff -r 55d338c493f4 -r 8cc325f758b0 Project_RM/Project_RM/codon_usage.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Project_RM/Project_RM/codon_usage.py Tue Dec 11 10:38:18 2018 -0500
@@ -0,0 +1,75 @@
+#!/home/gianmarco/galaxy-python/python
+
+import Bio
+from Bio import SeqIO
+from Bio.Data import CodonTable
+import re
+import sys
+import os
+import pandas as pd
+
+def read_input(data = "example.fna"):
+
+ seqs = ""
+ with open(data, "rU") as handle:
+ for record in SeqIO.parse(handle, "fasta"):
+ seqs = seqs + str(record.seq)
+
+ return seqs
+
+def codon_usage(seqs, codonTable):
+
+ codon_usage = {}
+ tmp = [x for x in re.split(r'(\w{3})', seqs) if x != ""]
+
+ b_cod_table = CodonTable.unambiguous_dna_by_name[codonTable].forward_table
+
+
+ for cod in CodonTable.unambiguous_dna_by_name[codonTable].stop_codons:
+ b_cod_table[cod] = "_Stop"
+
+ for cod in CodonTable.unambiguous_dna_by_name[codonTable].start_codons:
+ b_cod_table[cod + " Start"] = b_cod_table[cod]
+ b_cod_table.pop(cod)
+
+ aas = set(b_cod_table.values())
+
+
+ for aa in aas:
+ codon_usage[aa] = {}
+ for codon in b_cod_table.keys():
+ if b_cod_table[codon] == aa:
+ codon_usage[aa][codon] = tmp.count(codon.split(" ")[0])
+
+
+ tups = {(outerKey, innerKey): values for outerKey, innerDict in codon_usage.iteritems() for innerKey, values in innerDict.iteritems()}
+
+ #aas_ = set(tups.keys())
+
+ #stops_ = {el for el in aas_ if el[0] == "Stop"}
+ #aas_ = list(aas_.difference(stops_))
+ #stops_ = list(stops_)
+ #aas_.sort()
+ #stops_.sort()
+
+ codon_usage_ = pd.DataFrame(pd.Series(tups), columns = ["Count"])
+ codon_usage_.index = codon_usage_.index.set_names(["AA", "Codon"])
+ #codon_usage_.index.reindex(pd.MultiIndex.from_tuples([aas_, stops_], names=('AA', 'Codon')), level=[0,1])
+
+
+ codon_usage_['Proportion'] = codon_usage_.groupby(level=0).transform(lambda x: (x / x.sum()).round(2))
+
+ return {"Dictionary": codon_usage, "Tuples": tups, "Table": codon_usage_}
+
+
+
+if __name__ == '__main__':
+
+
+ seqs = read_input(data=sys.argv[1])
+ out = codon_usage(seqs,"Bacterial")
+
+
+ with open(sys.argv[2], "w") as outf:
+ out["Table"].to_csv(outf, sep="\t")
+ #sys.stdout.write(out['Table'])
\ No newline at end of file
diff -r 55d338c493f4 -r 8cc325f758b0 Project_RM/Project_RM/codon_usage.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Project_RM/Project_RM/codon_usage.xml Tue Dec 11 10:38:18 2018 -0500
@@ -0,0 +1,45 @@
+
+ for each sequence in a file
+
+ python
+ biopython
+ pandas
+
+
+ codon_usage.py -i $input -t $input_type -o $output -c $codon_table
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+This tool compute codon usage of an annotated genome [preferably Prokaryotes].
+
+
diff -r 55d338c493f4 -r 8cc325f758b0 Project_RM/codon_usage.py
--- a/Project_RM/codon_usage.py Tue Dec 11 08:49:02 2018 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,75 +0,0 @@
-#!/home/gianmarco/galaxy-python/python
-
-import Bio
-from Bio import SeqIO
-from Bio.Data import CodonTable
-import re
-import sys
-import os
-import pandas as pd
-
-def read_input(data = "example.fna"):
-
- seqs = ""
- with open(data, "rU") as handle:
- for record in SeqIO.parse(handle, "fasta"):
- seqs = seqs + str(record.seq)
-
- return seqs
-
-def codon_usage(seqs, codonTable):
-
- codon_usage = {}
- tmp = [x for x in re.split(r'(\w{3})', seqs) if x != ""]
-
- b_cod_table = CodonTable.unambiguous_dna_by_name[codonTable].forward_table
-
-
- for cod in CodonTable.unambiguous_dna_by_name[codonTable].stop_codons:
- b_cod_table[cod] = "_Stop"
-
- for cod in CodonTable.unambiguous_dna_by_name[codonTable].start_codons:
- b_cod_table[cod + " Start"] = b_cod_table[cod]
- b_cod_table.pop(cod)
-
- aas = set(b_cod_table.values())
-
-
- for aa in aas:
- codon_usage[aa] = {}
- for codon in b_cod_table.keys():
- if b_cod_table[codon] == aa:
- codon_usage[aa][codon] = tmp.count(codon.split(" ")[0])
-
-
- tups = {(outerKey, innerKey): values for outerKey, innerDict in codon_usage.iteritems() for innerKey, values in innerDict.iteritems()}
-
- #aas_ = set(tups.keys())
-
- #stops_ = {el for el in aas_ if el[0] == "Stop"}
- #aas_ = list(aas_.difference(stops_))
- #stops_ = list(stops_)
- #aas_.sort()
- #stops_.sort()
-
- codon_usage_ = pd.DataFrame(pd.Series(tups), columns = ["Count"])
- codon_usage_.index = codon_usage_.index.set_names(["AA", "Codon"])
- #codon_usage_.index.reindex(pd.MultiIndex.from_tuples([aas_, stops_], names=('AA', 'Codon')), level=[0,1])
-
-
- codon_usage_['Proportion'] = codon_usage_.groupby(level=0).transform(lambda x: (x / x.sum()).round(2))
-
- return {"Dictionary": codon_usage, "Tuples": tups, "Table": codon_usage_}
-
-
-
-if __name__ == '__main__':
-
-
- seqs = read_input(data=sys.argv[1])
- out = codon_usage(seqs,"Bacterial")
-
-
- with open(sys.argv[2], "w") as outf:
- out["Table"].to_csv(outf, sep="\t")
- #sys.stdout.write(out['Table'])
\ No newline at end of file
diff -r 55d338c493f4 -r 8cc325f758b0 Project_RM/codon_usage.xml
--- a/Project_RM/codon_usage.xml Tue Dec 11 08:49:02 2018 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,44 +0,0 @@
-
- for each sequence in a file
-
- biopython
- pandas
-
-
- codon_usage.py -i $input -t $input_type -o $output -c $codon_table
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-This tool compute codon usage of an annotated genome [preferably Prokaryotes].
-
-