Galaxy |

Changeset 0:5b61f1b564b3 (2018-12-11)

Next changeset 1:da98f05fd6be (2018-12-11)

Commit message:
Uploaded

added:
._.shed.yml
._codon_usage.py
._codon_usage.xml
.shed.yml
codon_usage.py
codon_usage.xml

diff -r 000000000000 -r 5b61f1b564b3 ._.shed.yml

Binary file ._.shed.yml has changed

diff -r 000000000000 -r 5b61f1b564b3 ._codon_usage.py

Binary file ._codon_usage.py has changed

diff -r 000000000000 -r 5b61f1b564b3 ._codon_usage.xml

Binary file ._codon_usage.xml has changed

diff -r 000000000000 -r 5b61f1b564b3 .shed.yml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/.shed.yml Tue Dec 11 12:27:52 2018 -0500

@@ -0,0 +1,11 @@
+name: codon_usage
+owner: gianmarco_piccinno
+categories:
+  - Text Manipulation
+description: RM-tool
+long_description: |
+  RM-tool - dev
+type: unrestricted
+auto_tool_repositories:
+  name_template: "{{ tool_id }}"
+  descriptor_template: "Wrapper for Codon Usage application: {{ tool_name }}."
\ No newline at end of file

diff -r 000000000000 -r 5b61f1b564b3 codon_usage.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/codon_usage.py Tue Dec 11 12:27:52 2018 -0500

[

@@ -0,0 +1,75 @@
+#!/home/gianmarco/galaxy-python/python
+
+import Bio
+from Bio import SeqIO
+from Bio.Data import CodonTable
+import re
+import sys
+import os
+import pandas as pd
+
+def read_input(data = "example.fna"):
+
+    seqs = ""
+    with open(data, "rU") as handle:
+        for record in SeqIO.parse(handle, "fasta"):
+            seqs = seqs + str(record.seq)
+
+    return seqs
+
+def codon_usage(seqs, codonTable):
+
+    codon_usage = {}
+    tmp = [x for x in re.split(r'(\w{3})', seqs) if x != ""]
+
+    b_cod_table = CodonTable.unambiguous_dna_by_name[codonTable].forward_table
+
+
+    for cod in CodonTable.unambiguous_dna_by_name[codonTable].stop_codons:
+        b_cod_table[cod] = "_Stop"
+
+    for cod in CodonTable.unambiguous_dna_by_name[codonTable].start_codons:
+            b_cod_table[cod + " Start"] = b_cod_table[cod]
+            b_cod_table.pop(cod)
+
+    aas = set(b_cod_table.values())
+
+
+    for aa in aas:
+        codon_usage[aa] = {}
+        for codon in b_cod_table.keys():
+            if b_cod_table[codon] == aa:
+                codon_usage[aa][codon] = tmp.count(codon.split(" ")[0])
+
+
+    tups = {(outerKey, innerKey): values for outerKey, innerDict in codon_usage.iteritems() for innerKey, values in innerDict.iteritems()}
+
+    #aas_ = set(tups.keys())
+
+    #stops_ = {el for el in aas_ if el[0] == "Stop"}
+    #aas_ = list(aas_.difference(stops_))
+    #stops_ = list(stops_)
+    #aas_.sort()
+    #stops_.sort()
+
+    codon_usage_ = pd.DataFrame(pd.Series(tups), columns = ["Count"])
+    codon_usage_.index = codon_usage_.index.set_names(["AA", "Codon"])
+    #codon_usage_.index.reindex(pd.MultiIndex.from_tuples([aas_, stops_], names=('AA', 'Codon')), level=[0,1])
+
+
+    codon_usage_['Proportion'] = codon_usage_.groupby(level=0).transform(lambda x: (x / x.sum()).round(2))
+
+    return {"Dictionary": codon_usage, "Tuples": tups, "Table": codon_usage_}
+
+
+
+if __name__ == '__main__':
+
+
+    seqs = read_input(data=sys.argv[1])
+    out = codon_usage(seqs,"Bacterial")
+
+
+    with open(sys.argv[2], "w") as outf:
+        out["Table"].to_csv(outf, sep="\t")
+    #sys.stdout.write(out['Table'])
\ No newline at end of file

diff -r 000000000000 -r 5b61f1b564b3 codon_usage.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/codon_usage.xml Tue Dec 11 12:27:52 2018 -0500

[

@@ -0,0 +1,35 @@
+<tool id="codon_usage" name="Codon Usage" version="0.2.0">
+  <description>for each sequence in a file</description>
+  <requirements>
+    <requirement type="package" version="3.6.0">python</requirement>
+    <requirement type="package" version="1.72">biopython</requirement>
+    <requirement type="package" version="0.23.4">pandas</requirement>
+  </requirements>
+
+  <command>python $__tool_directory__/codon_usage.py -i $input -t $input_type -o $output -c $codon_table</command>
+  <inputs>
+    <param name="input" format="fasta" type="data" label="Source file"/>
+
+    <param name="input_type" type="select" format="text">
+ <label>Indicate the input file format</label>
+ <option value="fasta">Fasta</option>
+ <option value="gbk">gbk</option>
+ </param>
+
+    <param name="codon_table" type="select" format="text">
+ <label>Choose the proper codon table for your organism</label>
+        <option value="Archaeal">Archaeal</option>
+        <option value="Bacterial">Bacterial</option>
+        <option value="Standard">Standard</option>
+
+ </param>
+  </inputs>
+
+  <outputs>
+    <data format="tabular" name="output" />
+  </outputs>
+
+  <help>
+This tool computes the codon usage of an annotated genome [preferably Prokaryotes].
+  </help>
+</tool>