Mercurial > repos > gianmarco_piccinno > project_rm

--- a/Project_RM/codon_usage2.py	Sun Dec 09 06:00:26 2018 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,149 +0,0 @@
-#!/usr/bin/env python
-
-import Bio as Bio
-from Bio import SeqIO
-from Bio.Data import CodonTable
-import re
-from pprint import pprint
-import argparse as ap
-import sys
-import os
-import pandas as pd
-#from BCBio import GFF
-
-
-def read_input(data = "example.fna", type_ = "fasta"):
-    """Return all sequences of a file concatenated into one string.
-
-    Parameters:
-        data:  path to the input file.
-        type_: input format; "fasta" (single or multi-FASTA) or
-               "gbk" (GenBank; "genbank" and "genebank" are accepted
-               as aliases, matching the names used in the CLI help).
-
-    Returns:
-        The record sequences joined in file order, with no separator.
-
-    Raises:
-        ValueError: if type_ is not a supported format. GFF is listed
-            in the CLI help but was never implemented; it used to
-            yield an empty string silently.
-    """
-    # Map every accepted spelling to the name Bio.SeqIO understands.
-    formats = {"fasta": "fasta", "gbk": "genbank",
-               "genbank": "genbank", "genebank": "genbank"}
-    if type_ not in formats:
-        raise ValueError("Unsupported input format %r (expected one of: %s)"
-                         % (type_, ", ".join(sorted(formats))))
-
-    # Pass the path itself to SeqIO.parse so Biopython opens the file
-    # in the right mode; the explicit "rU" mode used previously was
-    # removed in Python 3.11. A single join also avoids the quadratic
-    # cost of growing a string record by record.
-    return "".join(str(record.seq)
-                   for record in SeqIO.parse(data, formats[type_]))
-
-
-def codon_usage(seqs, codonTable):
-    """Count the codons of seqs, grouped by the amino acid they encode.
-
-    seqs is upper-cased and scanned left to right for non-overlapping
-    runs of three word characters. codonTable is a key of Biopython's
-    CodonTable.unambiguous_dna_by_name (an unknown name raises KeyError).
-    Stop codons are grouped under "_Stop"; start codons are keyed as
-    "<codon> Start". Returns a dict holding "Dictionary" ({aa: {codon: n}}),
-    "Tuples" ({(aa, codon): n}) and "Table" (DataFrame indexed by AA and
-    Codon with a Count column and its per-amino-acid Proportion).
-    """
-    from collections import Counter
-    table = CodonTable.unambiguous_dna_by_name[codonTable]
-    # Work on a copy: forward_table belongs to Biopython's shared table
-    # object, and editing it in place made a second call for the same
-    # table fail with KeyError on the already renamed start codons.
-    b_cod_table = dict(table.forward_table)
-    b_cod_table.update(dict.fromkeys(table.stop_codons, "_Stop"))
-    for cod in table.start_codons:
-        b_cod_table[cod + " Start"] = b_cod_table.pop(cod)
-
-    # One counting pass instead of a full list.count() scan per codon.
-    seen = Counter(re.findall(r'\w{3}', seqs.upper()))
-    usage = dict((aa, {}) for aa in set(b_cod_table.values()))
-    for codon, aa in b_cod_table.items():
-        usage[aa][codon] = seen[codon.split(" ")[0]]
-    # .items() replaces the Python-2-only .iteritems() used before.
-    tups = {(aa, cod): n for aa, cods in usage.items() for cod, n in cods.items()}
-    codon_usage_ = pd.DataFrame(pd.Series(tups), columns = ["Count"])
-    codon_usage_.index = codon_usage_.index.set_names(["AA", "Codon"])
-    codon_usage_['Proportion'] = codon_usage_['Count'].groupby(level=0).transform(lambda x: (x / x.sum()).round(2))
-    return {"Dictionary": usage, "Tuples": tups, "Table": codon_usage_}
-
-if __name__ == '__main__':
-    # Command-line entry point: read the sequences, count their codons
-    # and write the resulting table as a tab-separated file.
-    parser = ap.ArgumentParser(description=
-    'This script takes as input gbk and single or multifasta files and\n'
-    'computes the codon usage for a specified codon table.\n'
-    'Usage:\n'
-    'python codon_usage2.py -i example.gbk -t gbk -o gbk_example -c Bacterial\n'
-    'python codon_usage2.py -i example.ffn -t fasta -o fasta_example -c Bacterial\n',
-    formatter_class=ap.RawTextHelpFormatter)
-    parser.add_argument('-i','--input', help='The path to the input file',required=True)
-    parser.add_argument('-t','--type', help='The format of the file [fasta, gbk]', required=True)
-    parser.add_argument('-c','--codonTable', help=
-    'The codon table to be used [Standard, Bacterial, Archaeal ...]\n'
-    'Alternative Flatworm Mitochondrial,\n'
-    'Alternative Yeast Nuclear,\n'
-    'Archaeal,\n'
-    'Ascidian Mitochondrial,\n'
-    'Bacterial,\n'
-    'Blastocrithidia Nuclear,\n'
-    'Blepharisma Macronuclear,\n'
-    'Candidate Division SR1,\n'
-    'Chlorophycean Mitochondrial,\n'
-    'Ciliate Nuclear,\n'
-    'Coelenterate Mitochondrial,\n'
-    'Condylostoma Nuclear,\n'
-    'Dasycladacean Nuclear,\n'
-    'Echinoderm Mitochondrial,\n'
-    'Euplotid Nuclear,\n'
-    'Flatworm Mitochondrial,\n'
-    'Gracilibacteria,\n'
-    'Hexamita Nuclear,\n'
-    'Invertebrate Mitochondrial,\n'
-    'Karyorelict Nuclear,\n'
-    'Mesodinium Nuclear,\n'
-    'Mold Mitochondrial,\n'
-    'Mycoplasma,\n'
-    'Pachysolen tannophilus Nuclear,\n'
-    'Peritrich Nuclear,\n'
-    'Plant Plastid,\n'
-    'Protozoan Mitochondrial,\n'
-    'Pterobranchia Mitochondrial,\n'
-    'SGC0,\n'
-    'SGC1,\n'
-    'SGC2,\n'
-    'SGC3,\n'
-    'SGC4,\n'
-    'SGC5,\n'
-    'SGC8,\n'
-    'SGC9,\n'
-    'Scenedesmus obliquus Mitochondrial,\n'
-    'Spiroplasma,\n'
-    'Standard,\n'
-    'Thraustochytrium Mitochondrial,\n'
-    'Trematode Mitochondrial,\n'
-    'Vertebrate Mitochondrial,\n'
-    'Yeast Mitochondrial\n', required=True)
-    parser.add_argument('-o','--output', help='The path of the tab-separated output file', required=True)
-    args = vars(parser.parse_args())
-
-    if args['codonTable'] not in CodonTable.unambiguous_dna_by_name:  # fail before reading the input
-        parser.error('Unknown codon table: %s' % args['codonTable'])
-    seqs = read_input(data=args['input'], type_=args['type'])
-    if not seqs:  # unreadable format or a file without records: do not write an all-zero table
-        parser.error('No sequence data could be read from %s as %s' % (args['input'], args['type']))
-    out = codon_usage(seqs, args['codonTable'])
-    with open(args['output'], "w") as outf:
-        out["Table"].to_csv(outf, sep="\t", index_label=["AA", "Codon"])
-
-
\ No newline at end of file
--- a/Project_RM/codon_usage_complete.xml	Sun Dec 09 06:00:26 2018 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,39 +0,0 @@
-<tool id="codon_usage" name="Codon Usage" version="0.1.0">
-  <description>of all sequences in a file</description>
-  <requirements>
-    <!-- "python module" is not a valid requirement type; "package" is. -->
-    <requirement type="package">biopython</requirement>
-    <requirement type="package">pandas</requirement>
-  </requirements>
-
-  <!-- The stray positional "python" made argparse reject the command line.
-       Codon table names may contain spaces, so every value is quoted. -->
-  <command interpreter="python">codon_usage2.py -i '$input' -t '$input_type' -o '$output' -c '$codon_table'</command>
-  <inputs>
-    <param name="input" format="fasta,genbank" type="data" label="Source file"/>
-    <!-- Only the formats the script can actually read are offered. -->
-    <param name="input_type" type="select" label="Indicate the input file format">
-      <option value="fasta">Fasta</option>
-      <option value="gbk">gbk</option>
-    </param>
-    <!-- codon_table was referenced by the command but never declared. -->
-    <param name="codon_table" type="text" value="Standard" label="Codon table name as known to Biopython (e.g. Standard, Bacterial, Archaeal)"/>
-  </inputs>
-  <outputs>
-    <data format="tabular" name="output" />
-  </outputs>
-
-  <tests>
-    <!-- NOTE(review): the data file names look inherited from another tool; confirm they exist. -->
-    <test>
-      <param name="input" value="fa_gc_content_input.fa"/>
-      <param name="input_type" value="fasta"/>
-      <param name="codon_table" value="Standard"/>
-      <output name="output" file="fa_gc_content_output.txt"/>
-    </test>
-  </tests>
-
-  <help>
-This tool computes the codon usage of an annotated genome [preferably Prokaryotes].
-  </help>
-</tool>