Mercurial > repos > jay > pdaug_ml_models
view PDAUG_Peptide_Ngrams/PDAUG_Peptide_Ngrams.py @ 0:0973f093d98f draft
"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
author | jay |
---|---|
date | Wed, 28 Oct 2020 02:31:40 +0000 |
parents | |
children |
line wrap: on
line source
"""Plot an n-gram rank/count curve with a Zipf's-law fit for peptide sequences.

Command-line usage:
    PDAUG_Peptide_Ngrams.py -f1 FIRST.fasta -f2 SECOND.fasta --OutFile FIGURE
"""
import argparse
import os
import sys

import matplotlib
# Select the non-interactive Agg backend before pyplot is imported so the
# script can run without a display.
matplotlib.use('Agg')

# Make packages located in the parent directory importable.
sys.path.insert(0, os.path.abspath('..'))

import quantiprot
from quantiprot.utils.io import load_fasta_file
from quantiprot.utils.feature import Feature, FeatureSet
from quantiprot.metrics.aaindex import get_aa2hydropathy
from quantiprot.metrics.basic import identity
from quantiprot.metrics.ngram import pattern_match, pattern_count
from quantiprot.analysis.ngram import ngram_count
from quantiprot.analysis.ngram import zipf_law_fit
from matplotlib import pyplot as plt


def Run_ngrams(fasta1, fasta2, OutFile):
    """Run n-gram analyses on two FASTA files and save a Zipf's-law plot.

    Args:
        fasta1: Path of the first FASTA file. It is loaded and passed through
            the amino-acid pattern, hydropathy pattern and 2-gram count
            analyses below.
        fasta2: Path of the second FASTA file. Its sequences are passed to
            ``zipf_law_fit`` with ``n=3``; the resulting counts and slope are
            what gets plotted.
        OutFile: Path the figure is written to with ``savefig``.

    Returns:
        None. The only output is the figure written to ``OutFile``.
    """
    alphasyn_seq = load_fasta_file(fasta1)
    amyload_pos_seq = load_fasta_file(fasta2)

    # NOTE(review): the three results below are computed but never used for
    # the plot or returned. They are kept so that processing of ``fasta1`` is
    # unchanged; consider exporting or removing them.
    fs_aa = FeatureSet("aa patterns")
    fs_aa.add(identity)
    fs_aa.add(pattern_match, pattern='VT', padded=True)
    fs_aa.add(pattern_count, pattern='VT')
    result_seq = fs_aa(alphasyn_seq)

    fs_hp = FeatureSet("hydropathy patterns")
    fs_hp.add(Feature(get_aa2hydropathy()))
    fs_hp.add(Feature(get_aa2hydropathy()).then(pattern_match,
                                                pattern=[0.0, 2.0],
                                                metric='taxi',
                                                radius=1.0))
    result_seq2 = fs_hp(alphasyn_seq)

    result_freq = ngram_count(alphasyn_seq, n=2)

    result_fit = zipf_law_fit(amyload_pos_seq, n=3, verbose=True)

    # Empirical counts in descending order, paired with ranks 1..N.
    counts = sorted(result_fit["ngram_counts"], reverse=True)
    ranks = list(range(1, len(counts) + 1))
    slope = result_fit["slope"]

    # Normalise rank**-slope so the fitted curve sums to the same total as
    # the empirical counts. The total is computed once, not once per rank.
    harmonic_num = sum(rank ** -slope for rank in ranks)
    total_count = sum(counts)
    fitted_counts = [(rank ** -slope) / harmonic_num * total_count
                     for rank in ranks]

    # Draw on a dedicated figure and always close it, so repeated calls in
    # one process neither overlay earlier plots nor accumulate open figures.
    fig, ax = plt.subplots()
    try:
        ax.plot(ranks, counts, 'k', label="empirical")
        ax.plot(ranks, fitted_counts, 'k--',
                label="Zipf's law\nslope: {:.2f}".format(slope))
        ax.set_xlabel('rank')
        ax.set_ylabel('count')
        ax.set_xscale('log')
        ax.set_yscale('log')
        ax.legend()
        fig.savefig(OutFile)
    finally:
        plt.close(fig)


def main(argv=None):
    """Parse command-line arguments and run ``Run_ngrams``.

    Args:
        argv: Optional list of argument strings; when None, argparse reads
            ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-f1", "--Fasta1", required=True,
                        help="First fasta file")
    parser.add_argument("-f2", "--Fasta2", required=True,
                        help="Second fasta file")
    # The option is required, so no default is declared (it could never be
    # used). The file is produced by matplotlib's savefig, not as HTML.
    parser.add_argument("--OutFile", required=True,
                        help="Output figure file (written with matplotlib "
                             "savefig; format is taken from the file "
                             "extension)")
    args = parser.parse_args(argv)

    Run_ngrams(args.Fasta1, args.Fasta2, args.OutFile)


if __name__ == "__main__":
    main()