"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit d396d7ff89705cc0dd626ed32c45a9f4029b1b05"
author jay Wed, 12 Jan 2022 20:06:27 +0000 7557b48b2872
line wrap: on
line source
```
import matplotlib
matplotlib.use('Agg')
import os
import sys
sys.path.insert(0, os.path.abspath('..'))
import quantiprot
from quantiprot.utils.feature import Feature, FeatureSet
from quantiprot.metrics.aaindex import get_aa2hydropathy
from quantiprot.metrics.basic import identity
from quantiprot.metrics.ngram import pattern_match, pattern_count
from quantiprot.analysis.ngram import ngram_count
from quantiprot.analysis.ngram import zipf_law_fit
from matplotlib import pyplot as plt

def Run_ngrams(fasta1, fasta2, OutFile):
    """Plot ranked n-gram counts against a Zipf's-law fit and save the figure.

    Args:
        fasta1: Path to the first FASTA file. Its sequences are passed to
            the two feature sets and to the bigram count.
        fasta2: Path to the second FASTA file. Its sequences are passed to
            the Zipf's-law fit whose counts and slope are plotted.
        OutFile: Destination handed to ``plt.savefig``.
    """
    # Local import keeps this function self-contained; quantiprot is already
    # a dependency of this module.
    from quantiprot.utils.io import load_fasta_file

    first_seqs = load_fasta_file(fasta1)
    second_seqs = load_fasta_file(fasta2)

    # The feature-set and bigram results below are not consumed by the plot;
    # the calls are kept so the function still exercises them.
    fs_aa = FeatureSet("aa patterns")
    result_seq = fs_aa(first_seqs)

    fs_hp = FeatureSet("hydropathy patterns")
    result_seq2 = fs_hp(first_seqs)
    result_freq = ngram_count(first_seqs, n=2)

    # NOTE(review): fitting on the second file with n=3 mirrors quantiprot's
    # own n-gram example -- confirm this is the intended input and order.
    result_fit = zipf_law_fit(second_seqs, n=3)

    counts = sorted(result_fit["ngram_counts"], reverse=True)
    ranks = range(1, len(counts) + 1)

    # Expected count at each rank under Zipf's law: rank**-slope, normalised
    # by the generalised harmonic number and scaled to the observed total.
    slope = result_fit["slope"]
    harmonic_num = sum(rank**-slope for rank in ranks)
    total_count = sum(counts)
    fitted_counts = [(rank**-slope) / harmonic_num * total_count
                     for rank in ranks]

    # Draw on a fresh figure and always close it, so repeated calls do not
    # stack curves on (or leak) the implicit global pyplot figure.
    fig = plt.figure()
    try:
        plt.plot(ranks, counts, 'k', label="empirical")
        plt.plot(ranks, fitted_counts, 'k--',
                 label="Zipf's law\nslope: {:.2f}".format((slope)))
        plt.xlabel('rank')
        plt.ylabel('count')
        plt.xscale('log')
        plt.yscale('log')
        plt.legend()

        plt.savefig(OutFile)
    finally:
        plt.close(fig)

if __name__=="__main__":

import argparse

parser = argparse.ArgumentParser()

required=True,
default=None,
help="First fasta file")

required=True,
default=None,
help="Second fasta file")