Mercurial > repos > cpt > cpt_sar_finder
view SAR_finder.py @ 1:112751823323 draft
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
author | cpt |
---|---|
date | Mon, 05 Jun 2023 02:52:57 +0000 |
parents | |
children |
line wrap: on
line source
"""Command-line entry point for the SAR Finder.

Reads a multi-FASTA file, runs ``CheckSequence`` on every record, and writes
the collected candidates out as GFF3, a tab-separated statistics file, and a
multi-FASTA file.
"""

import sys
import argparse
import os
import re

from biopython_parsing import FASTA_parser
from file_operations import fasta_from_SAR_dict, gff3_from_SAR_dict, tab_from_SAR_dict
from SAR_functions import CheckSequence


def _build_parser():
    """Return the argument parser describing the SAR Finder command line."""
    cli = argparse.ArgumentParser(description="SAR Finder")

    # Input: opened for reading by argparse itself.
    cli.add_argument(
        "fa",
        type=argparse.FileType("r"),
        help="organism's multi fasta file",
    )

    # Integer thresholds.
    # NOTE: --min/--max are parsed but not consumed below; the only call that
    # used them (sar.check_sizes(min=args.min, max=args.max)) is disabled.
    cli.add_argument(
        "--min",
        type=int,
        default=20,
        help="minimum size of candidate peptide",
    )
    cli.add_argument(
        "--max",
        type=int,
        default=200,
        help="maximum size of candidate peptide",
    )
    cli.add_argument(
        "--sar_min",
        type=int,
        default=15,
        help="minimum size of candidate peptide TMD domain",
    )
    cli.add_argument(
        "--sar_max",
        type=int,
        default=24,
        help="maximum size of candidate peptide TMD domain",
    )

    # Outputs: argparse runs string defaults through ``type``, so each of
    # these is opened for writing during parse_args() even when not supplied.
    cli.add_argument(
        "--out_fa",
        type=argparse.FileType("w"),
        help="multifasta output of candidate SAR proteins",
        default="candidate_SAR.fa",
    )
    cli.add_argument(
        "--out_stat",
        type=argparse.FileType("w"),
        help="summary statistic file for candidate SAR proteins, tab separated",
        default="candidate_SAR_stats.tsv",
    )
    cli.add_argument(
        "--out_gff3",
        type=argparse.FileType("w"),
        help="multigff3 file for candidate SAR proteins",
        default="candidate_SAR.gff3",
    )
    return cli


def _collect_candidates(records, sar_min, sar_max):
    """Merge the ``shrink_results`` output of every record into one dict.

    ``records`` is the mapping returned by ``multifasta_dict()``; each
    key/value pair is handed to ``CheckSequence`` unchanged.
    """
    candidates = {}
    for name, data in records.items():
        checker = CheckSequence(name, data)
        candidates.update(checker.shrink_results(sar_min=sar_min, sar_max=sar_max))
    return candidates


if __name__ == "__main__":
    args = _build_parser().parse_args()

    fa_dict = FASTA_parser(fa=args.fa).multifasta_dict()
    sars = _collect_candidates(fa_dict, args.sar_min, args.sar_max)

    # Emit the three reports in the same order as before: GFF3, stats, FASTA.
    gff3_from_SAR_dict(sars, args.out_gff3)
    tab_from_SAR_dict(
        sars,
        args.out_stat,
        "SGAT",
        sar_min=args.sar_min,
        sar_max=args.sar_max,
    )
    fasta_from_SAR_dict(sars, args.out_fa)
    # TODO: switch the stats output to
    # stat_file_from_SAR_dict(sars, args.out_stat, sar_min=args.sar_min, sar_max=args.sar_max)
    # whenever that helper is ready.