Mercurial > repos > cpt > cpt_get_orfs
annotate get_orfs_or_cdss.py @ 6:f56389d8abda draft default tip
planemo upload commit f33bdf952d796c5d7a240b132af3c4cbd102decc
| author | cpt | 
|---|---|
| date | Fri, 05 Jan 2024 05:52:07 +0000 | 
| parents | f97bc7f587a1 | 
| children | 
| rev | line source | 
|---|---|
| 4 
f97bc7f587a1
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 1 #!/usr/bin/env python | 
| 
f97bc7f587a1
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 2 import argparse | 
| 
f97bc7f587a1
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 3 import logging | 
| 
f97bc7f587a1
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 4 from cpt import OrfFinder | 
| 
f97bc7f587a1
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
 cpt parents: diff
changeset | 5 | 
logging.basicConfig()
log = logging.getLogger()


def build_parser():
    """Build the command-line parser for the ORF/CDS finder script.

    The parser takes one positional input file plus options for the
    translation table, feature type (ORF or CDS), open/closed ends, minimum
    length, strand, and four output files (nucleotide, protein, BED, GFF3).

    The input and output arguments use ``argparse.FileType``, so parsing
    opens the files -- including the default ``out.*`` outputs -- and the
    caller is responsible for closing them.

    Returns:
        argparse.ArgumentParser: the configured parser.
    """
    parser = argparse.ArgumentParser(description="Get open reading frames")
    parser.add_argument("fasta_file", type=argparse.FileType("r"), help="Fasta file")

    # NOTE(review): --format and --mode are accepted here, but this script
    # never forwards them to OrfFinder; confirm whether that is intentional.
    parser.add_argument(
        "-f",
        "--format",
        dest="seq_format",
        default="fasta",
        help="Sequence format (e.g. fasta, fastq, sff)",
    )
    parser.add_argument(
        "--table", dest="table", default=1, help="NCBI Translation table", type=int
    )
    parser.add_argument(
        "-t",
        "--ftype",
        dest="ftype",
        choices=("CDS", "ORF"),
        default="ORF",
        help="Find ORF or CDSs",
    )
    parser.add_argument(
        "-e",
        "--ends",
        dest="ends",
        choices=("open", "closed"),
        default="closed",
        help="Open or closed. Closed ensures start/stop codons are present",
    )
    parser.add_argument(
        "-m",
        "--mode",
        dest="mode",
        choices=("all", "top", "one"),
        default="all",
        help="Output all ORFs/CDSs from sequence, all ORFs/CDSs "
        "with max length, or first with maximum length",
    )
    parser.add_argument(
        "--min_len", dest="min_len", default=10, help="Minimum ORF/CDS length", type=int
    )
    parser.add_argument(
        "--strand",
        dest="strand",
        choices=("both", "forward", "reverse"),
        default="both",
        help="select strand",
    )

    parser.add_argument(
        "--on",
        dest="out_nuc",
        type=argparse.FileType("w"),
        default="out.fna",
        help="Output nucleotide sequences",
    )
    parser.add_argument(
        "--op",
        dest="out_prot",
        type=argparse.FileType("w"),
        default="out.fa",
        help="Output protein sequences",
    )
    parser.add_argument(
        "--ob",
        dest="out_bed",
        type=argparse.FileType("w"),
        default="out.bed",
        help="Output BED file",
    )
    parser.add_argument(
        "--og",
        dest="out_gff3",
        type=argparse.FileType("w"),
        default="out.gff3",
        help="Output GFF3 file",
    )
    parser.add_argument("-v", action="version", version="0.3.0")
    return parser


def main(argv=None):
    """Parse the command line and run OrfFinder over the input file.

    Args:
        argv: Optional list of argument strings; ``None`` makes argparse
            read ``sys.argv[1:]``, matching a normal script invocation.
    """
    import sys

    args = build_parser().parse_args(argv)
    handles = (
        args.fasta_file,
        args.out_nuc,
        args.out_prot,
        args.out_bed,
        args.out_gff3,
    )
    try:
        of = OrfFinder(args.table, args.ftype, args.ends, args.min_len, args.strand)
        of.locate(
            args.fasta_file, args.out_nuc, args.out_prot, args.out_bed, args.out_gff3
        )
    finally:
        # argparse.FileType opened these handles during parsing; close them
        # explicitly so the outputs are flushed even if locate() raises.
        # "-" maps to the process's standard streams, which are left open.
        for handle in handles:
            if handle is not sys.stdin and handle is not sys.stdout:
                handle.close()


if __name__ == "__main__":
    main()
