Mercurial > repos > lldelisle > fromgtftobed12
comparison fromgtfTobed12.py @ 1:6fd4b3b90220 draft default tip
planemo upload for repository https://github.com/lldelisle/tools-lldelisle/tree/master/tools/fromgtfTobed12 commit 15b8c2cc83708044413a152322bcbfca8a74d29a
field | value |
---|---|
author | lldelisle |
date | Fri, 03 Nov 2023 14:13:51 +0000 |
parents | 418e4d0fe0bd |
children | |
comparison legend: equal, deleted, inserted, replaced
0:418e4d0fe0bd | 1:6fd4b3b90220 |
---|---|
21 "feature in your GTF file. You may want to use the " | 21 "feature in your GTF file. You may want to use the " |
22 "`disable_infer_transcripts=True` option to speed up " | 22 "`disable_infer_transcripts=True` option to speed up " |
23 "database creation") | 23 "database creation") |
24 | 24 |
25 | 25 |
26 def convert_gtf_to_bed(fn, fo, useGene, mergeTranscripts, | 26 def convert_gtf_to_bed(fn, fo, preferedName, mergeTranscripts, |
27 mergeTranscriptsAndOverlappingExons, ucsc): | 27 mergeTranscriptsAndOverlappingExons, ucsc): |
28 db = gffutils.create_db(fn, ':memory:') | 28 db = gffutils.create_db(fn, ':memory:') |
29 # For each transcript: | 29 # For each transcript: |
30 prefered_name = "transcript_name" | 30 if preferedName is not None: |
31 if useGene or mergeTranscripts or mergeTranscriptsAndOverlappingExons: | 31 prefered_name = preferedName |
32 elif mergeTranscripts or mergeTranscriptsAndOverlappingExons: | |
32 prefered_name = "gene_name" | 33 prefered_name = "gene_name" |
34 else: | |
35 prefered_name = "transcript_name" | |
33 if mergeTranscripts or mergeTranscriptsAndOverlappingExons: | 36 if mergeTranscripts or mergeTranscriptsAndOverlappingExons: |
34 all_items = db.features_of_type("gene", order_by='start') | 37 all_items = db.features_of_type("gene", order_by='start') |
35 else: | 38 else: |
36 all_items = db.features_of_type("transcript", order_by='start') | 39 all_items = db.features_of_type("transcript", order_by='start') |
37 for tr in all_items: | 40 for tr in all_items: |
125 argp.add_argument('input', default=None, | 128 argp.add_argument('input', default=None, |
126 help="Input gtf file (can be gzip).") | 129 help="Input gtf file (can be gzip).") |
127 argp.add_argument('--output', default=sys.stdout, | 130 argp.add_argument('--output', default=sys.stdout, |
128 type=argparse.FileType('w'), | 131 type=argparse.FileType('w'), |
129 help="Output bed12 file.") | 132 help="Output bed12 file.") |
130 argp.add_argument('--useGene', action="store_true", | |
131 help="Use the gene name instead of the " | |
132 "transcript name.") | |
133 argp.add_argument('--ucscformat', action="store_true", | 133 argp.add_argument('--ucscformat', action="store_true", |
134 help="If you want that all chromosome names " | 134 help="If you want that all chromosome names " |
135 "begin with 'chr'.") | 135 "begin with 'chr'.") |
136 argp.add_argument('--preferedName', default=None, | |
137 help="Name to use for bed output.") | |
136 group = argp.add_mutually_exclusive_group() | 138 group = argp.add_mutually_exclusive_group() |
137 group.add_argument('--mergeTranscripts', action="store_true", | 139 group.add_argument('--mergeTranscripts', action="store_true", |
138 help="Merge all transcripts into a single " | 140 help="Merge all transcripts into a single " |
139 "entry to have one line per gene.") | 141 "entry to have one line per gene.") |
140 group.add_argument('--mergeTranscriptsAndOverlappingExons', | 142 group.add_argument('--mergeTranscriptsAndOverlappingExons', |
142 help="Merge all transcripts into a single " | 144 help="Merge all transcripts into a single " |
143 "entry to have one line per gene and merge" | 145 "entry to have one line per gene and merge" |
144 " overlapping exons.") | 146 " overlapping exons.") |
145 | 147 |
146 args = argp.parse_args() | 148 args = argp.parse_args() |
147 convert_gtf_to_bed(args.input, args.output, args.useGene, | 149 convert_gtf_to_bed(args.input, args.output, args.preferedName, |
148 args.mergeTranscripts, | 150 args.mergeTranscripts, |
149 args.mergeTranscriptsAndOverlappingExons, | 151 args.mergeTranscriptsAndOverlappingExons, |
150 args.ucscformat) | 152 args.ucscformat) |