Mercurial > repos > dave > interval2maf
comparison interval2maf.py @ 0:d38f317fa9d4 draft default tip
Uploaded
| author | dave |
|---|---|
| date | Fri, 10 Jul 2020 16:05:40 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:d38f317fa9d4 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 """ | |
| 3 Reads a list of intervals and a maf. Produces a new maf containing the | |
| 4 blocks or parts of blocks in the original that overlapped the intervals. | |
| 5 | |
| 6 If a MAF file, not UID, is provided the MAF file is indexed before being processed. | |
| 7 | |
| 8 NOTE: If two intervals overlap the same block it will be written twice. | |
| 9 | |
| 10 usage: %prog maf_file [options] | |
| 11 -d, --dbkey=d: Database key, ie hg17 | |
| 12 -c, --chromCol=c: Column of Chr | |
| 13 -s, --startCol=s: Column of Start | |
| 14 -e, --endCol=e: Column of End | |
| 15 -S, --strandCol=S: Column of Strand | |
| 16 -t, --mafType=t: Type of MAF source to use | |
| 17 -m, --mafFile=m: Path of source MAF file, if not using cached version | |
| 18 -I, --mafIndex=I: Path of precomputed source MAF file index, if not using cached version | |
| 19 -i, --interval_file=i: Input interval file | |
| 20 -o, --output_file=o: Output MAF file | |
| 21 -p, --species=p: Species to include in output | |
| 22 -P, --split_blocks_by_species=P: Split blocks by species | |
| 23 -r, --remove_all_gap_columns=r: Remove all Gap columns | |
| 24 -l, --indexLocation=l: Override default maf_index.loc file | |
| 25 -z, --mafIndexFile=z: Directory of local maf index file ( maf_index.loc or maf_pairwise.loc ) | |
| 26 """ | |
| 27 # Dan Blankenberg | |
| 28 from __future__ import print_function | |
| 29 | |
| 30 import bx.align.maf | |
| 31 import bx.intervals.io | |
| 32 from bx.cookbook import doc_optparse | |
| 33 | |
| 34 from galaxy.tools.util import maf_utilities | |
| 35 | |
| 36 | |
| 37 def __main__(): | |
| 38 index = index_filename = None | |
| 39 | |
| 40 # Parse Command Line | |
| 41 options, args = doc_optparse.parse(__doc__) | |
| 42 | |
| 43 if options.dbkey: | |
| 44 dbkey = options.dbkey | |
| 45 else: | |
| 46 dbkey = None | |
| 47 if dbkey in [None, "?"]: | |
| 48 maf_utilities.tool_fail("You must specify a proper build in order to extract alignments. You can specify your genome build by clicking on the pencil icon associated with your interval file.") | |
| 49 | |
| 50 species = maf_utilities.parse_species_option(options.species) | |
| 51 | |
| 52 if options.chromCol: | |
| 53 chromCol = int(options.chromCol) - 1 | |
| 54 else: | |
| 55 maf_utilities.tool_fail("Chromosome column not set, click the pencil icon in the history item to set the metadata attributes.") | |
| 56 | |
| 57 if options.startCol: | |
| 58 startCol = int(options.startCol) - 1 | |
| 59 else: | |
| 60 maf_utilities.tool_fail("Start column not set, click the pencil icon in the history item to set the metadata attributes.") | |
| 61 | |
| 62 if options.endCol: | |
| 63 endCol = int(options.endCol) - 1 | |
| 64 else: | |
| 65 maf_utilities.tool_fail("End column not set, click the pencil icon in the history item to set the metadata attributes.") | |
| 66 | |
| 67 if options.strandCol: | |
| 68 strandCol = int(options.strandCol) - 1 | |
| 69 else: | |
| 70 strandCol = -1 | |
| 71 | |
| 72 if options.interval_file: | |
| 73 interval_file = options.interval_file | |
| 74 else: | |
| 75 maf_utilities.tool_fail("Input interval file has not been specified.") | |
| 76 | |
| 77 if options.output_file: | |
| 78 output_file = options.output_file | |
| 79 else: | |
| 80 maf_utilities.tool_fail("Output file has not been specified.") | |
| 81 | |
| 82 split_blocks_by_species = remove_all_gap_columns = False | |
| 83 if options.split_blocks_by_species and options.split_blocks_by_species == 'split_blocks_by_species': | |
| 84 split_blocks_by_species = True | |
| 85 if options.remove_all_gap_columns and options.remove_all_gap_columns == 'remove_all_gap_columns': | |
| 86 remove_all_gap_columns = True | |
| 87 else: | |
| 88 remove_all_gap_columns = True | |
| 89 # Finish parsing command line | |
| 90 | |
| 91 # Open indexed access to MAFs | |
| 92 if options.mafType: | |
| 93 if options.indexLocation: | |
| 94 index = maf_utilities.maf_index_by_uid(options.mafType, options.indexLocation) | |
| 95 else: | |
| 96 index = maf_utilities.maf_index_by_uid(options.mafType, options.mafIndexFile) | |
| 97 if index is None: | |
| 98 maf_utilities.tool_fail("The MAF source specified (%s) appears to be invalid." % (options.mafType)) | |
| 99 elif options.mafFile: | |
| 100 index, index_filename = maf_utilities.open_or_build_maf_index(options.mafFile, options.mafIndex, species=[dbkey]) | |
| 101 if index is None: | |
| 102 maf_utilities.tool_fail("Your MAF file appears to be malformed.") | |
| 103 else: | |
| 104 maf_utilities.tool_fail("Desired source MAF type has not been specified.") | |
| 105 | |
| 106 # Create MAF writter | |
| 107 out = bx.align.maf.Writer(open(output_file, "w")) | |
| 108 | |
| 109 # Iterate over input regions | |
| 110 num_blocks = 0 | |
| 111 num_regions = None | |
| 112 for num_regions, region in enumerate(bx.intervals.io.NiceReaderWrapper(open(interval_file, 'r'), chrom_col=chromCol, start_col=startCol, end_col=endCol, strand_col=strandCol, fix_strand=True, return_header=False, return_comments=False)): | |
| 113 src = maf_utilities.src_merge(dbkey, region.chrom) | |
| 114 for block in index.get_as_iterator(src, region.start, region.end): | |
| 115 if split_blocks_by_species: | |
| 116 blocks = [new_block for new_block in maf_utilities.iter_blocks_split_by_species(block) if maf_utilities.component_overlaps_region(new_block.get_component_by_src_start(dbkey), region)] | |
| 117 else: | |
| 118 blocks = [block] | |
| 119 for block in blocks: | |
| 120 block = maf_utilities.chop_block_by_region(block, src, region) | |
| 121 if block is not None: | |
| 122 if species is not None: | |
| 123 block = block.limit_to_species(species) | |
| 124 block = maf_utilities.orient_block_by_region(block, src, region) | |
| 125 if remove_all_gap_columns: | |
| 126 block.remove_all_gap_columns() | |
| 127 out.write(block) | |
| 128 num_blocks += 1 | |
| 129 | |
| 130 # Close output MAF | |
| 131 out.close() | |
| 132 | |
| 133 # remove index file if created during run | |
| 134 maf_utilities.remove_temp_index_file(index_filename) | |
| 135 | |
| 136 if num_blocks: | |
| 137 print("%i MAF blocks extracted for %i regions." % (num_blocks, (num_regions + 1))) | |
| 138 elif num_regions is not None: | |
| 139 print("No MAF blocks could be extracted for %i regions." % (num_regions + 1)) | |
| 140 else: | |
| 141 print("No valid regions have been provided.") | |
| 142 | |
| 143 | |
| 144 if __name__ == "__main__": | |
| 145 __main__() |
