Mercurial > repos > rmarenco > hubarchivecreator
comparison hubArchiveCreator.py @ 1:fb5e60d4d18a draft
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
| author | rmarenco |
|---|---|
| date | Wed, 13 Jul 2016 13:36:37 -0400 |
| parents | |
| children | fcff8e9146e7 |
comparison
equal
deleted
inserted
replaced
| 0:0f3bc17e5ede | 1:fb5e60d4d18a |
|---|---|
| 1 #!/usr/bin/python | |
| 2 # -*- coding: utf8 -*- | |
| 3 | |
| 4 """ | |
| 5 This Galaxy tool prepares your files for | |
| 6 Assembly Hub visualization. | |
| 7 Program test arguments: | |
| 8 hubArchiveCreator.py -g test-data/augustusDbia3.gff3 -f test-data/dbia3.fa -d . -u ./tools -o output.html | |
| 9 """ | |
| 10 | |
| 11 import argparse | |
| 12 import collections | |
| 13 import json | |
| 14 import sys | |
| 15 | |
| 16 # Internal dependencies | |
| 17 from TrackHub import TrackHub | |
| 18 from Gff3 import Gff3 | |
| 19 from Bam import Bam | |
| 20 from BedSimpleRepeats import BedSimpleRepeats | |
| 21 from Bed import Bed | |
| 22 from BigWig import BigWig | |
| 23 from Gtf import Gtf | |
| 24 | |
| 25 | |
| 26 # TODO: Verify each subprocessed dependency is accessible [gff3ToGenePred, genePredToBed, twoBitInfo, faToTwoBit, bedToBigBed, sort] | |
| 27 | |
| 28 | |
def main(argv):
    """
    Build a Track Hub archive from the command-line inputs.

    Parses the arguments, wraps every input file in its datatype class,
    adds the resulting tracks to the TrackHub in ascending "order_index"
    order, creates the zip, writes the HTML summary and finally exits the
    interpreter with status 0.

    :param argv: list[string], full argument vector with the program name
                 first (as in sys.argv). When empty or None, argparse falls
                 back to sys.argv itself.
    :return: never returns normally, the function ends with sys.exit(0)
    """
    # Command Line parsing init
    parser = argparse.ArgumentParser(
        description='Prepare your files to be ready for Assembly Hub visualization.')

    # Reference genome mandatory
    parser.add_argument('-f', '--fasta', help='Fasta file of the reference genome')

    # GFF3 Management
    parser.add_argument('--gff3', action='append', help='GFF3 format')

    # GTF Management
    parser.add_argument('--gtf', action='append', help='GTF format')

    # Bed4+12 (TrfBig)
    parser.add_argument('--bedSimpleRepeats', action='append', help='Bed4+12 format, using simpleRepeats.as')

    # Generic Bed (Blastx transformed to bed)
    parser.add_argument('--bed', action='append', help='Bed generic format')

    # BigWig Management
    parser.add_argument('--bigwig', action='append', help='BigWig format')

    # Bam Management
    parser.add_argument('--bam', action='append', help='Bam format')

    # TODO: Check if the running directory can have issues if we run the tool outside
    parser.add_argument('-d', '--directory',
                        help='Running tool directory, where to find the templates. Default is running directory')
    # NOTE: -u is accepted for command-line compatibility, but its value is
    # not used anywhere in this function.
    parser.add_argument('-u', '--ucsc_tools_path',
                        help='Directory where to find the executables needed to run this tool')
    parser.add_argument('-e', '--extra_files_path',
                        help='Name, in galaxy, of the output folder. Where you would want to build the Track Hub Archive')
    parser.add_argument('-o', '--output', help='Name of the HTML summarizing the content of the Track Hub Archive')

    parser.add_argument('-j', '--data_json', help='Json containing the metadata of the inputs')

    # Honour the argv handed to main() instead of silently re-reading
    # sys.argv; argv[0] is the program name, which argparse must not see.
    args = parser.parse_args(argv[1:] if argv else None)

    input_fasta_file = args.fasta
    outputFile = args.output

    # Both locations default to the running directory when not provided
    toolDirectory = args.directory or '.'
    extra_files_path = args.extra_files_path or '.'

    # Metadata of the inputs, parsed and sanitized exactly once
    inputs_data = json.loads(args.data_json)

    # We remove the spaces in ["name"] of inputs_data
    sanitize_name_inputs(inputs_data)

    # TODO: Check here all the binaries / tools we need. Exception is missing

    # Create the Track Hub folder
    trackHub = TrackHub(input_fasta_file, outputFile, extra_files_path, toolDirectory)

    all_datatype_dictionary = {}

    # Datatype class paired with the inputs given for it, in processing order.
    # Each option uses action='append', so a value is a list or None.
    datatype_inputs = [
        (Gff3, args.gff3),                          # Augustus
        (BedSimpleRepeats, args.bedSimpleRepeats),  # Tandem Repeats Finder / TrfBig
        (Bed, args.bed),                            # tBlastN or TopHat
        (Gtf, args.gtf),                            # Tophat
        (Bam, args.bam),                            # Tophat
        (BigWig, args.bigwig),                      # From Bam
    ]

    for ExtensionClass, array_inputs in datatype_inputs:
        if array_inputs:
            create_ordered_datatype_objects(ExtensionClass, array_inputs, inputs_data, input_fasta_file,
                                            extra_files_path, all_datatype_dictionary, toolDirectory)

    # Create Ordered Dictionary to add the tracks in the tool form order.
    # The keys are the "order_index" values, so they are sorted explicitly:
    # wrapping a plain dict in OrderedDict would only freeze its arbitrary
    # iteration order. Assumes the order_index values are mutually comparable.
    all_datatype_ordered_dictionary = collections.OrderedDict(
        sorted(all_datatype_dictionary.items(), key=lambda item: item[0]))

    # .values() exists on both Python 2 and 3, unlike .iteritems()
    for datatypeObject in all_datatype_ordered_dictionary.values():
        trackHub.addTrack(datatypeObject.track.trackDb)

    # We process all the modifications to create the zip file
    trackHub.createZip()

    # We terminate the process and so create an HTML file summarizing all the files
    trackHub.terminate()

    sys.exit(0)
| 156 | |
| 157 | |
def sanitize_name_inputs(inputs_data):
    """
    Replace, in place, every space of each entry's "name" with an underscore.

    Names coming out of Galaxy, or file names chosen by the user, can
    contain spaces.
    :param inputs_data: dict[string, dict[string, string]]
    :return: None, inputs_data is modified in place
    """
    for metadata in inputs_data.values():
        sanitized_name = metadata["name"].replace(" ", "_")
        metadata["name"] = sanitized_name
| 166 | |
| 167 | |
def create_ordered_datatype_objects(ExtensionClass, array_inputs, inputs_data, input_fasta_file,
                                    extra_files_path, all_datatype_dictionary, tool_directory):
    """
    Function which executes the creation of all the necessary files / folders for a special Datatype,
    for TrackHub, and updates the dictionary of datatypes.

    An ExtensionClass object is built for every path of array_inputs that is a key of inputs_data,
    and stored in all_datatype_dictionary under the "order_index" of its metadata.
    Paths without metadata are skipped.
    :param ExtensionClass: T <= Datatype
    :param array_inputs: list[string]
    :param inputs_data: dict, metadata of the inputs keyed by input path
    :param input_fasta_file: string
    :param extra_files_path: string
    :param all_datatype_dictionary: dict, updated in place with {order_index: datatype object}
    :param tool_directory: string
    :return: None
    """

    # Collected separately so all_datatype_dictionary is only touched once
    # every object has been built successfully.
    datatype_dictionary = {}

    for input_false_path in array_inputs:
        # Direct lookup instead of scanning every metadata entry per input
        if input_false_path not in inputs_data:
            continue
        data_value = inputs_data[input_false_path]
        extensionObject = ExtensionClass(input_false_path, data_value,
                                         input_fasta_file, extra_files_path, tool_directory)
        datatype_dictionary[data_value["order_index"]] = extensionObject

    all_datatype_dictionary.update(datatype_dictionary)
| 191 | |
# Script entry point: only runs when the file is executed directly,
# handing the full argument vector over to main().
if __name__ == "__main__":
    main(sys.argv)
