hubarchivecreator: hubArchiveCreator.py comparison

comparison hubArchiveCreator.py @ 1:fb5e60d4d18a draft

planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty

author	rmarenco
date	Wed, 13 Jul 2016 13:36:37 -0400
parents
children	fcff8e9146e7

comparison

equal deleted inserted replaced

-:0f3bc17e5ede
+:fb5e60d4d18a
+#!/usr/bin/python
+# -*- coding: utf8 -*-
+"""
+This Galaxy tool prepares your files so that they are ready for
+Assembly Hub visualization.
+Example test arguments (a -j/--data_json metadata JSON string is also needed):
+hubArchiveCreator.py --gff3 test-data/augustusDbia3.gff3 -f test-data/dbia3.fa -d . -u ./tools -o output.html
+"""
+import argparse
+import collections
+import json
+import sys
+# Internal dependencies
+from TrackHub import TrackHub
+from Gff3 import Gff3
+from Bam import Bam
+from BedSimpleRepeats import BedSimpleRepeats
+from Bed import Bed
+from BigWig import BigWig
+from Gtf import Gtf
+# TODO: Verify each subprocessed dependency is accessible [gff3ToGenePred, genePredToBed, twoBitInfo, faToTwoBit, bedToBigBed, sort]
def main(argv):
    """
    Build a Track Hub archive from the inputs given on the command line.

    Steps, in order:
      1. parse the command-line options;
      2. load the inputs metadata JSON and sanitize the "name" entries;
      3. create the TrackHub;
      4. instantiate one datatype object per input file, for each supported
         format (GFF3, BedSimpleRepeats, Bed, GTF, Bam, BigWig);
      5. add every track to the TrackHub, sorted by "order_index";
      6. call trackHub.createZip() and trackHub.terminate();
      7. exit the interpreter with status 0.

    :param argv: list[string], full argument vector with the program name first
                 (typically sys.argv). When None or empty, argparse falls back
                 to sys.argv on its own.
    :return: never returns normally, sys.exit(0) is called at the end
    """
    # Command Line parsing init
    parser = argparse.ArgumentParser(
        description='Create a Track Hub archive ready for Assembly Hub visualization.')

    # Reference genome mandatory
    parser.add_argument('-f', '--fasta', help='Fasta file of the reference genome')

    # GFF3 Management
    parser.add_argument('--gff3', action='append', help='GFF3 format')

    # GTF Management
    parser.add_argument('--gtf', action='append', help='GTF format')

    # Bed4+12 (TrfBig)
    parser.add_argument('--bedSimpleRepeats', action='append', help='Bed4+12 format, using simpleRepeats.as')

    # Generic Bed (Blastx transformed to bed)
    parser.add_argument('--bed', action='append', help='Bed generic format')

    # BigWig Management
    parser.add_argument('--bigwig', action='append', help='BigWig format')

    # Bam Management
    parser.add_argument('--bam', action='append', help='Bam format')

    # TODO: Check if the running directory can have issues if we run the tool outside
    parser.add_argument('-d', '--directory',
                        help='Running tool directory, where to find the templates. Default is running directory')

    parser.add_argument('-u', '--ucsc_tools_path',
                        help='Directory where to find the executables needed to run this tool')

    parser.add_argument('-e', '--extra_files_path',
                        help='Name, in galaxy, of the output folder. Where you would want to build the Track Hub Archive')

    parser.add_argument('-o', '--output', help='Name of the HTML summarizing the content of the Track Hub Archive')

    parser.add_argument('-j', '--data_json', help='Json containing the metadata of the inputs')

    # Honour the argv given by the caller (skipping the program name);
    # passing None lets argparse read sys.argv itself.
    args = parser.parse_args(argv[1:] if argv else None)

    # json.loads(None) would raise an opaque TypeError: report a usage error instead
    if args.data_json is None:
        parser.error('-j/--data_json is required (Json containing the metadata of the inputs)')

    input_fasta_file = args.fasta
    outputFile = args.output

    inputs_data = json.loads(args.data_json)
    # We remove the spaces in ["name"] of inputs_data
    sanitize_name_inputs(inputs_data)

    # Fall back to the defaults when the option is missing or empty
    toolDirectory = args.directory or '.'
    extra_files_path = args.extra_files_path or '.'

    # TODO: Check here all the binaries / tools we need. Exception is missing
    # NOTE: args.ucsc_tools_path is accepted on the command line but not used yet.

    # Create the Track Hub folder
    trackHub = TrackHub(input_fasta_file, outputFile, extra_files_path, toolDirectory)

    # Datatype class paired with its inputs; the processing order is kept
    # identical to the original sequence of "if" blocks.
    datatype_inputs = [
        (Gff3, args.gff3),                          # Augustus
        (BedSimpleRepeats, args.bedSimpleRepeats),  # Tandem Repeats Finder / TrfBig
        (Bed, args.bed),                            # tBlastN or TopHat
        (Gtf, args.gtf),                            # TopHat
        (Bam, args.bam),                            # TopHat
        (BigWig, args.bigwig),                      # From Bam
    ]

    all_datatype_dictionary = {}
    for extension_class, array_inputs in datatype_inputs:
        # "append" options are None when the option was never given
        if array_inputs:
            create_ordered_datatype_objects(extension_class, array_inputs, inputs_data, input_fasta_file,
                                            extra_files_path, all_datatype_dictionary, toolDirectory)

    # Add the tracks in the tool form order: a plain dict has no guaranteed
    # ordering, so the keys (order_index) are sorted explicitly.
    # NOTE(review): assumes all order_index values are mutually comparable - confirm with the JSON producer.
    for order_index in sorted(all_datatype_dictionary):
        trackHub.addTrack(all_datatype_dictionary[order_index].track.trackDb)

    # We process all the modifications to create the zip file
    trackHub.createZip()

    # We terminate the process and so create a HTML file summarizing all the files
    trackHub.terminate()

    sys.exit(0)
def sanitize_name_inputs(inputs_data):
    """
    Replace, in place, every space of each input's "name" entry by an underscore.

    Names coming from Galaxy outputs, or from the user's own files, may contain spaces.

    :param inputs_data: dict[string, dict[string, string]]
    :return: None, inputs_data is modified in place
    """
    for metadata in inputs_data.values():
        raw_name = metadata["name"]
        metadata["name"] = raw_name.replace(" ", "_")
def create_ordered_datatype_objects(ExtensionClass, array_inputs, inputs_data, input_fasta_file,
                                    extra_files_path, all_datatype_dictionary, tool_directory):
    """
    Instantiate ExtensionClass for every input which has metadata, and register
    each created object into all_datatype_dictionary under its "order_index".

    Inputs without an entry in inputs_data are silently skipped.
    all_datatype_dictionary is only updated once every object has been built,
    so it is left untouched if a constructor raises.

    :param ExtensionClass: T <= Datatype, called as
                           ExtensionClass(input_path, data_value, input_fasta_file, extra_files_path, tool_directory)
    :param array_inputs: list[string], input paths; None or empty means nothing to do
    :param inputs_data: dict[string, dict], metadata keyed by input path; each value holds an "order_index" entry
    :param input_fasta_file: string
    :param extra_files_path: string
    :param all_datatype_dictionary: dict, updated in place with {order_index: datatype object}
    :param tool_directory: string
    :return: None
    """
    if not array_inputs:
        return

    datatype_dictionary = {}

    for input_false_path in array_inputs:
        # Direct lookup: the paths are the keys of inputs_data, no need to scan all its items
        if input_false_path not in inputs_data:
            continue

        data_value = inputs_data[input_false_path]
        extensionObject = ExtensionClass(input_false_path, data_value,
                                         input_fasta_file, extra_files_path, tool_directory)
        datatype_dictionary[data_value["order_index"]] = extensionObject

    all_datatype_dictionary.update(datatype_dictionary)
# Script entry point: run the tool only when this file is executed directly,
# handing the raw command line over to main().
if __name__ == "__main__":
    main(argv=sys.argv)

Mercurial > repos > rmarenco > hubarchivecreator

comparison hubArchiveCreator.py @ 1:fb5e60d4d18a draft