Mercurial > repos > rmarenco > hubarchivecreator
annotate Gtf.py @ 9:4f9847539a28 draft
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 1b241bbccfc1b90ce2626dc4f49788bb9e693a80
author | rmarenco |
---|---|
date | Wed, 20 Jul 2016 12:29:08 -0400 |
parents | fb5e60d4d18a |
children | acc233161f50 |
rev | line source |
---|---|
1
fb5e60d4d18a
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff
changeset
|
1 #!/usr/bin/python |
fb5e60d4d18a
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff
changeset
|
2 |
fb5e60d4d18a
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff
changeset
|
3 import os |
fb5e60d4d18a
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff
changeset
|
4 import tempfile |
fb5e60d4d18a
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff
changeset
|
5 |
fb5e60d4d18a
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff
changeset
|
6 # Internal dependencies |
fb5e60d4d18a
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff
changeset
|
7 from Datatype import Datatype |
fb5e60d4d18a
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff
changeset
|
8 from Track import Track |
fb5e60d4d18a
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff
changeset
|
9 from TrackDb import TrackDb |
fb5e60d4d18a
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff
changeset
|
10 from util import subtools |
fb5e60d4d18a
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff
changeset
|
11 |
fb5e60d4d18a
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff
changeset
|
12 |
fb5e60d4d18a
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff
changeset
|
13 class Gtf( Datatype ): |
fb5e60d4d18a
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff
changeset
|
14 def __init__( self, input_gtf_false_path, data_gtf, |
fb5e60d4d18a
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff
changeset
|
15 input_fasta_file, extra_files_path, tool_directory ): |
fb5e60d4d18a
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff
changeset
|
16 super(Gtf, self).__init__( input_fasta_file=input_fasta_file, |
fb5e60d4d18a
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff
changeset
|
17 extra_files_path=extra_files_path, |
fb5e60d4d18a
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff
changeset
|
18 tool_directory=tool_directory ) |
fb5e60d4d18a
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff
changeset
|
19 |
fb5e60d4d18a
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff
changeset
|
20 self.track = None |
fb5e60d4d18a
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff
changeset
|
21 |
fb5e60d4d18a
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff
changeset
|
22 self.input_gtf_false_path = input_gtf_false_path |
fb5e60d4d18a
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff
changeset
|
23 self.name_gtf = data_gtf["name"] |
fb5e60d4d18a
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff
changeset
|
24 self.priority = data_gtf["order_index"] |
fb5e60d4d18a
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff
changeset
|
25 |
fb5e60d4d18a
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff
changeset
|
26 print "Creating TrackHub GTF from (falsePath: %s; name: %s)" % ( self.input_gtf_false_path, self.name_gtf) |
fb5e60d4d18a
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff
changeset
|
27 |
fb5e60d4d18a
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff
changeset
|
28 # TODO: See if we need these temporary files as part of the generated files |
fb5e60d4d18a
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff
changeset
|
29 genePredFile = tempfile.NamedTemporaryFile(bufsize=0, suffix=".genePred") |
fb5e60d4d18a
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff
changeset
|
30 unsortedBedFile = tempfile.NamedTemporaryFile(bufsize=0, suffix=".unsortedBed") |
fb5e60d4d18a
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff
changeset
|
31 sortedBedFile = tempfile.NamedTemporaryFile(suffix=".sortedBed") |
fb5e60d4d18a
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff
changeset
|
32 |
fb5e60d4d18a
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff
changeset
|
33 twoBitInfoFile = tempfile.NamedTemporaryFile(bufsize=0) |
fb5e60d4d18a
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff
changeset
|
34 chromSizesFile = tempfile.NamedTemporaryFile(bufsize=0, suffix=".chrom.sizes") |
fb5e60d4d18a
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff
changeset
|
35 |
fb5e60d4d18a
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff
changeset
|
36 # GtfToGenePred |
fb5e60d4d18a
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff
changeset
|
37 subtools.gtfToGenePred(self.input_gtf_false_path, genePredFile.name) |
fb5e60d4d18a
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff
changeset
|
38 |
fb5e60d4d18a
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff
changeset
|
39 # TODO: From there, refactor because common use with Gff3.py |
fb5e60d4d18a
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff
changeset
|
40 # genePredToBed processing |
fb5e60d4d18a
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff
changeset
|
41 subtools.genePredToBed(genePredFile.name, unsortedBedFile.name) |
fb5e60d4d18a
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff
changeset
|
42 |
fb5e60d4d18a
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff
changeset
|
43 # Sort processing |
fb5e60d4d18a
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff
changeset
|
44 subtools.sort(unsortedBedFile.name, sortedBedFile.name) |
fb5e60d4d18a
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff
changeset
|
45 |
fb5e60d4d18a
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff
changeset
|
46 # TODO: Chehck if the twoBitInfo / ChromSizes is redundant and make an intermediate class |
fb5e60d4d18a
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff
changeset
|
47 # Generate the twoBitInfo |
fb5e60d4d18a
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff
changeset
|
48 subtools.twoBitInfo(self.twoBitFile.name, twoBitInfoFile.name) |
fb5e60d4d18a
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff
changeset
|
49 |
fb5e60d4d18a
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff
changeset
|
50 # Then we get the output to generate the chromSizes |
fb5e60d4d18a
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff
changeset
|
51 # TODO: Check if no errors |
fb5e60d4d18a
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff
changeset
|
52 subtools.sortChromSizes(twoBitInfoFile.name, chromSizesFile.name) |
fb5e60d4d18a
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff
changeset
|
53 |
fb5e60d4d18a
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff
changeset
|
54 # bedToBigBed processing |
fb5e60d4d18a
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff
changeset
|
55 # TODO: Change the name of the bb, to tool + genome + possible adding if multiple + .bb |
fb5e60d4d18a
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff
changeset
|
56 trackName = "".join( ( self.name_gtf, ".bb") ) |
fb5e60d4d18a
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff
changeset
|
57 myBigBedFilePath = os.path.join(self.myTrackFolderPath, trackName) |
fb5e60d4d18a
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff
changeset
|
58 with open(myBigBedFilePath, 'w') as bigBedFile: |
fb5e60d4d18a
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff
changeset
|
59 subtools.bedToBigBed(sortedBedFile.name, chromSizesFile.name, bigBedFile.name) |
fb5e60d4d18a
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff
changeset
|
60 |
fb5e60d4d18a
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff
changeset
|
61 # Create the Track Object |
fb5e60d4d18a
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff
changeset
|
62 dataURL = "tracks/%s" % trackName |
fb5e60d4d18a
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff
changeset
|
63 |
fb5e60d4d18a
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff
changeset
|
64 trackDb = TrackDb( |
fb5e60d4d18a
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff
changeset
|
65 trackName=trackName, |
fb5e60d4d18a
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff
changeset
|
66 longLabel=self.name_gtf, |
fb5e60d4d18a
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff
changeset
|
67 shortLabel=self.getShortName( self.name_gtf ), |
fb5e60d4d18a
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff
changeset
|
68 trackDataURL=dataURL, |
fb5e60d4d18a
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff
changeset
|
69 trackType='bigBed 12 +', |
fb5e60d4d18a
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff
changeset
|
70 visibility='dense', |
fb5e60d4d18a
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff
changeset
|
71 priority=self.priority, |
fb5e60d4d18a
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff
changeset
|
72 ) |
fb5e60d4d18a
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff
changeset
|
73 self.track = Track( |
fb5e60d4d18a
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff
changeset
|
74 trackFile=myBigBedFilePath, |
fb5e60d4d18a
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff
changeset
|
75 trackDb=trackDb, |
fb5e60d4d18a
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff
changeset
|
76 ) |
fb5e60d4d18a
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff
changeset
|
77 |
fb5e60d4d18a
planemo upload for repository https://github.com/goeckslab/hub-archive-creator commit 64cfc08088d11f6818c1b4e5514ef9e67969eaff-dirty
rmarenco
parents:
diff
changeset
|
78 print("- %s created in %s" % (trackName, myBigBedFilePath)) |