annotate data_manager/add_ctat_lncrna_annotations.py @ 0:a3aa3f9e1702 draft default tip

Uploaded
author trinity_ctat
date Mon, 16 Jul 2018 20:42:55 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
a3aa3f9e1702 Uploaded
trinity_ctat
parents:
diff changeset
1 #!/usr/bin/env python
a3aa3f9e1702 Uploaded
trinity_ctat
parents:
diff changeset
2 # ref: https://galaxyproject.org/admin/tools/data-managers/how-to/define/
a3aa3f9e1702 Uploaded
trinity_ctat
parents:
diff changeset
3
a3aa3f9e1702 Uploaded
trinity_ctat
parents:
diff changeset
4 # Rewritten by H.E. Cicada Brokaw Dennis from a source downloaded from the toolshed and
a3aa3f9e1702 Uploaded
trinity_ctat
parents:
diff changeset
5 # other example code on the web.
a3aa3f9e1702 Uploaded
trinity_ctat
parents:
diff changeset
6 # This file downloads annotations for lncrna (slncky tool)
a3aa3f9e1702 Uploaded
trinity_ctat
parents:
diff changeset
7
a3aa3f9e1702 Uploaded
trinity_ctat
parents:
diff changeset
8 import argparse
a3aa3f9e1702 Uploaded
trinity_ctat
parents:
diff changeset
9 import os
a3aa3f9e1702 Uploaded
trinity_ctat
parents:
diff changeset
10 import subprocess
a3aa3f9e1702 Uploaded
trinity_ctat
parents:
diff changeset
11
a3aa3f9e1702 Uploaded
trinity_ctat
parents:
diff changeset
12 # The following is used to generate a unique_id value
a3aa3f9e1702 Uploaded
trinity_ctat
parents:
diff changeset
13 from datetime import *
a3aa3f9e1702 Uploaded
trinity_ctat
parents:
diff changeset
14
a3aa3f9e1702 Uploaded
trinity_ctat
parents:
diff changeset
15 # Remove the following line when testing without galaxy package:
a3aa3f9e1702 Uploaded
trinity_ctat
parents:
diff changeset
16 from galaxy.util.json import to_json_string
a3aa3f9e1702 Uploaded
trinity_ctat
parents:
diff changeset
17 # Am not using the following:
a3aa3f9e1702 Uploaded
trinity_ctat
parents:
diff changeset
18 # from galaxy.util.json import from_json_string
a3aa3f9e1702 Uploaded
trinity_ctat
parents:
diff changeset
19
a3aa3f9e1702 Uploaded
trinity_ctat
parents:
diff changeset
20 # The FileListParser is used by get_ctat_genome_filenames(),
a3aa3f9e1702 Uploaded
trinity_ctat
parents:
diff changeset
21 # which is called by the Data Manager interface (.xml file) to get
a3aa3f9e1702 Uploaded
trinity_ctat
parents:
diff changeset
22 # the filenames that are available online at broadinstitute.org
a3aa3f9e1702 Uploaded
trinity_ctat
parents:
diff changeset
23 # Not sure best way to do it.
a3aa3f9e1702 Uploaded
trinity_ctat
parents:
diff changeset
24 # This object uses HTMLParser to look through the html
a3aa3f9e1702 Uploaded
trinity_ctat
parents:
diff changeset
25 # searching for the filenames within anchor tags.
a3aa3f9e1702 Uploaded
trinity_ctat
parents:
diff changeset
26 import urllib2
a3aa3f9e1702 Uploaded
trinity_ctat
parents:
diff changeset
27 from HTMLParser import HTMLParser
a3aa3f9e1702 Uploaded
trinity_ctat
parents:
diff changeset
28
a3aa3f9e1702 Uploaded
trinity_ctat
parents:
diff changeset
29 #_CTAT_lncrnaIndexPage_URL = 'https://data.broadinstitute.org/Trinity/CTAT/lncrna/annotations.tar.gz'
a3aa3f9e1702 Uploaded
trinity_ctat
parents:
diff changeset
# URL of the lncrna annotations tarball that gets downloaded and extracted.
_CTAT_lncrnaDownload_URL = 'https://data.broadinstitute.org/Trinity/CTAT/lncrna/annotations.tar.gz'
# Key under 'data_tables' in the JSON document written by main().
_CTAT_lncrnaTableName = 'ctat_lncrna_annotations'
# Display name offered in the option menu for the download location.
_CTAT_lncrnaDir_Name = 'annotations'
_CTAT_lncrna_DisplayNamePrefix = 'CTAT_lncrna_annotations_'
_lncrnaFileExtension = 'lc'
# Minimum free space, in bytes, required on the destination device (2 GiB).
_NumBytesNeededForAnnotations = 2 * 1024 ** 3
#_DownloadFileSize = 5790678746 # 5.4 Gigabytes.
# Scratch file created and removed to prove the destination is writable.
_Download_TestFile = 'write_testfile.txt'
# Marker file whose presence means a previous download completed.
_DownloadSuccessFile = 'download_succeeded.txt'
a3aa3f9e1702 Uploaded
trinity_ctat
parents:
diff changeset
39
a3aa3f9e1702 Uploaded
trinity_ctat
parents:
diff changeset
40 '''
a3aa3f9e1702 Uploaded
trinity_ctat
parents:
diff changeset
41 class FileListParser(HTMLParser):
a3aa3f9e1702 Uploaded
trinity_ctat
parents:
diff changeset
42 def __init__(self):
a3aa3f9e1702 Uploaded
trinity_ctat
parents:
diff changeset
43 # Have to use direct call to super class rather than using super():
a3aa3f9e1702 Uploaded
trinity_ctat
parents:
diff changeset
44 # super(FileListParser, self).__init__()
a3aa3f9e1702 Uploaded
trinity_ctat
parents:
diff changeset
45 # because HTMLParser is an "old style" class and its inheritance chain does not include object.
a3aa3f9e1702 Uploaded
trinity_ctat
parents:
diff changeset
46 HTMLParser.__init__(self)
a3aa3f9e1702 Uploaded
trinity_ctat
parents:
diff changeset
47 self.filenames = set()
a3aa3f9e1702 Uploaded
trinity_ctat
parents:
diff changeset
48 def handle_starttag(self, tag, attrs):
a3aa3f9e1702 Uploaded
trinity_ctat
parents:
diff changeset
49 # Look for filename references in anchor tags and add them to filenames.
a3aa3f9e1702 Uploaded
trinity_ctat
parents:
diff changeset
50 if tag == "a":
a3aa3f9e1702 Uploaded
trinity_ctat
parents:
diff changeset
51 # The tag is an anchor tag.
a3aa3f9e1702 Uploaded
trinity_ctat
parents:
diff changeset
52 for attribute in attrs:
a3aa3f9e1702 Uploaded
trinity_ctat
parents:
diff changeset
53 # print "Checking: {:s}".format(str(attribute))
a3aa3f9e1702 Uploaded
trinity_ctat
parents:
diff changeset
54 if attribute[0] == "href":
a3aa3f9e1702 Uploaded
trinity_ctat
parents:
diff changeset
55 # Does the href have a tar.gz in it?
a3aa3f9e1702 Uploaded
trinity_ctat
parents:
diff changeset
56 if ("tar.gz" in attribute[1]) and ("md5" not in attribute[1]):
a3aa3f9e1702 Uploaded
trinity_ctat
parents:
diff changeset
57 # Add the value to filenames.
a3aa3f9e1702 Uploaded
trinity_ctat
parents:
diff changeset
58 self.filenames.add(attribute[1])
a3aa3f9e1702 Uploaded
trinity_ctat
parents:
diff changeset
59 # End of class FileListParser
a3aa3f9e1702 Uploaded
trinity_ctat
parents:
diff changeset
60 '''
a3aa3f9e1702 Uploaded
trinity_ctat
parents:
diff changeset
61
a3aa3f9e1702 Uploaded
trinity_ctat
parents:
diff changeset
62
a3aa3f9e1702 Uploaded
trinity_ctat
parents:
diff changeset
def get_ctat_lncrna_annotations_locations():
    """Return the entries for the download-location option menu.

    Dynamic options must be an iterable of 3-item tuples:
    (display name shown in the list, value stored in the parameter,
    True/False for whether the entry is selected).
    """
    # Scraping an index page for available files is disabled; the single
    # known tarball URL is offered, preselected.
    only_entry = (_CTAT_lncrnaDir_Name, _CTAT_lncrnaDownload_URL, True)
    options = [only_entry]
    print("The list of items being returned for the option menu is:")
    print(str(options))
    return options
a3aa3f9e1702 Uploaded
trinity_ctat
parents:
diff changeset
78
a3aa3f9e1702 Uploaded
trinity_ctat
parents:
diff changeset
def _fetch_and_extract(src_location, target_directory):
    """Stream the archive at src_location through tar into target_directory.

    curl's output is piped straight into tar so the tarball itself is never
    stored on disk. Both programs are started from argument lists (no shell),
    so URLs and paths containing spaces or shell metacharacters are passed
    through verbatim.

    Raises:
        OSError: curl or tar could not be started.
        subprocess.CalledProcessError: curl or tar exited with a non-zero
            status; its ``cmd`` attribute holds the pipeline as text.
    """
    # --fail makes curl exit non-zero on an HTTP error instead of handing
    # the server's error page to tar.
    curl_command = ["curl", "--silent", "--fail", src_location]
    tar_command = ["tar", "-xzf", "-", "-C", target_directory, "--strip", "1"]
    pipeline_text = "{:s} | {:s}".format(" ".join(curl_command), " ".join(tar_command))

    curl_process = subprocess.Popen(curl_command, stdout=subprocess.PIPE)
    try:
        tar_process = subprocess.Popen(tar_command, stdin=curl_process.stdout)
    except OSError:
        # tar could not be started: shut the pipe so curl stops, then reap it.
        curl_process.stdout.close()
        curl_process.wait()
        raise
    # Drop our copy of the read end so curl notices if tar exits early.
    curl_process.stdout.close()
    tar_status = tar_process.wait()
    curl_status = curl_process.wait()
    if curl_status != 0 or tar_status != 0:
        raise subprocess.CalledProcessError(curl_status or tar_status, pipeline_text)


def download_annotations(src_location, destination, force_download):
    """Download and unpack the annotations tarball unless already present.

    Args:
        src_location: URL of the ``.tar.gz`` archive. The part of its last
            path component before the first '.' is the root directory name.
        destination: Directory to extract into. If its last component is not
            already the root directory name, that name is appended.
        force_download: When true, download even if the success marker file
            from a previous run is present.

    Returns:
        A tuple ``(directory, root_name, was_downloaded)``: the canonical
        target directory, the root name derived from ``src_location``, and
        whether this call actually performed a download.

    Raises:
        ValueError: The target exists but is not a directory, or it holds
            fewer than four entries after the download step.
        OSError: The directory cannot be created, the device has less free
            space than ``_NumBytesNeededForAnnotations``, or curl/tar could
            not be started.
        IOError: The directory or the success marker cannot be written.
        subprocess.CalledProcessError: curl, tar or ls exited non-zero.
    """
    annotations_was_downloaded = False
    # The part after the last '/' and before the first '.'
    root_annotations_dirname = src_location.split("/")[-1].split(".")[0]

    # Work with an absolute, fully resolved path that ends in the root name.
    canonical_destination = os.path.realpath(destination)
    if canonical_destination.split("/")[-1] != root_annotations_dirname:
        canonical_destination += "/" + root_annotations_dirname

    if os.path.exists(canonical_destination):
        if not os.path.isdir(canonical_destination):
            raise ValueError("The destination is not a directory: " +
                             "{:s}".format(canonical_destination))
    else:
        try:
            os.makedirs(canonical_destination)
        except OSError:
            print("ERROR: Trying to create the following directory path:")
            print("\t{:s}".format(canonical_destination))
            raise

    # Make sure the directory now exists and we can write to it.
    if not os.path.exists(canonical_destination):
        raise OSError("The destination directory could not be created: " +
                      "{:s}".format(canonical_destination))
    test_writing_file = "{:s}/{:s}".format(canonical_destination, _Download_TestFile)
    try:
        with open(test_writing_file, "w") as filehandle:
            filehandle.write("Testing writing to this file.")
        os.remove(test_writing_file)
    except (IOError, OSError):
        print("The destination directory could not be written into: " +
              "{:s}".format(canonical_destination))
        raise

    # The pre-existing contents tell us whether an earlier download finished.
    orig_files_in_destdir = set(os.listdir(canonical_destination))
    download_success_file_path = "{:s}/{:s}".format(canonical_destination, _DownloadSuccessFile)
    if (_DownloadSuccessFile not in orig_files_in_destdir) or force_download:
        # Free bytes that ordinary users may use (excludes reserved space).
        statvfs = os.statvfs(canonical_destination)
        num_avail_bytes = statvfs.f_frsize * statvfs.f_bavail
        if num_avail_bytes < _NumBytesNeededForAnnotations:
            raise OSError("There is insufficient space ({:s} bytes)".format(str(num_avail_bytes)) +
                          " on the device of the destination directory: " +
                          "{:s}".format(canonical_destination))

        if _DownloadSuccessFile in orig_files_in_destdir:
            # Redoing the download: the marker must stay absent until the
            # new download has succeeded.
            os.remove(download_success_file_path)
        try:
            _fetch_and_extract(src_location, canonical_destination)
        except subprocess.CalledProcessError as error:
            print("ERROR: Trying to run the following command:\n\t{:s}".format(str(error.cmd)))
            raise
        else:
            annotations_was_downloaded = True

    # Listing the directory helps diagnose problems from the job output.
    print("\n*******************************\nFinished download and extraction.")
    if os.path.exists(canonical_destination) and os.path.isdir(canonical_destination):
        subprocess.check_call(["ls", "-la", canonical_destination], stderr=subprocess.STDOUT)

    files_in_destdir = set(os.listdir(canonical_destination))
    # Only reported in the failure message below.
    found_filenames = {filename for filename in files_in_destdir
                       if root_annotations_dirname in filename}
    # NOTE: success is judged purely by the number of directory entries.
    # FIX - we could md5 the files, or check their sizes or names.
    if len(files_in_destdir) >= 4:
        try:
            # Create (or freshen) the marker without going through a shell,
            # so a failure really does surface as IOError/OSError here.
            with open(download_success_file_path, "a"):
                pass
            os.utime(download_success_file_path, None)
        except (IOError, OSError):
            print("The download_success file could not be created: " +
                  "{:s}".format(download_success_file_path))
            raise
    else:
        print("After download, the potential annotations files found are:\n\t{:s}".format(str(found_filenames)))
        raise ValueError("ERROR: Could not find the extracted annotations files " +
                         "in the destination directory:\n\t{:s}".format(canonical_destination))

    return (canonical_destination, root_annotations_dirname, annotations_was_downloaded)
a3aa3f9e1702 Uploaded
trinity_ctat
parents:
diff changeset
192
a3aa3f9e1702 Uploaded
trinity_ctat
parents:
diff changeset
def main():
    """Make the lncrna annotations available and write the data-table JSON.

    Parses the command line and ensures the annotations exist under
    ``--destination_path`` (via download_annotations() while the module-level
    download URL is non-empty, otherwise by checking an existing directory).
    It then writes a JSON document to ``--output_filename`` holding one
    data-table entry per genome build (mm9, mm10, hg19, hg38). Every entry
    points at the same ``annotations.config`` file.

    Exits through ``parser.error`` when ``--destination_path`` or
    ``--output_filename`` is missing.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-d', '--download_location', default="",
        help='This is the download location of the lncrna annotations.')
    parser.add_argument('-n', '--display_name', default="",
        help='Is used as the selector text for the entry of this lncrna annotations in the data table.')
    parser.add_argument('-p', '--destination_path',
        help='Full path of the lncrna annotations location or destination, either where it is, or where it will be placed.')
    parser.add_argument('-o', '--output_filename',
        help='Name of the output file, where the json dictionary will be written.')
    parser.add_argument('-f', '--force_download',
        help='Forces download of lncrna annotations, even if previously downloaded. ' +
             'Requires download_location to be set in order to work.', action="store_true")
    args = parser.parse_args()

    # Fail with a usage message rather than a TypeError deep in os.path/open.
    if not args.destination_path:
        parser.error("--destination_path is required.")
    if not args.output_filename:
        parser.error("--output_filename is required.")

    # NOTE: --download_location is accepted but not consulted; the fixed
    # module-level URL is always used. --display_name is only echoed below.
    print("\ndownload_location is {:s}".format(str(_CTAT_lncrnaDownload_URL)))
    print("display_name is {:s}".format(str(args.display_name)))
    print("destination_path is {:s}\n".format(str(args.destination_path)))

    if _CTAT_lncrnaDownload_URL != "":
        # The returned root name and "was downloaded" flag are not needed here.
        annotations_directory, _root_dirname, _was_downloaded = \
            download_annotations(src_location=_CTAT_lncrnaDownload_URL,
                                 destination=args.destination_path,
                                 force_download=args.force_download)
    else:
        # No URL configured: the annotations must already be in place.
        canonical_destination = os.path.realpath(args.destination_path)
        if not os.path.exists(canonical_destination):
            raise ValueError("Cannot find the Lncrna annotations.\n" +
                             "The directory does not exist:\n\t{:s}".format(canonical_destination))
        if len(os.listdir(canonical_destination)) != 4:
            raise ValueError("Contents of destination directory not equal to expected - 4")
        annotations_directory = canonical_destination

    print("\nThe location of the Lncrna annotations is {:s}.\n".format(annotations_directory))
    annotations_file_name = "annotations.config"
    annotations_file_path = annotations_directory + "/" + annotations_file_name
    if annotations_file_name not in os.listdir(annotations_directory):
        print("WARNING: {:s} was not found in {:s}".format(annotations_file_name, annotations_directory))

    # The build names double as display name and value of each table entry,
    # so they are assigned unconditionally.
    datetime_stamp = datetime.now().strftime("_%Y_%m_%d_%H_%M_%S_%f")
    for build in ("hg19", "hg38", "mm9", "mm10"):
        # The unique_id is only reported here; it is not stored in the table.
        unique_id = "ctat_lncrna_" + build + datetime_stamp
        print("The {0:s} Index's display_name will be set to: {0:s}\n".format(build))
        print("Its {:s} unique_id will be set to: {:s}\n".format(build, unique_id))
        print("Its {:s} dir_path will be set to: {:s}\n".format(build, annotations_file_path))

    table_entries = [dict(value=build, name=build, path=annotations_file_path)
                     for build in ("mm9", "mm10", "hg19", "hg38")]
    data_manager_dict = {'data_tables': {_CTAT_lncrnaTableName: table_entries}}

    # Temporarily the output file's dictionary is written for debugging:
    print("The dictionary for the output file is:\n\t{:s}".format(str(data_manager_dict)))
    # The JSON file hands the entries from this tool back to the data manager.
    # Remove the following lines when testing without galaxy package.
    with open(args.output_filename, 'w') as output_file:
        output_file.write(to_json_string(data_manager_dict))
a3aa3f9e1702 Uploaded
trinity_ctat
parents:
diff changeset
321
a3aa3f9e1702 Uploaded
trinity_ctat
parents:
diff changeset
# Run the data manager only when executed as a script, not when imported.
if "__main__" == __name__:
    main()