Mercurial > repos > iuc > infercnv
annotate gtf_to_position_file.py @ 1:ddbace8f3277 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/infercnv commit 6770266e3437edcb569d9361aadd5ed02caf3662
author | iuc |
---|---|
date | Tue, 27 Aug 2024 13:17:01 +0000 |
parents | be7c0c692879 |
children |
rev | line source |
---|---|
0
be7c0c692879
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/infercnv commit 7e7abdef47fdf68f3ca69b75a8477dabc7bfa965
iuc
parents:
diff
changeset
|
1 #!/usr/bin/env python |
be7c0c692879
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/infercnv commit 7e7abdef47fdf68f3ca69b75a8477dabc7bfa965
iuc
parents:
diff
changeset
|
2 |
be7c0c692879
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/infercnv commit 7e7abdef47fdf68f3ca69b75a8477dabc7bfa965
iuc
parents:
diff
changeset
|
3 |
be7c0c692879
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/infercnv commit 7e7abdef47fdf68f3ca69b75a8477dabc7bfa965
iuc
parents:
diff
changeset
|
4 """ |
be7c0c692879
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/infercnv commit 7e7abdef47fdf68f3ca69b75a8477dabc7bfa965
iuc
parents:
diff
changeset
|
5 Converts GTF files to proprietary formats. |
be7c0c692879
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/infercnv commit 7e7abdef47fdf68f3ca69b75a8477dabc7bfa965
iuc
parents:
diff
changeset
|
6 """ |
be7c0c692879
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/infercnv commit 7e7abdef47fdf68f3ca69b75a8477dabc7bfa965
iuc
parents:
diff
changeset
|
7 |
be7c0c692879
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/infercnv commit 7e7abdef47fdf68f3ca69b75a8477dabc7bfa965
iuc
parents:
diff
changeset
|
8 |
be7c0c692879
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/infercnv commit 7e7abdef47fdf68f3ca69b75a8477dabc7bfa965
iuc
parents:
diff
changeset
|
9 # Import statements |
be7c0c692879
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/infercnv commit 7e7abdef47fdf68f3ca69b75a8477dabc7bfa965
iuc
parents:
diff
changeset
|
10 import argparse |
be7c0c692879
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/infercnv commit 7e7abdef47fdf68f3ca69b75a8477dabc7bfa965
iuc
parents:
diff
changeset
|
11 import csv |
be7c0c692879
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/infercnv commit 7e7abdef47fdf68f3ca69b75a8477dabc7bfa965
iuc
parents:
diff
changeset
|
12 import gzip |
be7c0c692879
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/infercnv commit 7e7abdef47fdf68f3ca69b75a8477dabc7bfa965
iuc
parents:
diff
changeset
|
13 import os |
be7c0c692879
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/infercnv commit 7e7abdef47fdf68f3ca69b75a8477dabc7bfa965
iuc
parents:
diff
changeset
|
14 |
be7c0c692879
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/infercnv commit 7e7abdef47fdf68f3ca69b75a8477dabc7bfa965
iuc
parents:
diff
changeset
|
15 |
be7c0c692879
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/infercnv commit 7e7abdef47fdf68f3ca69b75a8477dabc7bfa965
iuc
parents:
diff
changeset
|
# Module metadata.
__author__ = 'Timothy Tickle, Itay Tirosh, Brian Haas'
__copyright__ = 'Copyright 2016'
__credits__ = ["Timothy Tickle"]
__license__ = 'BSD-3'
__maintainer__ = 'Timothy Tickle'
# Maintainer contact; the domain previously carried a doubled "b".
__email__ = 'ttickle@broadinstitute.org'
__status__ = 'Development'
be7c0c692879
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/infercnv commit 7e7abdef47fdf68f3ca69b75a8477dabc7bfa965
iuc
parents:
diff
changeset
|
23 |
be7c0c692879
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/infercnv commit 7e7abdef47fdf68f3ca69b75a8477dabc7bfa965
iuc
parents:
diff
changeset
|
24 |
be7c0c692879
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/infercnv commit 7e7abdef47fdf68f3ca69b75a8477dabc7bfa965
iuc
parents:
diff
changeset
|
def open_file(file_path):
    """ Return a text-mode handle for a plain or gzip-compressed file.

    The decision is made purely on the file name: a path ending in
    '.gz' is opened through gzip, anything else with the builtin open.

    :param file_path: Path to input file
    :type file_path: String

    :returns: File object opened for reading text
    """
    is_gzipped = file_path.endswith('.gz')
    # Pick the opener and its text-read mode together, then call it once.
    opener, mode = (gzip.open, 'rt') if is_gzipped else (open, 'r')
    return opener(file_path, mode)
be7c0c692879
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/infercnv commit 7e7abdef47fdf68f3ca69b75a8477dabc7bfa965
iuc
parents:
diff
changeset
|
37 |
be7c0c692879
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/infercnv commit 7e7abdef47fdf68f3ca69b75a8477dabc7bfa965
iuc
parents:
diff
changeset
|
38 |
be7c0c692879
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/infercnv commit 7e7abdef47fdf68f3ca69b75a8477dabc7bfa965
iuc
parents:
diff
changeset
|
def _parse_gtf_attributes(attribute_field):
    """ Turn the GTF attribute column into a dictionary.

    :param attribute_field: Raw text of the attribute column, a list of
                            'key "value"' entries separated by ';'
    :type attribute_field: String

    :returns: Mapping of attribute key to unquoted value; when a key is
              repeated the last occurrence wins
    :rtype: dict
    """
    parsed = {}
    for entry in attribute_field.split(";"):
        entry = entry.strip(" ")
        if not entry:
            continue
        # Split on the first space only so values containing spaces are
        # kept whole; an entry without a value yields an empty string.
        key, _, value = entry.partition(" ")
        # Keys may carry a '|'-separated suffix; only the leading part is used.
        key = key.strip('"').split("|")[0]
        parsed[key] = value.strip(" ").strip('"')
    return parsed


def _append_gene_row(gene, chromosome, positions, seen_genes, rows):
    """ Append the output row for one gene unless it was already emitted.

    :param gene: Feature name taken from the selected attribute
    :param chromosome: Value of the first GTF column for that feature
    :param positions: Start/end coordinates collected for the feature
    :type positions: list of int
    :param seen_genes: Names already written; updated in place
    :type seen_genes: set
    :param rows: Output lines collected so far; updated in place
    :type rows: list of String

    :returns: True when a row was appended, False otherwise
    :rtype: boolean
    """
    if len(positions) < 2 or gene in seen_genes:
        return False
    rows.append("\t".join([gene,
                           chromosome,
                           str(min(positions)),
                           str(max(positions))]))
    seen_genes.add(gene)
    return True


def convert_to_positional_file(input_gtf, output_positional, attribute_key):
    """ Convert input GTF file to positional file.

    Consecutive GTF lines that share the same value for attribute_key are
    merged into one output row holding the name, the chromosome (first
    GTF column) and the smallest and largest coordinate seen in columns
    4 and 5. A name that shows up again later in the file is written only
    once. Summary metrics are printed to standard output.

    :param input_gtf: Path to input gtf file
    :type input_gtf: String
    :param output_positional: Path to output positional file
    :type output_positional: String
    :param attribute_key: Key of the GTF attribute to use for feature/row names
    :type attribute_key: String

    :returns: Indicator of success (True) or Failure (False)
    :rtype: boolean
    :raises SystemExit: with code 99 when a data line lacks attribute_key
    """

    if not input_gtf or not os.path.exists(input_gtf):
        # str() keeps the message printable when input_gtf is None.
        print("".join(["gtf_to_position_file.py:: ",
                       "Could not find input file : " + str(input_gtf)]))
        return False

    all_genes_found = set()

    # Holds lines to output after parsing.
    output_line = []
    previous_gene = None
    previous_chr = None
    gene_positions = []

    # Metrics for the file
    i_comments = 0
    i_duplicate_entries = 0
    i_entries = 0
    i_accepted_entries = 0
    i_written_lines = 0

    with open_file(input_gtf) as gtf:
        for gtf_line in csv.reader(gtf, delimiter="\t"):
            # csv yields an empty list for blank lines; nothing to parse.
            if not gtf_line:
                continue
            if gtf_line[0].startswith("#"):
                i_comments += 1
                continue
            i_entries += 1
            attributes = _parse_gtf_attributes(gtf_line[8])
            if attribute_key not in attributes:
                print("Could not find an attribute in the GTF with the name '" + attribute_key + "'. Line=" + "\t".join(gtf_line))
                raise SystemExit(99)
            gene_name = attributes[attribute_key]
            if gene_name != previous_gene:
                # A new feature starts: flush the one collected so far.
                if _append_gene_row(previous_gene, previous_chr,
                                    gene_positions, all_genes_found,
                                    output_line):
                    i_accepted_entries += 1
                gene_positions = []
            else:
                i_duplicate_entries += 1
            gene_positions += [int(gtf_line[3]), int(gtf_line[4])]
            previous_gene = gene_name
            previous_chr = gtf_line[0]

    # Flush the last feature; the helper skips names already written so the
    # output never contains the same row name twice.
    if previous_gene and previous_chr:
        if _append_gene_row(previous_gene, previous_chr, gene_positions,
                            all_genes_found, output_line):
            i_accepted_entries += 1

    with open(output_positional, "w") as positional_file:
        i_written_lines += len(output_line)
        positional_file.write("\n".join(output_line))

    # Print metrics
    print("Number of lines read: " + str(i_entries))
    print("Number of comments: " + str(i_comments))
    print("Number of entries: " + str(i_accepted_entries))
    print("Number of duplicate entries: " + str(i_duplicate_entries))
    print("Number of entries written: " + str(i_written_lines))

    return True
be7c0c692879
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/infercnv commit 7e7abdef47fdf68f3ca69b75a8477dabc7bfa965
iuc
parents:
diff
changeset
|
124 |
be7c0c692879
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/infercnv commit 7e7abdef47fdf68f3ca69b75a8477dabc7bfa965
iuc
parents:
diff
changeset
|
125 |
be7c0c692879
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/infercnv commit 7e7abdef47fdf68f3ca69b75a8477dabc7bfa965
iuc
parents:
diff
changeset
|
if __name__ == "__main__":

    # Parse arguments
    prsr_arguments = argparse.ArgumentParser(prog='gtf_to_position_file.py',
                                             description='Convert a GTF file to a positional file.',
                                             formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    # Positional: the GTF file to read.
    prsr_arguments.add_argument("input_gtf",
                                metavar="input_gtf",
                                help="Path to the input GTF file.")
    # Optional: which GTF attribute supplies the row names (default gene_id).
    prsr_arguments.add_argument("--attribute_name",
                                metavar="attribute_name",
                                default="gene_id",
                                help="The name of the attribute in the GTF attributes to use instead of gene name, for example 'gene_name' or 'transcript_id'.")
    # Positional: where the positional file is written.
    prsr_arguments.add_argument("output_positional",
                                metavar="output_positional",
                                help="Path for the output positional file.")
    args = prsr_arguments.parse_args()

    # Run Script
    succeeded = convert_to_positional_file(args.input_gtf, args.output_positional, args.attribute_name)
    # The converter returns False when the input file cannot be found; only
    # that explicit value is treated as failure so the process reports it
    # through a non-zero exit status instead of exiting 0.
    if succeeded is False:
        raise SystemExit(1)