| 4 | 1 #!/usr/bin/env python3 | 
|  | 2 ''' configuration file to set up the paths and constants ''' | 
|  | 3 import os | 
|  | 4 | 
######## PROFREP #######################################################
## Constants
# NOTE(review): the per-constant remarks below are inferred from the names
# only - confirm against the code that reads them.

# Two characters: a backslash followed by a forward slash.
FORBIDDEN_CHARS = "\\/"

# Presumably the number of characters per line of FASTA output.
FASTA_LINE = 60
# Presumably the resolution of generated images.
IMAGE_RES = 300
# Upper limits (pictures, sub-profile files).
MAX_PIC_NUM = 50
MAX_FILES_SUBPROFILES = 1000
# Mixed-case name kept as is: other modules refer to it by this spelling.
N_segment = 50
# Presumably a sequence-length threshold related to visualization.
SEQ_LEN_VIZ = 200000
# HTML skeleton of the report page. It contains five bare "{}" fields
# (str.format-style positional placeholders) which, going by the headings
# around them, stand for:
#   1. the processed sequences      2. the total length (bp)
#   3. the database                 4. the repetitive profile(s)
#   5. the references
# The line break after the "Repetitive profile(s)" heading is written as
# <br/>; the former "</br>" is not a valid tag.
# NOTE(review): the closing </h6> has no opening tag inside this template;
# presumably the fragment substituted into field 5 opens it - verify against
# the caller before removing it.
HTML_STR = '''
	<!DOCTYPE html>
	<html>
	<body>
		<h2>PROFREP OUTPUT</h2>
		<h4> Sequences processed: </h4>
		{}
		<h4> Total length: </h4>
		<pre> {} bp </pre>
		<h4> Database: </h4>
		<pre> {} </pre>
		<hr>
		<h3> Repetitive profile(s)</h3> <br/>
		{} <br/>
		<h4>References: </h4>
		{}
		</h6>
	</body>
	</html>
	'''
|  | 34 | 
## IO
# File and directory names used for input/output.

# GFF files
REPEATS_GFF = "output_repeats.gff"
DOMAINS_GFF = "output_domains.gff"
N_GFF = "N_regions.gff"

# HTML page and log
HTML = "output.html"
LOG_FILE = "log.txt"

# ProfRep data location, dataset table and output directory
# NOTE(review): PROFREP_DATA is a relative path; what it is resolved against
# is not visible here.
PROFREP_DATA = "tool_data/profrep"
PROFREP_TBL = "prepared_datasets.txt"
PROFREP_OUTPUT_DIR = "profrep_output_dir"
## JBrowse and Tracks Conf
jbrowse_data_dir = "data"

# JSON snippets holding a "modify" hook: a JavaScript function that sets a
# colour on `fdiv`. The two differ in the CSS property they assign
# (backgroundColor vs. background) and in the colour value.
# Each is assembled from adjacent literals so that neither quote character
# needs escaping.
JSON_CONF_R = (
    '{"hooks" : {"modify": "function( track, f, fdiv ) '
    "{fdiv.style.backgroundColor = '#278ECF'}"
    '"}}'
)
JSON_CONF_N = (
    '{"hooks" : {"modify": "function( track, f, fdiv ) '
    "{fdiv.style.background = '#474747'}"
    '"}}'
)
# Colour palette as "#RRGGBB" strings (63 entries).
COLORS_HEX = [
    "#7F7F7F", "#00FF00", "#0000FF", "#FF0000", "#01FFFE", "#FFA6FE", "#FFDB66",
    "#006401", "#010067", "#95003A", "#007DB5", "#FF00F6", "#774D00", "#90FB92",
    "#0076FF", "#D5FF00", "#FF937E", "#6A826C", "#FF029D", "#FE8900", "#7A4782",
    "#7E2DD2", "#85A900", "#FF0056", "#A42400", "#00AE7E", "#683D3B", "#BDC6FF",
    "#263400", "#BDD393", "#00B917", "#9E008E", "#001544", "#C28C9F", "#FF74A3",
    "#01D0FF", "#004754", "#E56FFE", "#788231", "#0E4CA1", "#91D0CB", "#BE9970",
    "#968AE8", "#BB8800", "#43002C", "#DEFF74", "#00FFC6", "#FFE502", "#620E00",
    "#008F9C", "#98FF52", "#7544B1", "#B500FF", "#00FF78", "#FF6E41", "#005F39",
    "#6B6882", "#5FAD4E", "#A75740", "#A5FFD2", "#FFB167", "#009BFF", "#E85EBE",
]

# The same palette, index-aligned with COLORS_HEX, as "R,G,B" strings of
# decimal channel values (e.g. "#7F7F7F" -> "127,127,127"). Each entry is
# computed from the hex code: the two-digit channels start at offsets 1, 3
# and 5, right after the leading "#".
COLORS_RGB = [
    ",".join(str(int(hex_code[pos:pos + 2], 16)) for pos in (1, 3, 5))
    for hex_code in COLORS_HEX
]
# Text template for one BigWig XYPlot track entry. It has eight bare "{}"
# fields; in order of appearance they sit: after the leading comma, in
# "urlTemplate", in "label", in "key", right after "style":, in "pos_color",
# on the line following "pos_color", and on the last line.
# NOTE(review): the four fields that are not inside quotes presumably receive
# literal "{" / "}" characters (a bare brace cannot be written directly in a
# str.format template) - confirm against the caller.
# Every line is spelled out with explicit escapes so the exact tabs and
# newlines of the template are visible.
TRACK_LIST = (
    '\n'
    '\t\t,{}\n\n'
    '\t\t"storeClass" : "JBrowse/Store/SeqFeature/BigWig",\n'
    '\t\t"urlTemplate" : "{}",\n'
    '\t\t"type" : "JBrowse/View/Track/Wiggle/XYPlot",\n'
    '\t\t"label" : "{}",\n'
    '\t\t"key" : "{}",\n'
    '\t\t"style": {}\n'
    '\t\t\t"pos_color": "{}"\n'
    '\t\t {},\n'
    '\t\t"scale" : "log"\n'
    '\t\t{}\n\n'
    '\t'
)
|  | 86 | 
## GFF tracks
# Header lines
HEADER_GFF = "##gff-version 3"
HEADER_WIG = "variableStep\tchrom="  # "\t" is a tab; the string ends at "chrom="

# Source labels
SOURCE_PROFREP = "profrep"
SOURCE_DANTE = "dante"

# Feature labels
DOMAINS_FEATURE = "protein_domain"
REPEATS_FEATURE = "repeat"
N_FEATURE = "N_region"
N_NAME = "N"

# Single-dot values (PHASE and GFF_EMPTY hold the same string)
PHASE = "."
GFF_EMPTY = "."
|  | 98 | 
######### BIG WIG ######################################################
# File name for chromosome sizes.
CHROM_SIZES_FILE = "chrom_sizes.txt"
|  | 101 | 
######### EXTRACT_DATA_FOR_PROFREP #####################################
# Relative paths under "seqclust/" and the annotation CSV name.
READS_ALL = "seqclust/sequences/sequences.fasta"
HITSORT_CLS = "seqclust/clustering/hitsort.cls"
ANNOTATION = "PROFREP_CLASSIFICATION_TEMPLATE.csv"
|  | 106 | 
######### PROFREP_DB_REDUCING ##########################################
# Memory limit, expressed in MB.
MEM_LIM = 1500
# Names of the reduced outputs.
READS_ALL_REDUCED = "reads_all_reduced"
CLS_REDUCED = "hitsort_reduced.cls"
|  | 111 | 
######### PROFREP_REFINING #############################################
# 0.05 == 5% tolerance of PID
QUALITY_DIFF_TO_REMOVE = 0.05
WITH_DOMAINS = "mobile_element"
|  | 115 | 
######### DANTE ##############################################
# Directory that contains this file; realpath() resolves symlinks first, and
# the head returned by os.path.split() is the directory part.
MAIN_GIT_DIR = os.path.split(os.path.realpath(__file__))[0]
# "domains_data" directory next to this file, and the blosum80 file in it.
DOMAINS_DATA = os.path.join(MAIN_GIT_DIR, "domains_data")
SC_MATRIX = os.path.join(DOMAINS_DATA, "blosum80.txt")

TMP = "tmp"
AMBIGUOUS_TAG = "Ambiguous_domain"

## IO
# Database / classification file names
LAST_DB_FILE = "ALL_protein-domains_05.fasta"
CLASS_FILE = "ALL.classification-new"
# Output names
DOM_PROT_SEQ = "dom_prot_seq.fa"
FILT_DOM_GFF = "domains_filtered.gff"
EXTRACT_DOM_STAT = "domains_counts.txt"
EXTRACT_OUT_DIR = "extracted_domains"