itsx: ITSx annotate

annotate ITSx @ 9:3610c6312b85 draft default tip

Uploaded

author	okorol
date	Fri, 05 Jun 2015 12:05:37 -0400
parents	f82c70f54bd7
children

rev	line source
0 f82c70f54bd7 Uploaded okorol parents: diff changeset	1 #!/usr/bin/perl
f82c70f54bd7 Uploaded okorol parents: diff changeset	2 # ITSx ITS Extractor
f82c70f54bd7 Uploaded okorol parents: diff changeset	3 $app_title = "ITSx -- Identifies ITS sequences and extracts the ITS region"; # Program name and tagline; first line of the start-up banner
f82c70f54bd7 Uploaded okorol parents: diff changeset	4 $app_author = "Johan Bengtsson-Palme et al., University of Gothenburg"; # Printed after "by" in the start-up banner
f82c70f54bd7 Uploaded okorol parents: diff changeset	5 $app_version = "1.0.11"; # Printed in the banner and interpolated into the $bugs text
f82c70f54bd7 Uploaded okorol parents: diff changeset	6 $app_message = ""; # Optional extra text appended to the banner; empty in this version
f82c70f54bd7 Uploaded okorol parents: diff changeset	7 # ----------------------------------------------------------------- #
f82c70f54bd7 Uploaded okorol parents: diff changeset	8
f82c70f54bd7 Uploaded okorol parents: diff changeset	9 # License information
f82c70f54bd7 Uploaded okorol parents: diff changeset	10 $license =
f82c70f54bd7 Uploaded okorol parents: diff changeset	11 " ITSx - ITS Extractor -- Identifies ITS sequences and extracts the ITS region\
f82c70f54bd7 Uploaded okorol parents: diff changeset	12 Copyright (C) 2012-2014 Johan Bengtsson-Palme et al.\
f82c70f54bd7 Uploaded okorol parents: diff changeset	13 \
f82c70f54bd7 Uploaded okorol parents: diff changeset	14 This program is free software: you can redistribute it and/or modify\
f82c70f54bd7 Uploaded okorol parents: diff changeset	15 it under the terms of the GNU General Public License as published by\
f82c70f54bd7 Uploaded okorol parents: diff changeset	16 the Free Software Foundation, either version 3 of the License, or\
f82c70f54bd7 Uploaded okorol parents: diff changeset	17 (at your option) any later version.\
f82c70f54bd7 Uploaded okorol parents: diff changeset	18 \
f82c70f54bd7 Uploaded okorol parents: diff changeset	19 This program is distributed in the hope that it will be useful,\
f82c70f54bd7 Uploaded okorol parents: diff changeset	20 but WITHOUT ANY WARRANTY; without even the implied warranty of\
f82c70f54bd7 Uploaded okorol parents: diff changeset	21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\
f82c70f54bd7 Uploaded okorol parents: diff changeset	22 GNU General Public License for more details.\
f82c70f54bd7 Uploaded okorol parents: diff changeset	23 \
f82c70f54bd7 Uploaded okorol parents: diff changeset	24 You should have received a copy of the GNU General Public License\
f82c70f54bd7 Uploaded okorol parents: diff changeset	25 along with this program, in a file called 'license.txt'\
f82c70f54bd7 Uploaded okorol parents: diff changeset	26 If not, see: http://www.gnu.org/licenses/.\
f82c70f54bd7 Uploaded okorol parents: diff changeset	27 ";
f82c70f54bd7 Uploaded okorol parents: diff changeset	28
f82c70f54bd7 Uploaded okorol parents: diff changeset	29 ## BUGS:
f82c70f54bd7 Uploaded okorol parents: diff changeset	30 $bugs = "New features in this version ($app_version):\
f82c70f54bd7 Uploaded okorol parents: diff changeset	31 - None\
f82c70f54bd7 Uploaded okorol parents: diff changeset	32 \
f82c70f54bd7 Uploaded okorol parents: diff changeset	33 Fixed bugs in this version ($app_version):\
f82c70f54bd7 Uploaded okorol parents: diff changeset	34 - Fixed a bug causing newline characters to be occasionally skipped in the 'its1.full_and_partial.fasta' FASTA output file when the '--anchor' option was used\
f82c70f54bd7 Uploaded okorol parents: diff changeset	35 \
f82c70f54bd7 Uploaded okorol parents: diff changeset	36 Known bugs in this version ($app_version):\
f82c70f54bd7 Uploaded okorol parents: diff changeset	37 - None\
f82c70f54bd7 Uploaded okorol parents: diff changeset	38 ";
f82c70f54bd7 Uploaded okorol parents: diff changeset	39
f82c70f54bd7 Uploaded okorol parents: diff changeset	40 ## OPTIONS:
f82c70f54bd7 Uploaded okorol parents: diff changeset	41 $options = "\
f82c70f54bd7 Uploaded okorol parents: diff changeset	42 -i {file} : DNA FASTA input file to investigate\
f82c70f54bd7 Uploaded okorol parents: diff changeset	43 -o {file} : Base for the names of output file(s)\
f82c70f54bd7 Uploaded okorol parents: diff changeset	44 -p {directory} : A path to a directory of HMM-profile collections representing ITS conserved regions, default is in the same directory as ITSx itself\
f82c70f54bd7 Uploaded okorol parents: diff changeset	45 --date {T or F} : Adds a date and time stamp to the output directory, off (F) by default\
f82c70f54bd7 Uploaded okorol parents: diff changeset	46 --reset {T or F} : Re-creates the HMM-database before ITSx is run, off (F) by default\
f82c70f54bd7 Uploaded okorol parents: diff changeset	47
f82c70f54bd7 Uploaded okorol parents: diff changeset	48 Sequence selection options:\
f82c70f54bd7 Uploaded okorol parents: diff changeset	49 -t {character code} : Profile set to use for the search, see the User's Guide (comma-separated), default is all\
f82c70f54bd7 Uploaded okorol parents: diff changeset	50 -E {value} : Domain E-value cutoff for a sequence to be included in the output, default = 1e-5\
f82c70f54bd7 Uploaded okorol parents: diff changeset	51 -S {value} : Domain score cutoff for a sequence to be included in the output, default = 0\
f82c70f54bd7 Uploaded okorol parents: diff changeset	52 -N {value} : The minimal number of domains that must match a sequence before it is included, default = 2\
f82c70f54bd7 Uploaded okorol parents: diff changeset	53 --selection_priority {sum, domains, eval, score} : Selects what will be of highest priority when determining the origin of the sequence, default is sum\
f82c70f54bd7 Uploaded okorol parents: diff changeset	54 --search_eval {value} : The E-value cutoff used in the HMMER search, high numbers may slow down the process, cannot be used with the --search_score option, default is 0.01\
f82c70f54bd7 Uploaded okorol parents: diff changeset	55 --search_score {value} : The score cutoff used in the HMMER search, low numbers may slow down the process, cannot be used with the --search_eval option, default is to used E-value cutoff, not score\
f82c70f54bd7 Uploaded okorol parents: diff changeset	56 --allow_single_domain {e-value,score or F} : Allow inclusion of sequences that only find a single domain, given that they meet the given E-value and score thresholds, on with parameters 1e-9,0 by default\
f82c70f54bd7 Uploaded okorol parents: diff changeset	57 --allow_reorder {T or F} : Allows profiles to be in the wrong order on extracted sequences, off (F) by default\
f82c70f54bd7 Uploaded okorol parents: diff changeset	58 --complement {T or F} : Checks both DNA strands against the database, creating reverse complements, on (T) by default\
f82c70f54bd7 Uploaded okorol parents: diff changeset	59 --cpu {value} : the number of CPU threads to use, default is 1\
f82c70f54bd7 Uploaded okorol parents: diff changeset	60 --multi_thread {T or F} : Multi-thread the HMMER-search, on (T) if number of CPUs (--cpu option > 1), else off (F) by default\
f82c70f54bd7 Uploaded okorol parents: diff changeset	61 --heuristics {T or F} : Selects whether to use HMMER's heuristic filtering, off (F) by default\
f82c70f54bd7 Uploaded okorol parents: diff changeset	62
f82c70f54bd7 Uploaded okorol parents: diff changeset	63 Output options:\
f82c70f54bd7 Uploaded okorol parents: diff changeset	64 --summary {T or F} : Summary of results output, on (T) by default\
f82c70f54bd7 Uploaded okorol parents: diff changeset	65 --graphical {T or F} : 'Graphical' output, on (T) by default\
f82c70f54bd7 Uploaded okorol parents: diff changeset	66 --fasta {T or F} : FASTA-format output of extracted ITS sequences, on (T) by default\
f82c70f54bd7 Uploaded okorol parents: diff changeset	67 --preserve {T or F} : Preserve sequence headers in input file instead of printing out ITSx headers, off (F) by default\
f82c70f54bd7 Uploaded okorol parents: diff changeset	68 --save_regions {SSU,ITS1,5.8S,ITS2,LSU,all,none} : A comma separated list of regions to output separate FASTA files for, 'ITS1,ITS2' by default\
f82c70f54bd7 Uploaded okorol parents: diff changeset	69 --anchor {integer or HMM} : Saves an additional number of bases before and after each extracted region. If set to 'HMM' all bases matching the corresponding HMM will be output, default = 0\
f82c70f54bd7 Uploaded okorol parents: diff changeset	70 --only_full {T or F} : If true, output is limited to full-length regions, off (F) by default\
f82c70f54bd7 Uploaded okorol parents: diff changeset	71 --partial {integer} : Saves additional FASTA-files for full and partial ITS sequences longer than the specified cutoff, default = 0 (off)\
f82c70f54bd7 Uploaded okorol parents: diff changeset	72 --concat {T or F} : Saves a FASTA-file with concatenated ITS sequences (with 5.8S removed), off (F) by default\
f82c70f54bd7 Uploaded okorol parents: diff changeset	73 --minlen {integer} : Minimum length the ITS regions must be to be outputted in the concatenated file (see above), default = 0\
f82c70f54bd7 Uploaded okorol parents: diff changeset	74 --positions {T or F} : Table format output containing the positions ITS sequences were found in, on (T) by default\
f82c70f54bd7 Uploaded okorol parents: diff changeset	75 --table {T or F} : Table format output of sequences containing probable ITS sequences, off (F) by default\
f82c70f54bd7 Uploaded okorol parents: diff changeset	76 --not_found {T or F} : Saves a list of non-found entries, on (T) by default\
f82c70f54bd7 Uploaded okorol parents: diff changeset	77 --detailed_results {T or F} : Saves a tab-separated list of all results, off (F) by default\
f82c70f54bd7 Uploaded okorol parents: diff changeset	78 --truncate {T or F} : Truncates the FASTA output to only contain the actual ITS sequences found, on (T) by default\
f82c70f54bd7 Uploaded okorol parents: diff changeset	79 --silent {T or F} : Supresses printing progress info to stderr, off (F) by default\
f82c70f54bd7 Uploaded okorol parents: diff changeset	80 --graph_scale {value} : Sets the scale of the graph output, if value is zero, a percentage view is shown, default = 0\
f82c70f54bd7 Uploaded okorol parents: diff changeset	81 --save_raw {T or F} : Saves all raw data for searches etc. instead of removing it on finish, off (F) by default\
f82c70f54bd7 Uploaded okorol parents: diff changeset	82
f82c70f54bd7 Uploaded okorol parents: diff changeset	83 -h : displays this help message\
f82c70f54bd7 Uploaded okorol parents: diff changeset	84 --help : displays this help message\
f82c70f54bd7 Uploaded okorol parents: diff changeset	85 --bugs : displays the bug fixes and known bugs in this version of ITSx\
f82c70f54bd7 Uploaded okorol parents: diff changeset	86 --license : displays licensing information\
f82c70f54bd7 Uploaded okorol parents: diff changeset	87 ";
f82c70f54bd7 Uploaded okorol parents: diff changeset	88
f82c70f54bd7 Uploaded okorol parents: diff changeset	89
f82c70f54bd7 Uploaded okorol parents: diff changeset	90 ## Print the title banner (name, author, version and optional message) to STDERR
f82c70f54bd7 Uploaded okorol parents: diff changeset	91 print STDERR "$app_title\nby $app_author\nVersion: $app_version\n$app_message";
f82c70f54bd7 Uploaded okorol parents: diff changeset	92 print STDERR "-----------------------------------------------------------------\n"; # Separator line under the banner
f82c70f54bd7 Uploaded okorol parents: diff changeset	93
f82c70f54bd7 Uploaded okorol parents: diff changeset	94 ## Setup default variable values
f82c70f54bd7 Uploaded okorol parents: diff changeset	95 use List::Util qw(first max maxstr min minstr reduce shuffle sum);
f82c70f54bd7 Uploaded okorol parents: diff changeset	96
f82c70f54bd7 Uploaded okorol parents: diff changeset	97 $bindir = $0; # Start from the path this script was invoked with
f82c70f54bd7 Uploaded okorol parents: diff changeset	98 $bindir =~ s/_x//; # Removes the first "_x" found in that path, if any -- NOTE(review): purpose not visible here, confirm
f82c70f54bd7 Uploaded okorol parents: diff changeset	99 $input = ""; # -i : input FASTA file
f82c70f54bd7 Uploaded okorol parents: diff changeset	100 $output = "ITSx_out"; # -o : base name for the output file(s)
f82c70f54bd7 Uploaded okorol parents: diff changeset	101 $hmmscan = ""; # --hmmscan : pre-computed hmmscan output file ('undocumented' option)
f82c70f54bd7 Uploaded okorol parents: diff changeset	102 $profileDB = "$bindir\_db/HMMs"; # -p : HMM profile directory; the backslash keeps Perl from reading this as $bindir_db
f82c70f54bd7 Uploaded okorol parents: diff changeset	103 $type = "all"; # -t : profile set(s) to search with
f82c70f54bd7 Uploaded okorol parents: diff changeset	104 $E = 1e-5; # -E : domain E-value cutoff
f82c70f54bd7 Uploaded okorol parents: diff changeset	105 $S = 0; # -S : domain score cutoff
f82c70f54bd7 Uploaded okorol parents: diff changeset	106 $N = 2; # -N : minimal number of matching domains
f82c70f54bd7 Uploaded okorol parents: diff changeset	107 $priority = "sum"; # --selection_priority : sum, domains, eval or score
f82c70f54bd7 Uploaded okorol parents: diff changeset	108 $search_eval = 0.01; # --search_eval : E-value cutoff for the HMMER search; the option parser blanks it when --search_score is given
f82c70f54bd7 Uploaded okorol parents: diff changeset	109 $search_score = ""; # --search_score : score cutoff for the HMMER search; empty means the E-value cutoff is used instead
f82c70f54bd7 Uploaded okorol parents: diff changeset	110 $allow_single_E = 1e-9; # --allow_single_domain : E-value threshold for single-domain matches
f82c70f54bd7 Uploaded okorol parents: diff changeset	111 $allow_single_score = 0; # --allow_single_domain : score threshold for single-domain matches
f82c70f54bd7 Uploaded okorol parents: diff changeset	112 #$allow_single_E = -1; # Turns off single-domain matching by E-value
f82c70f54bd7 Uploaded okorol parents: diff changeset	113 #$allow_single_score = 0; # Turns off single-domain matching by score
f82c70f54bd7 Uploaded okorol parents: diff changeset	114 $allow_reorder = 0; # --allow_reorder : accept profiles in the wrong order
f82c70f54bd7 Uploaded okorol parents: diff changeset	115 $complement = 1; # --complement : also check the reverse complement strand
f82c70f54bd7 Uploaded okorol parents: diff changeset	116 $cpu = 1; # --cpu : number of CPU threads
f82c70f54bd7 Uploaded okorol parents: diff changeset	117 $multi_thread = "unset"; # --multi_thread : stays "unset" unless the option is given; help text says the default then depends on --cpu -- presumably resolved later, outside this view
f82c70f54bd7 Uploaded okorol parents: diff changeset	118 $heuristics = 0; # --heuristics : use HMMER's heuristic filtering
f82c70f54bd7 Uploaded okorol parents: diff changeset	119 $out_sum = 1; # --summary : write the results summary
f82c70f54bd7 Uploaded okorol parents: diff changeset	120 $out_graph = 1; # --graphical : write the 'graphical' output
f82c70f54bd7 Uploaded okorol parents: diff changeset	121 $out_fasta = 1; # --fasta : write FASTA output of extracted ITS sequences
f82c70f54bd7 Uploaded okorol parents: diff changeset	122 $out_preserve = 0; # Presumably --preserve (keep input sequence headers); its parsing is not in this view -- confirm
f82c70f54bd7 Uploaded okorol parents: diff changeset	123 $out_ssu = 0; # --save_regions : SSU
f82c70f54bd7 Uploaded okorol parents: diff changeset	124 $out_its1 = 1; # --save_regions : ITS1
f82c70f54bd7 Uploaded okorol parents: diff changeset	125 $out_its2 = 1; # --save_regions : ITS2
f82c70f54bd7 Uploaded okorol parents: diff changeset	126 $out_58S = 0; # --save_regions : 5.8S
f82c70f54bd7 Uploaded okorol parents: diff changeset	127 $out_lsu = 0; # --save_regions : LSU
f82c70f54bd7 Uploaded okorol parents: diff changeset	128 $out_pos = 1; # --positions : write the positions table
f82c70f54bd7 Uploaded okorol parents: diff changeset	129 $out_table = 0; # Presumably --table; its parsing is not in this view -- confirm
f82c70f54bd7 Uploaded okorol parents: diff changeset	130 $out_not = 1; # Presumably --not_found; its parsing is not in this view -- confirm
f82c70f54bd7 Uploaded okorol parents: diff changeset	131 $out_date = 0; # --date : add a date and time stamp to the output
f82c70f54bd7 Uploaded okorol parents: diff changeset	132 $out_joined = 0; # NOTE(review): no option in the visible part of the file sets this -- confirm where it is used
f82c70f54bd7 Uploaded okorol parents: diff changeset	133 $out_results = 0; # --detailed_results : write the detailed results list
f82c70f54bd7 Uploaded okorol parents: diff changeset	134 $out_partial = 0; # --partial : length cutoff for the full-and-partial FASTA files (0 = off)
f82c70f54bd7 Uploaded okorol parents: diff changeset	135 $out_concat = 0; # --concat : write the concatenated ITS1 + ITS2 file
f82c70f54bd7 Uploaded okorol parents: diff changeset	136 $concat_minlen = 0; # --minlen : minimum length for sequences in the concatenated file
f82c70f54bd7 Uploaded okorol parents: diff changeset	137 $truncate = 1; # Presumably --truncate; its parsing is not in this view -- confirm
f82c70f54bd7 Uploaded okorol parents: diff changeset	138 $anchor = 0; # --anchor : number of extra bases (or 'HMM') kept around each extracted region
f82c70f54bd7 Uploaded okorol parents: diff changeset	139 $only_full = 0; # --only_full : limit output to full-length regions
f82c70f54bd7 Uploaded okorol parents: diff changeset	140 $graph_scale = 0; # Presumably --graph_scale; its parsing is not in this view -- confirm
f82c70f54bd7 Uploaded okorol parents: diff changeset	141 $debug = 0; # NOTE(review): no option in the visible part of the file sets this -- confirm
f82c70f54bd7 Uploaded okorol parents: diff changeset	142 $reset = 0; # --reset : re-create the HMM database before running
f82c70f54bd7 Uploaded okorol parents: diff changeset	143
f82c70f54bd7 Uploaded okorol parents: diff changeset	144 ## Read command-line options
f82c70f54bd7 Uploaded okorol parents: diff changeset	145 for ($i = 0; $i <= scalar(@ARGV); $i++) { # Goes through the list of arguments
f82c70f54bd7 Uploaded okorol parents: diff changeset	146 $arg = @ARGV[$i]; # Stores the current argument in $arg
f82c70f54bd7 Uploaded okorol parents: diff changeset	147
f82c70f54bd7 Uploaded okorol parents: diff changeset	148 if ($arg eq "-i") { # Read input files from -i flag
f82c70f54bd7 Uploaded okorol parents: diff changeset	149 $i++;
f82c70f54bd7 Uploaded okorol parents: diff changeset	150 $input = @ARGV[$i];
f82c70f54bd7 Uploaded okorol parents: diff changeset	151 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	152 if ($arg eq "-o") { # Read output files from -o flag
f82c70f54bd7 Uploaded okorol parents: diff changeset	153 $i++;
f82c70f54bd7 Uploaded okorol parents: diff changeset	154 $output = @ARGV[$i];
f82c70f54bd7 Uploaded okorol parents: diff changeset	155 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	156 if ($arg eq "-p") { # Read profile database from -p flag
f82c70f54bd7 Uploaded okorol parents: diff changeset	157 $i++;
f82c70f54bd7 Uploaded okorol parents: diff changeset	158 $profileDB = @ARGV[$i];
f82c70f54bd7 Uploaded okorol parents: diff changeset	159 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	160 if ($arg eq "--hmmscan") { # Read pre-computed hmmscan output file from --hmmscan flag ('undocumented' feature)
f82c70f54bd7 Uploaded okorol parents: diff changeset	161 $i++;
f82c70f54bd7 Uploaded okorol parents: diff changeset	162 $hmmscan = @ARGV[$i];
f82c70f54bd7 Uploaded okorol parents: diff changeset	163 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	164 if ($arg eq "--date") { # Determine whether or not to add a date stamp based on the --date flag
f82c70f54bd7 Uploaded okorol parents: diff changeset	165 $i++;
f82c70f54bd7 Uploaded okorol parents: diff changeset	166 if (substr(@ARGV[$i],0,1) =~ m/^[Ff0]/) { # Check if argument begins with "F", "f", or "0"
f82c70f54bd7 Uploaded okorol parents: diff changeset	167 $out_date = 0;
f82c70f54bd7 Uploaded okorol parents: diff changeset	168 } else {
f82c70f54bd7 Uploaded okorol parents: diff changeset	169 $out_date = 1;
f82c70f54bd7 Uploaded okorol parents: diff changeset	170 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	171 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	172 if ($arg eq "--reset") { # Reset HMM database?
f82c70f54bd7 Uploaded okorol parents: diff changeset	173 $i++;
f82c70f54bd7 Uploaded okorol parents: diff changeset	174 if (substr(@ARGV[$i],0,1) =~ m/^[Ff0]/) { # Check if argument begins with "F", "f", or "0"
f82c70f54bd7 Uploaded okorol parents: diff changeset	175 $reset = 0;
f82c70f54bd7 Uploaded okorol parents: diff changeset	176 } else {
f82c70f54bd7 Uploaded okorol parents: diff changeset	177 $reset = 1;
f82c70f54bd7 Uploaded okorol parents: diff changeset	178 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	179 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	180
f82c70f54bd7 Uploaded okorol parents: diff changeset	181 if ($arg eq "-t") { # Select what types of ITSs to look for using the -t flag
f82c70f54bd7 Uploaded okorol parents: diff changeset	182 $i++;
f82c70f54bd7 Uploaded okorol parents: diff changeset	183 $type = @ARGV[$i];
f82c70f54bd7 Uploaded okorol parents: diff changeset	184 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	185 if ($arg eq "-E") { # Set the E-value cutoff using the -E flag
f82c70f54bd7 Uploaded okorol parents: diff changeset	186 $i++;
f82c70f54bd7 Uploaded okorol parents: diff changeset	187 $E = @ARGV[$i];
f82c70f54bd7 Uploaded okorol parents: diff changeset	188 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	189 if ($arg eq "-S") { # Set the score cutoff using the -S flag
f82c70f54bd7 Uploaded okorol parents: diff changeset	190 $i++;
f82c70f54bd7 Uploaded okorol parents: diff changeset	191 $S = @ARGV[$i];
f82c70f54bd7 Uploaded okorol parents: diff changeset	192 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	193 if ($arg eq "-N") { # Set the number of found domains cutoff using the -N flag
f82c70f54bd7 Uploaded okorol parents: diff changeset	194 $i++;
f82c70f54bd7 Uploaded okorol parents: diff changeset	195 $N = @ARGV[$i];
f82c70f54bd7 Uploaded okorol parents: diff changeset	196 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	197 if ($arg eq "--selection_priority") { # Set how to order the ITS types using the --selection_priority flag
f82c70f54bd7 Uploaded okorol parents: diff changeset	198 $i++;
f82c70f54bd7 Uploaded okorol parents: diff changeset	199 $priority = @ARGV[$i];
f82c70f54bd7 Uploaded okorol parents: diff changeset	200 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	201 if ($arg eq "--search_eval") { # Set the E-value cutoff for the HMMER search using the --search_eval flag
f82c70f54bd7 Uploaded okorol parents: diff changeset	202 $i++;
f82c70f54bd7 Uploaded okorol parents: diff changeset	203 $search_eval = @ARGV[$i];
f82c70f54bd7 Uploaded okorol parents: diff changeset	204 $search_score = ""; # Turns off score cutoff
f82c70f54bd7 Uploaded okorol parents: diff changeset	205 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	206 if ($arg eq "--search_score") { # Set the score cutoff for the HMMER search using the --search_score flag
f82c70f54bd7 Uploaded okorol parents: diff changeset	207 $i++;
f82c70f54bd7 Uploaded okorol parents: diff changeset	208 $search_score = @ARGV[$i];
f82c70f54bd7 Uploaded okorol parents: diff changeset	209 $search_eval = ""; # Turns off E-value cutoff
f82c70f54bd7 Uploaded okorol parents: diff changeset	210 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	211 if ($arg eq "--allow_single_domain") { # Determine whether or not to allow single domain matches based on the --allow_single_domain flag
f82c70f54bd7 Uploaded okorol parents: diff changeset	212 $i++;
f82c70f54bd7 Uploaded okorol parents: diff changeset	213 if (substr(@ARGV[$i],0,1) =~ m/^[Ff0]/) { # Check if argument begins with "F", "f", or "0"
f82c70f54bd7 Uploaded okorol parents: diff changeset	214 $allow_single_E = -1; # Turns off single-domain matching by E-value
f82c70f54bd7 Uploaded okorol parents: diff changeset	215 $allow_single_score = 0; # Turns off single-domain matching by score
f82c70f54bd7 Uploaded okorol parents: diff changeset	216 } else {
f82c70f54bd7 Uploaded okorol parents: diff changeset	217 ($allow_single_E,$allow_single_score) = split(',',@ARGV[$i]); # Turns on single-domain matching, assigning the first given value as the E-value cutoff, and the second as score cutoff
f82c70f54bd7 Uploaded okorol parents: diff changeset	218 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	219 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	220 if ($arg eq "--allow_reorder") { # Determine whether or not to allow the domains to be in the wrong order based on the --allow_reorder flag
f82c70f54bd7 Uploaded okorol parents: diff changeset	221 $i++;
f82c70f54bd7 Uploaded okorol parents: diff changeset	222 if (substr(@ARGV[$i],0,1) =~ m/^[Tt1]/) { # Check if argument begins with "T", "t", or "1"
f82c70f54bd7 Uploaded okorol parents: diff changeset	223 $allow_reorder = 1;
f82c70f54bd7 Uploaded okorol parents: diff changeset	224 } else {
f82c70f54bd7 Uploaded okorol parents: diff changeset	225 $allow_reorder = 0;
f82c70f54bd7 Uploaded okorol parents: diff changeset	226 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	227 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	228 if ($arg eq "--complement") { # Determine whether or not to scan the complementary strand of the input file based on the --complement flag
f82c70f54bd7 Uploaded okorol parents: diff changeset	229 $i++;
f82c70f54bd7 Uploaded okorol parents: diff changeset	230 if (substr(@ARGV[$i],0,1) =~ m/^[Tt1]/) { # Check if argument begins with "T", "t", or "1"
f82c70f54bd7 Uploaded okorol parents: diff changeset	231 $complement = 1;
f82c70f54bd7 Uploaded okorol parents: diff changeset	232 } else {
f82c70f54bd7 Uploaded okorol parents: diff changeset	233 $complement = 0;
f82c70f54bd7 Uploaded okorol parents: diff changeset	234 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	235 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	236 if ($arg eq "--cpu") { # Set the number of CPUs to use based on the --cpu flag
f82c70f54bd7 Uploaded okorol parents: diff changeset	237 $i++;
f82c70f54bd7 Uploaded okorol parents: diff changeset	238 $cpu = @ARGV[$i];
f82c70f54bd7 Uploaded okorol parents: diff changeset	239 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	240 if ($arg eq "--multi_thread") { # Determine whether or not to multi-thread the HMMER step based on the --multi_thread flag
f82c70f54bd7 Uploaded okorol parents: diff changeset	241 $i++;
f82c70f54bd7 Uploaded okorol parents: diff changeset	242 if (substr(@ARGV[$i],0,1) =~ m/^[Tt1]/) { # Check if argument begins with "T", "t", or "1"
f82c70f54bd7 Uploaded okorol parents: diff changeset	243 $multi_thread = 1;
f82c70f54bd7 Uploaded okorol parents: diff changeset	244 } else {
f82c70f54bd7 Uploaded okorol parents: diff changeset	245 $multi_thread = 0;
f82c70f54bd7 Uploaded okorol parents: diff changeset	246 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	247 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	248 if ($arg eq "--heuristics") { # Determine whether or not to use HMMER's heuristic filtering based on the --heuristics flag
f82c70f54bd7 Uploaded okorol parents: diff changeset	249 $i++;
f82c70f54bd7 Uploaded okorol parents: diff changeset	250 if (substr(@ARGV[$i],0,1) =~ m/^[Tt1]/) { # Check if argument begins with "T", "t", or "1"
f82c70f54bd7 Uploaded okorol parents: diff changeset	251 $heuristics = 1;
f82c70f54bd7 Uploaded okorol parents: diff changeset	252 } else {
f82c70f54bd7 Uploaded okorol parents: diff changeset	253 $heuristics = 0;
f82c70f54bd7 Uploaded okorol parents: diff changeset	254 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	255 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	256
f82c70f54bd7 Uploaded okorol parents: diff changeset	257 if ($arg eq "--summary") { # Determine whether or not to output a summary based on the --summary flag
f82c70f54bd7 Uploaded okorol parents: diff changeset	258 $i++;
f82c70f54bd7 Uploaded okorol parents: diff changeset	259 if (substr(@ARGV[$i],0,1) =~ m/^[Tt1]/) { # Check if argument begins with "T", "t", or "1"
f82c70f54bd7 Uploaded okorol parents: diff changeset	260 $out_sum = 1;
f82c70f54bd7 Uploaded okorol parents: diff changeset	261 } else {
f82c70f54bd7 Uploaded okorol parents: diff changeset	262 $out_sum = 0;
f82c70f54bd7 Uploaded okorol parents: diff changeset	263 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	264 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	265 if ($arg eq "--graphical") { # Determine whether or not to output a graphical representation of matches based on the --graphical flag
f82c70f54bd7 Uploaded okorol parents: diff changeset	266 $i++;
f82c70f54bd7 Uploaded okorol parents: diff changeset	267 if (substr(@ARGV[$i],0,1) =~ m/^[Tt1]/) { # Check if argument begins with "T", "t", or "1"
f82c70f54bd7 Uploaded okorol parents: diff changeset	268 $out_graph = 1;
f82c70f54bd7 Uploaded okorol parents: diff changeset	269 } else {
f82c70f54bd7 Uploaded okorol parents: diff changeset	270 $out_graph = 0;
f82c70f54bd7 Uploaded okorol parents: diff changeset	271 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	272 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	273 if ($arg eq "--detailed_results") { # Determine whether or not to output a detailed results list, based on the --detailed_results flag
f82c70f54bd7 Uploaded okorol parents: diff changeset	274 $i++;
f82c70f54bd7 Uploaded okorol parents: diff changeset	275 if (substr(@ARGV[$i],0,1) =~ m/^[Tt1]/) { # Check if argument begins with "T", "t", or "1"
f82c70f54bd7 Uploaded okorol parents: diff changeset	276 $out_results = 1;
f82c70f54bd7 Uploaded okorol parents: diff changeset	277 } else {
f82c70f54bd7 Uploaded okorol parents: diff changeset	278 $out_results = 0;
f82c70f54bd7 Uploaded okorol parents: diff changeset	279 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	280 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	281 if ($arg eq "--partial") { # Set the full-and-partial cutoff
f82c70f54bd7 Uploaded okorol parents: diff changeset	282 $i++;
f82c70f54bd7 Uploaded okorol parents: diff changeset	283 $out_partial = @ARGV[$i];
f82c70f54bd7 Uploaded okorol parents: diff changeset	284 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	285 if ($arg eq "--anchor") { # Set the length of the sequence "anchors"
f82c70f54bd7 Uploaded okorol parents: diff changeset	286 $i++;
f82c70f54bd7 Uploaded okorol parents: diff changeset	287 $anchor = @ARGV[$i];
f82c70f54bd7 Uploaded okorol parents: diff changeset	288 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	289 if ($arg eq "--only_full") { # Output only full-length regions
f82c70f54bd7 Uploaded okorol parents: diff changeset	290 $i++;
f82c70f54bd7 Uploaded okorol parents: diff changeset	291 if (substr(@ARGV[$i],0,1) =~ m/^[Tt1]/) { # Check if argument begins with "T", "t", or "1"
f82c70f54bd7 Uploaded okorol parents: diff changeset	292 $only_full = 1;
f82c70f54bd7 Uploaded okorol parents: diff changeset	293 } else {
f82c70f54bd7 Uploaded okorol parents: diff changeset	294 $only_full = 0;
f82c70f54bd7 Uploaded okorol parents: diff changeset	295 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	296 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	297 if ($arg eq "--save_regions") { # Determine which regions to output FASTA files for based on the --save_regions flag
f82c70f54bd7 Uploaded okorol parents: diff changeset	298 $i++;
f82c70f54bd7 Uploaded okorol parents: diff changeset	299 @save_regions = split(',',uc(@ARGV[$i]));
f82c70f54bd7 Uploaded okorol parents: diff changeset	300 $out_ssu = 0;
f82c70f54bd7 Uploaded okorol parents: diff changeset	301 $out_its1 = 0;
f82c70f54bd7 Uploaded okorol parents: diff changeset	302 $out_its2 = 0;
f82c70f54bd7 Uploaded okorol parents: diff changeset	303 $out_58S = 0;
f82c70f54bd7 Uploaded okorol parents: diff changeset	304 $out_lsu = 0;
f82c70f54bd7 Uploaded okorol parents: diff changeset	305 foreach $save_region (@save_regions) {
f82c70f54bd7 Uploaded okorol parents: diff changeset	306 if ($save_region eq "SSU") {
f82c70f54bd7 Uploaded okorol parents: diff changeset	307 $out_ssu = 1;
f82c70f54bd7 Uploaded okorol parents: diff changeset	308 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	309 if ($save_region eq "ITS1") {
f82c70f54bd7 Uploaded okorol parents: diff changeset	310 $out_its1 = 1;
f82c70f54bd7 Uploaded okorol parents: diff changeset	311 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	312 if ($save_region eq "5.8S") {
f82c70f54bd7 Uploaded okorol parents: diff changeset	313 $out_58S = 1;
f82c70f54bd7 Uploaded okorol parents: diff changeset	314 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	315 if ($save_region eq "ITS2") {
f82c70f54bd7 Uploaded okorol parents: diff changeset	316 $out_its2 = 1;
f82c70f54bd7 Uploaded okorol parents: diff changeset	317 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	318 if ($save_region eq "LSU") {
f82c70f54bd7 Uploaded okorol parents: diff changeset	319 $out_lsu = 1;
f82c70f54bd7 Uploaded okorol parents: diff changeset	320 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	321 if ($save_region eq "ALL") {
f82c70f54bd7 Uploaded okorol parents: diff changeset	322 $out_ssu = 1;
f82c70f54bd7 Uploaded okorol parents: diff changeset	323 $out_its1 = 1;
f82c70f54bd7 Uploaded okorol parents: diff changeset	324 $out_its2 = 1;
f82c70f54bd7 Uploaded okorol parents: diff changeset	325 $out_58S = 1;
f82c70f54bd7 Uploaded okorol parents: diff changeset	326 $out_lsu = 1;
f82c70f54bd7 Uploaded okorol parents: diff changeset	327 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	328 if ($save_region eq "NONE") {
f82c70f54bd7 Uploaded okorol parents: diff changeset	329 $out_ssu = 0;
f82c70f54bd7 Uploaded okorol parents: diff changeset	330 $out_its1 = 0;
f82c70f54bd7 Uploaded okorol parents: diff changeset	331 $out_its2 = 0;
f82c70f54bd7 Uploaded okorol parents: diff changeset	332 $out_58S = 0;
f82c70f54bd7 Uploaded okorol parents: diff changeset	333 $out_lsu = 0;
f82c70f54bd7 Uploaded okorol parents: diff changeset	334 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	335 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	336 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	337 if ($arg eq "--positions") { # Handle the --positions flag: should a positions file be written?
f82c70f54bd7 Uploaded okorol parents: diff changeset	338 $i++; # Move on to the value that follows the flag in @ARGV
f82c70f54bd7 Uploaded okorol parents: diff changeset	339 if (substr($ARGV[$i],0,1) =~ m/^[Tt1]/) { # A value beginning with "T", "t", or "1" switches the output on
f82c70f54bd7 Uploaded okorol parents: diff changeset	340 $out_pos = 1;
f82c70f54bd7 Uploaded okorol parents: diff changeset	341 } else { # Any other value switches it off
f82c70f54bd7 Uploaded okorol parents: diff changeset	342 $out_pos = 0;
f82c70f54bd7 Uploaded okorol parents: diff changeset	343 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	344 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	345 if ($arg eq "--concat") { # Handle the --concat flag: should a concatenated ITS1 + ITS2 file be written?
f82c70f54bd7 Uploaded okorol parents: diff changeset	346 $i++; # Move on to the value that follows the flag in @ARGV
f82c70f54bd7 Uploaded okorol parents: diff changeset	347 if (substr($ARGV[$i],0,1) =~ m/^[Tt1]/) { # A value beginning with "T", "t", or "1" switches the output on
f82c70f54bd7 Uploaded okorol parents: diff changeset	348 $out_concat = 1;
f82c70f54bd7 Uploaded okorol parents: diff changeset	349 } else { # Any other value switches it off
f82c70f54bd7 Uploaded okorol parents: diff changeset	350 $out_concat = 0;
f82c70f54bd7 Uploaded okorol parents: diff changeset	351 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	352 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	353 if ($arg eq "--minlen") { # Set the min length of the combined ITS1 and ITS2 sequences for concatenation
f82c70f54bd7 Uploaded okorol parents: diff changeset	354 $i++; # Move on to the value that follows the flag in @ARGV
f82c70f54bd7 Uploaded okorol parents: diff changeset	355 $concat_minlen = $ARGV[$i]; # Stored as given; no validation is done here
f82c70f54bd7 Uploaded okorol parents: diff changeset	356 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	357 if ($arg eq "--fasta") { # Handle the --fasta flag: should FASTA files be written?
f82c70f54bd7 Uploaded okorol parents: diff changeset	358 $i++; # Move on to the value that follows the flag in @ARGV
f82c70f54bd7 Uploaded okorol parents: diff changeset	359 if (substr($ARGV[$i],0,1) =~ m/^[Tt1]/) { # A value beginning with "T", "t", or "1" switches the output on
f82c70f54bd7 Uploaded okorol parents: diff changeset	360 $out_fasta = 1;
f82c70f54bd7 Uploaded okorol parents: diff changeset	361 } else { # Any other value switches it off
f82c70f54bd7 Uploaded okorol parents: diff changeset	362 $out_fasta = 0;
f82c70f54bd7 Uploaded okorol parents: diff changeset	363 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	364 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	365 if ($arg eq "--preserve") { # Handle the --preserve flag: should FASTA headers be preserved?
f82c70f54bd7 Uploaded okorol parents: diff changeset	366 $i++; # Move on to the value that follows the flag in @ARGV
f82c70f54bd7 Uploaded okorol parents: diff changeset	367 if (substr($ARGV[$i],0,1) =~ m/^[Tt1]/) { # A value beginning with "T", "t", or "1" switches the option on
f82c70f54bd7 Uploaded okorol parents: diff changeset	368 $out_preserve = 1;
f82c70f54bd7 Uploaded okorol parents: diff changeset	369 } else { # Any other value switches it off
f82c70f54bd7 Uploaded okorol parents: diff changeset	370 $out_preserve = 0;
f82c70f54bd7 Uploaded okorol parents: diff changeset	371 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	372 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	373 if ($arg eq "--joined") { # Handle the --joined flag: should one FASTA file with ALL sorts of output sequences be written (for debugging)?
f82c70f54bd7 Uploaded okorol parents: diff changeset	374 $i++; # Move on to the value that follows the flag in @ARGV
f82c70f54bd7 Uploaded okorol parents: diff changeset	375 if (substr($ARGV[$i],0,1) =~ m/^[Tt1]/) { # A value beginning with "T", "t", or "1" switches the output on
f82c70f54bd7 Uploaded okorol parents: diff changeset	376 $out_joined = 1;
f82c70f54bd7 Uploaded okorol parents: diff changeset	377 } else { # Any other value switches it off
f82c70f54bd7 Uploaded okorol parents: diff changeset	378 $out_joined = 0;
f82c70f54bd7 Uploaded okorol parents: diff changeset	379 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	380 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	381 if ($arg eq "--table") { # Handle the --table flag: should tables of all potential matches be written?
f82c70f54bd7 Uploaded okorol parents: diff changeset	382 $i++; # Move on to the value that follows the flag in @ARGV
f82c70f54bd7 Uploaded okorol parents: diff changeset	383 if (substr($ARGV[$i],0,1) =~ m/^[Tt1]/) { # A value beginning with "T", "t", or "1" switches the output on
f82c70f54bd7 Uploaded okorol parents: diff changeset	384 $out_table = 1;
f82c70f54bd7 Uploaded okorol parents: diff changeset	385 } else { # Any other value switches it off
f82c70f54bd7 Uploaded okorol parents: diff changeset	386 $out_table = 0;
f82c70f54bd7 Uploaded okorol parents: diff changeset	387 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	388 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	389 if ($arg eq "--not_found") { # Handle the --not_found flag: should a list of sequences that are not ITSs be written?
f82c70f54bd7 Uploaded okorol parents: diff changeset	390 $i++; # Move on to the value that follows the flag in @ARGV
f82c70f54bd7 Uploaded okorol parents: diff changeset	391 if (substr($ARGV[$i],0,1) =~ m/^[Tt1]/) { # A value beginning with "T", "t", or "1" switches the output on
f82c70f54bd7 Uploaded okorol parents: diff changeset	392 $out_not = 1;
f82c70f54bd7 Uploaded okorol parents: diff changeset	393 } else { # Any other value switches it off
f82c70f54bd7 Uploaded okorol parents: diff changeset	394 $out_not = 0;
f82c70f54bd7 Uploaded okorol parents: diff changeset	395 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	396 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	397 if ($arg eq "--silent") { # Handle the --silent flag: should screen output be suppressed?
f82c70f54bd7 Uploaded okorol parents: diff changeset	398 $i++; # Move on to the value that follows the flag in @ARGV
f82c70f54bd7 Uploaded okorol parents: diff changeset	399 if (substr($ARGV[$i],0,1) =~ m/^[Tt1]/) { # A value beginning with "T", "t", or "1" switches silent mode on
f82c70f54bd7 Uploaded okorol parents: diff changeset	400 $silent = 1;
f82c70f54bd7 Uploaded okorol parents: diff changeset	401 } else { # Any other value switches it off
f82c70f54bd7 Uploaded okorol parents: diff changeset	402 $silent = 0;
f82c70f54bd7 Uploaded okorol parents: diff changeset	403 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	404 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	405 if ($arg eq "--graph_scale") { # Set the scale of the graphical output based on the --graph_scale flag
f82c70f54bd7 Uploaded okorol parents: diff changeset	406 $i++; # Move on to the value that follows the flag in @ARGV
f82c70f54bd7 Uploaded okorol parents: diff changeset	407 $graph_scale = $ARGV[$i]; # Stored as given; no validation is done here
f82c70f54bd7 Uploaded okorol parents: diff changeset	408 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	409 if ($arg eq "--save_raw") { # Handle the --save_raw flag: should all the raw intermediate data be kept?
f82c70f54bd7 Uploaded okorol parents: diff changeset	410 $i++; # Move on to the value that follows the flag in @ARGV
f82c70f54bd7 Uploaded okorol parents: diff changeset	411 if (substr($ARGV[$i],0,1) =~ m/^[Tt1]/) { # A value beginning with "T", "t", or "1" switches the option on
f82c70f54bd7 Uploaded okorol parents: diff changeset	412 $save_raw = 1;
f82c70f54bd7 Uploaded okorol parents: diff changeset	413 } else { # Any other value switches it off
f82c70f54bd7 Uploaded okorol parents: diff changeset	414 $save_raw = 0;
f82c70f54bd7 Uploaded okorol parents: diff changeset	415 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	416 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	417
f82c70f54bd7 Uploaded okorol parents: diff changeset	418 ## If "-h" or "--help" are among the options, output usage data and options
f82c70f54bd7 Uploaded okorol parents: diff changeset	419 if (($arg eq "-h") || ($arg eq "--help")) { # Either spelling of the help flag
f82c70f54bd7 Uploaded okorol parents: diff changeset	420 print "Usage: ITSx -i <input file> -o <output file>\nOptions:$options"; # Usage line followed by the option list held in $options
f82c70f54bd7 Uploaded okorol parents: diff changeset	421 print "-----------------------------------------------------------------\n";
f82c70f54bd7 Uploaded okorol parents: diff changeset	422 exit; # Exit ITSx
f82c70f54bd7 Uploaded okorol parents: diff changeset	423 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	424
f82c70f54bd7 Uploaded okorol parents: diff changeset	425 ## Print the bugs and features information when "--bugs" is given
f82c70f54bd7 Uploaded okorol parents: diff changeset	426 if ("--bugs" eq $arg) {
f82c70f54bd7 Uploaded okorol parents: diff changeset	427 print $bugs . "\n";
f82c70f54bd7 Uploaded okorol parents: diff changeset	428 exit 0; # Leave ITSx right after printing
f82c70f54bd7 Uploaded okorol parents: diff changeset	429 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	430
f82c70f54bd7 Uploaded okorol parents: diff changeset	431 ## Print the license text when "--license" is given
f82c70f54bd7 Uploaded okorol parents: diff changeset	432 if ("--license" eq $arg) {
f82c70f54bd7 Uploaded okorol parents: diff changeset	433 print $license . "\n";
f82c70f54bd7 Uploaded okorol parents: diff changeset	434 exit 0; # Leave ITSx right after printing
f82c70f54bd7 Uploaded okorol parents: diff changeset	435 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	436
f82c70f54bd7 Uploaded okorol parents: diff changeset	437 if ("--debug" eq $arg) { # Debug mode was requested
f82c70f54bd7 Uploaded okorol parents: diff changeset	438 $debug = 1;
f82c70f54bd7 Uploaded okorol parents: diff changeset	439 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	440 if ("--pipeline" eq $arg) { # Pipeline mode was requested
f82c70f54bd7 Uploaded okorol parents: diff changeset	441 $pipeline = 1;
f82c70f54bd7 Uploaded okorol parents: diff changeset	442 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	443 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	444
f82c70f54bd7 Uploaded okorol parents: diff changeset	445 ## Setup some variables dependent on input
f82c70f54bd7 Uploaded okorol parents: diff changeset	446
f82c70f54bd7 Uploaded okorol parents: diff changeset	447 if ("unset" eq $multi_thread) { # The multi-thread option still holds its "unset" marker
f82c70f54bd7 Uploaded okorol parents: diff changeset	448 if (!($cpu > 1)) { # Not more than one CPU: run the HMMER searches sequentially
f82c70f54bd7 Uploaded okorol parents: diff changeset	449 $multi_thread = 0;
f82c70f54bd7 Uploaded okorol parents: diff changeset	450 } else { # More than one CPU: multi-thread the HMMER searches
f82c70f54bd7 Uploaded okorol parents: diff changeset	451 $multi_thread = 1;
f82c70f54bd7 Uploaded okorol parents: diff changeset	452 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	453 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	454
f82c70f54bd7 Uploaded okorol parents: diff changeset	455 unless ($hmmscan eq "") { # A pre-computed hmmscan output was supplied
f82c70f54bd7 Uploaded okorol parents: diff changeset	456 $output = $hmmscan; # Reuse the name of that hmmscan output file as the output base name
f82c70f54bd7 Uploaded okorol parents: diff changeset	457 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	458
f82c70f54bd7 Uploaded okorol parents: diff changeset	459 ## Check for binaries
f82c70f54bd7 Uploaded okorol parents: diff changeset	460
f82c70f54bd7 Uploaded okorol parents: diff changeset	461 chomp($path = `which hmmpress`); # Ask the shell for the location of hmmpress
f82c70f54bd7 Uploaded okorol parents: diff changeset	462 if ($path eq "") { # Nothing was reported: show an error message and abort ITSx
f82c70f54bd7 Uploaded okorol parents: diff changeset	463 print STDERR "FATAL ERROR :: Could not locate HMMER binaries! It seems that hmmpress is not installed properly.\
f82c70f54bd7 Uploaded okorol parents: diff changeset	464 Consult the manual for installation instructions. Note that HMMER3 is required. Previous HMMER-versions will not work.\
f82c70f54bd7 Uploaded okorol parents: diff changeset	465 This error is fatal, and ITSx will now abort.\n";
f82c70f54bd7 Uploaded okorol parents: diff changeset	466 print STDERR "-----------------------------------------------------------------\n";
f82c70f54bd7 Uploaded okorol parents: diff changeset	467 exit 1; # Non-zero status so callers can detect the failure (a bare exit reports success)
f82c70f54bd7 Uploaded okorol parents: diff changeset	468 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	469
f82c70f54bd7 Uploaded okorol parents: diff changeset	470 chomp($path = `which hmmscan`); # Ask the shell for the location of hmmscan
f82c70f54bd7 Uploaded okorol parents: diff changeset	471 if ($path eq "") { # Nothing was reported: show an error message and abort ITSx
f82c70f54bd7 Uploaded okorol parents: diff changeset	472 print STDERR "FATAL ERROR :: Could not locate HMMER binaries! It seems that hmmscan is not installed properly.\
f82c70f54bd7 Uploaded okorol parents: diff changeset	473 Consult the manual for installation instructions. Note that HMMER3 is required. Previous HMMER-versions will not work.\
f82c70f54bd7 Uploaded okorol parents: diff changeset	474 This error is fatal, and ITSx will now abort.\n";
f82c70f54bd7 Uploaded okorol parents: diff changeset	475 print STDERR "-----------------------------------------------------------------\n";
f82c70f54bd7 Uploaded okorol parents: diff changeset	476 exit 1; # Non-zero status so callers can detect the failure (a bare exit reports success)
f82c70f54bd7 Uploaded okorol parents: diff changeset	477 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	478
f82c70f54bd7 Uploaded okorol parents: diff changeset	479
f82c70f54bd7 Uploaded okorol parents: diff changeset	480 ## Check for database
f82c70f54bd7 Uploaded okorol parents: diff changeset	481 chomp($errormsg = `ls $profileDB* 2>&1 1>/dev/null`); # Capture only what ls writes to stderr when listing the profile database files
f82c70f54bd7 Uploaded okorol parents: diff changeset	482 if (substr($errormsg,0,4) eq "ls: ") { # A message beginning with "ls: " is taken to mean the database is missing
f82c70f54bd7 Uploaded okorol parents: diff changeset	483 print STDERR "FATAL ERROR :: The specified profile database could not be found.\
f82c70f54bd7 Uploaded okorol parents: diff changeset	484 Consult the manual for installation instructions.\
f82c70f54bd7 Uploaded okorol parents: diff changeset	485 This error is fatal, and ITSx will now abort.\n";
f82c70f54bd7 Uploaded okorol parents: diff changeset	486 print STDERR "-----------------------------------------------------------------\n";
f82c70f54bd7 Uploaded okorol parents: diff changeset	487 exit 1; # Non-zero status so callers can detect the failure (a bare exit reports success)
f82c70f54bd7 Uploaded okorol parents: diff changeset	488 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	489
f82c70f54bd7 Uploaded okorol parents: diff changeset	490 if ($pipeline == 0) { # Only when ITSx is not run in pipeline mode
f82c70f54bd7 Uploaded okorol parents: diff changeset	491 if ($out_date == 1) { # A date and time stamp was asked for
f82c70f54bd7 Uploaded okorol parents: diff changeset	492 ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime(); # Split the current local time into its fields
f82c70f54bd7 Uploaded okorol parents: diff changeset	493 $year += 1900; # localtime counts years from 1900
f82c70f54bd7 Uploaded okorol parents: diff changeset	494 $mon += 1; # localtime counts months from 0
f82c70f54bd7 Uploaded okorol parents: diff changeset	495 if ($mon < 10) { # Single-digit month: pad with a leading zero
f82c70f54bd7 Uploaded okorol parents: diff changeset	496 $mon = "0$mon";
f82c70f54bd7 Uploaded okorol parents: diff changeset	497 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	498 if ($mday < 10) { # Single-digit day: pad with a leading zero
f82c70f54bd7 Uploaded okorol parents: diff changeset	499 $mday = "0$mday";
f82c70f54bd7 Uploaded okorol parents: diff changeset	500 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	501 if ($hour < 10) { # Single-digit hour: pad with a leading zero
f82c70f54bd7 Uploaded okorol parents: diff changeset	502 $hour = "0$hour";
f82c70f54bd7 Uploaded okorol parents: diff changeset	503 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	504 if ($min < 10) { # Single-digit minute: pad with a leading zero
f82c70f54bd7 Uploaded okorol parents: diff changeset	505 $min = "0$min";
f82c70f54bd7 Uploaded okorol parents: diff changeset	506 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	507 $outputDate = "." . join("-", $year, $mon, $mday) . "_" . join(".", $hour, $min); # Stamp of the form ".YYYY-MM-DD_HH.MM"
f82c70f54bd7 Uploaded okorol parents: diff changeset	508 $outputDate =~ tr{/}{-}; # Turn any slash into a dash (slashes would confuse ITSx's file naming)
f82c70f54bd7 Uploaded okorol parents: diff changeset	509 $output .= $outputDate; # Append the date and time stamp to the output base name
f82c70f54bd7 Uploaded okorol parents: diff changeset	510 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	511 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	512
f82c70f54bd7 Uploaded okorol parents: diff changeset	513 $tempDir = "ITSx_temp_directory__" . $output; # Name the temporary directory after the output base name
f82c70f54bd7 Uploaded okorol parents: diff changeset	514 $tempDir =~ tr{/}{-}; # Turn any slash into a dash (slashes would confuse ITSx's file naming)
f82c70f54bd7 Uploaded okorol parents: diff changeset	515
f82c70f54bd7 Uploaded okorol parents: diff changeset	516 if ($pipeline == 0) { # If not running in pipeline mode
f82c70f54bd7 Uploaded okorol parents: diff changeset	517 ## Create a summary file
f82c70f54bd7 Uploaded okorol parents: diff changeset	518 if ($out_sum == 1) { # If summary output is on
f82c70f54bd7 Uploaded okorol parents: diff changeset	519 $now = localtime; # Get the current time as a readable string
f82c70f54bd7 Uploaded okorol parents: diff changeset	520 open (SUMMARY, ">", "$output.summary.txt") or warn "WARNING :: Could not create $output.summary.txt: $!\n"; # Three-argument open keeps the file name from being read as a mode; a failure is reported instead of passing silently
f82c70f54bd7 Uploaded okorol parents: diff changeset	521 print SUMMARY "ITSx run started at $now.\n"; # Output the starting time for the analysis
f82c70f54bd7 Uploaded okorol parents: diff changeset	522 print SUMMARY "-----------------------------------------------------------------\n";
f82c70f54bd7 Uploaded okorol parents: diff changeset	523 close (SUMMARY); # Close summary file
f82c70f54bd7 Uploaded okorol parents: diff changeset	524 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	525 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	526
f82c70f54bd7 Uploaded okorol parents: diff changeset	527 ## Create a temporary directory for ITSx
f82c70f54bd7 Uploaded okorol parents: diff changeset	528 if ($pipeline == 0) { # Only when ITSx is not run in pipeline mode
f82c70f54bd7 Uploaded okorol parents: diff changeset	529 mkdir($tempDir); # Built-in mkdir: the name never passes through a shell; a failure (e.g. the directory already exists) is ignored, as before
f82c70f54bd7 Uploaded okorol parents: diff changeset	530 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	531
f82c70f54bd7 Uploaded okorol parents: diff changeset	532
f82c70f54bd7 Uploaded okorol parents: diff changeset	533 ## Prepare profile database
f82c70f54bd7 Uploaded okorol parents: diff changeset	534 ## Take a time stamp and, unless silenced, announce the step on STDERR
f82c70f54bd7 Uploaded okorol parents: diff changeset	535 $now = localtime();
f82c70f54bd7 Uploaded okorol parents: diff changeset	536 if (0 == $silent) {
f82c70f54bd7 Uploaded okorol parents: diff changeset	537 print STDERR $now . " : Preparing HMM database (should be quick)...\n";
f82c70f54bd7 Uploaded okorol parents: diff changeset	538 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	539
f82c70f54bd7 Uploaded okorol parents: diff changeset	540 ## Setup profile index
f82c70f54bd7 Uploaded okorol parents: diff changeset	541 %profileIndex = (); # Start from an empty hash; assigning {} would store a stringified hash reference as a bogus key
f82c70f54bd7 Uploaded okorol parents: diff changeset	542
f82c70f54bd7 Uploaded okorol parents: diff changeset	543 # A = Alveolata
f82c70f54bd7 Uploaded okorol parents: diff changeset	544 # B = Bryophyta
f82c70f54bd7 Uploaded okorol parents: diff changeset	545 # C = Bacillariophyta
f82c70f54bd7 Uploaded okorol parents: diff changeset	546 # D = Amoebozoa
f82c70f54bd7 Uploaded okorol parents: diff changeset	547 # E = Euglenozoa
f82c70f54bd7 Uploaded okorol parents: diff changeset	548 # F = Fungi
f82c70f54bd7 Uploaded okorol parents: diff changeset	549 # G = Chlorophyta (green algae)
f82c70f54bd7 Uploaded okorol parents: diff changeset	550 # H = Rhodophyta (red algae)
f82c70f54bd7 Uploaded okorol parents: diff changeset	551 # I = Phaeophyceae (brown algae)
f82c70f54bd7 Uploaded okorol parents: diff changeset	552 # L = Marchantiophyta (liverworts)
f82c70f54bd7 Uploaded okorol parents: diff changeset	553 # M = Metazoa
f82c70f54bd7 Uploaded okorol parents: diff changeset	554 # N = Microsporidia
f82c70f54bd7 Uploaded okorol parents: diff changeset	555 # O = Oomycota
f82c70f54bd7 Uploaded okorol parents: diff changeset	556 # P = Haptophyceae (prymnesiophytes)
f82c70f54bd7 Uploaded okorol parents: diff changeset	557 # Q = Raphidophyceae
f82c70f54bd7 Uploaded okorol parents: diff changeset	558 # R = Rhizaria
f82c70f54bd7 Uploaded okorol parents: diff changeset	559 # S = Synurophyceae
f82c70f54bd7 Uploaded okorol parents: diff changeset	560 # T = Tracheophyta (higher plants)
f82c70f54bd7 Uploaded okorol parents: diff changeset	561 # U = Eustigmatophyceae
f82c70f54bd7 Uploaded okorol parents: diff changeset	562 # X = Apusozoa
f82c70f54bd7 Uploaded okorol parents: diff changeset	563 # Y = Parabasalia
f82c70f54bd7 Uploaded okorol parents: diff changeset	564
f82c70f54bd7 Uploaded okorol parents: diff changeset	565 $profileIndex{'A'} = 'alveolates'; # Map each one-letter profile code to its group name
f82c70f54bd7 Uploaded okorol parents: diff changeset	566 $profileIndex{'B'} = 'bryophyta';
f82c70f54bd7 Uploaded okorol parents: diff changeset	567 $profileIndex{'C'} = 'bacillariophyta';
f82c70f54bd7 Uploaded okorol parents: diff changeset	568 $profileIndex{'D'} = 'amoebozoa';
f82c70f54bd7 Uploaded okorol parents: diff changeset	569 $profileIndex{'E'} = 'euglenozoa';
f82c70f54bd7 Uploaded okorol parents: diff changeset	570 $profileIndex{'F'} = 'fungi';
f82c70f54bd7 Uploaded okorol parents: diff changeset	571 $profileIndex{'G'} = 'chlorophyta';
f82c70f54bd7 Uploaded okorol parents: diff changeset	572 $profileIndex{'H'} = 'rhodophyta';
f82c70f54bd7 Uploaded okorol parents: diff changeset	573 $profileIndex{'I'} = 'phaeophyceae';
f82c70f54bd7 Uploaded okorol parents: diff changeset	574 $profileIndex{'J'} = 'undefined'; # No group is assigned to this letter
f82c70f54bd7 Uploaded okorol parents: diff changeset	575 $profileIndex{'K'} = 'undefined'; # No group is assigned to this letter
f82c70f54bd7 Uploaded okorol parents: diff changeset	576 $profileIndex{'L'} = 'marchantiophyta';
f82c70f54bd7 Uploaded okorol parents: diff changeset	577 $profileIndex{'M'} = 'metazoa';
f82c70f54bd7 Uploaded okorol parents: diff changeset	578 $profileIndex{'N'} = 'microsporidia';
f82c70f54bd7 Uploaded okorol parents: diff changeset	579 $profileIndex{'O'} = 'oomycota';
f82c70f54bd7 Uploaded okorol parents: diff changeset	580 $profileIndex{'P'} = 'haptophyceae';
f82c70f54bd7 Uploaded okorol parents: diff changeset	581 $profileIndex{'Q'} = 'raphidophyceae';
f82c70f54bd7 Uploaded okorol parents: diff changeset	582 $profileIndex{'R'} = 'rhizaria';
f82c70f54bd7 Uploaded okorol parents: diff changeset	583 $profileIndex{'S'} = 'synurophyceae';
f82c70f54bd7 Uploaded okorol parents: diff changeset	584 $profileIndex{'T'} = 'tracheophyta';
f82c70f54bd7 Uploaded okorol parents: diff changeset	585 $profileIndex{'U'} = 'eustigmatophyceae';
f82c70f54bd7 Uploaded okorol parents: diff changeset	586 $profileIndex{'V'} = 'undefined'; # No group is assigned to this letter
f82c70f54bd7 Uploaded okorol parents: diff changeset	587 $profileIndex{'W'} = 'undefined'; # No group is assigned to this letter
f82c70f54bd7 Uploaded okorol parents: diff changeset	588 $profileIndex{'X'} = 'apusozoa';
f82c70f54bd7 Uploaded okorol parents: diff changeset	589 $profileIndex{'Y'} = 'parabasalia';
f82c70f54bd7 Uploaded okorol parents: diff changeset	590 $profileIndex{'Z'} = 'undefined'; # No group is assigned to this letter
f82c70f54bd7 Uploaded okorol parents: diff changeset	591
f82c70f54bd7 Uploaded okorol parents: diff changeset	592
f82c70f54bd7 Uploaded okorol parents: diff changeset	593 @profileList = split(/,/, uc($type)); # Upper-case the requested types and break them apart at the commas
f82c70f54bd7 Uploaded okorol parents: diff changeset	594 foreach $entry (@profileList) { # Go through the entered types
f82c70f54bd7 Uploaded okorol parents: diff changeset	595 if (($entry eq "ALL") \|\| ($entry eq ".")) { # If "all" among the entries
f82c70f54bd7 Uploaded okorol parents: diff changeset	596 push(@profileSet,"A"); # Add the alveolates profiles to the investigation set
f82c70f54bd7 Uploaded okorol parents: diff changeset	597 push(@profileSet,"B"); # Add the bryophytes profiles to the investigation set
f82c70f54bd7 Uploaded okorol parents: diff changeset	598 push(@profileSet,"C"); # Add the bacillariophyta profiles to the investigation set
f82c70f54bd7 Uploaded okorol parents: diff changeset	599 push(@profileSet,"D"); # Add the amoebozoa profiles to the investigation set
f82c70f54bd7 Uploaded okorol parents: diff changeset	600 push(@profileSet,"E"); # Add the euglenozoa profiles to the investigation set
f82c70f54bd7 Uploaded okorol parents: diff changeset	601 push(@profileSet,"F"); # Add the fungi profiles to the investigation set
f82c70f54bd7 Uploaded okorol parents: diff changeset	602 push(@profileSet,"G"); # Add the green algae profiles to the investigation set
f82c70f54bd7 Uploaded okorol parents: diff changeset	603 push(@profileSet,"H"); # Add the red algae profiles to the investigation set
f82c70f54bd7 Uploaded okorol parents: diff changeset	604 push(@profileSet,"I"); # Add the brown algae profiles to the investigation set
f82c70f54bd7 Uploaded okorol parents: diff changeset	605 push(@profileSet,"L"); # Add the liverworts profiles to the investigation set
f82c70f54bd7 Uploaded okorol parents: diff changeset	606 push(@profileSet,"M"); # Add the metazoa profiles to the investigation set
f82c70f54bd7 Uploaded okorol parents: diff changeset	607 #push(@profileSet,"N"); # Add the microsporidia profiles to the investigation set
f82c70f54bd7 Uploaded okorol parents: diff changeset	608 push(@profileSet,"O"); # Add the oomycetes profiles to the investigation set
f82c70f54bd7 Uploaded okorol parents: diff changeset	609 push(@profileSet,"P"); # Add the prymnesiophytes profiles to the investigation set
f82c70f54bd7 Uploaded okorol parents: diff changeset	610 push(@profileSet,"Q"); # Add the raphidophytes profiles to the investigation set
f82c70f54bd7 Uploaded okorol parents: diff changeset	611 push(@profileSet,"R"); # Add the rhizaria profiles to the investigation set
f82c70f54bd7 Uploaded okorol parents: diff changeset	612 push(@profileSet,"S"); # Add the synurophyceae profiles to the investigation set
f82c70f54bd7 Uploaded okorol parents: diff changeset	613 push(@profileSet,"T"); # Add the tracheophyta (higher plants) profiles to the investigation set
f82c70f54bd7 Uploaded okorol parents: diff changeset	614 push(@profileSet,"U"); # Add the eustigmatophytes profiles to the investigation set
f82c70f54bd7 Uploaded okorol parents: diff changeset	615 #push(@profileSet,"X"); # Add the apusozoa profiles to the investigation set
f82c70f54bd7 Uploaded okorol parents: diff changeset	616 #push(@profileSet,"Y"); # Add the parabasalia profiles to the investigation set
f82c70f54bd7 Uploaded okorol parents: diff changeset	617 } else {
f82c70f54bd7 Uploaded okorol parents: diff changeset	618 if (length($entry) == 1) { # If the name has only one character
f82c70f54bd7 Uploaded okorol parents: diff changeset	619 if ($entry =~ m/[ABCDEFGHILMNOPQRSTUXY]/) { # Accept only letters that name a profile set (A-I, L-U, X, Y)
f82c70f54bd7 Uploaded okorol parents: diff changeset	620 push(@profileSet,$entry); # Add the selected profiles to the investigation set
f82c70f54bd7 Uploaded okorol parents: diff changeset	621 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	622 } else {
f82c70f54bd7 Uploaded okorol parents: diff changeset	623 if ($entry =~ m/ALVEOL/) {
f82c70f54bd7 Uploaded okorol parents: diff changeset	624 push(@profileSet,"A"); # Add the alveolates profiles to the investigation set
f82c70f54bd7 Uploaded okorol parents: diff changeset	625 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	626 if ($entry =~ m/BRYO/) {
f82c70f54bd7 Uploaded okorol parents: diff changeset	627 push(@profileSet,"B"); # Add the bryophytes profiles to the investigation set
f82c70f54bd7 Uploaded okorol parents: diff changeset	628 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	629 if ($entry =~ m/MOSS/) {
f82c70f54bd7 Uploaded okorol parents: diff changeset	630 push(@profileSet,"B"); # Add the bryophytes profiles to the investigation set
f82c70f54bd7 Uploaded okorol parents: diff changeset	631 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	632 if ($entry =~ m/BACILL/) {
f82c70f54bd7 Uploaded okorol parents: diff changeset	633 push(@profileSet,"C"); # Add the bacillariophyta profiles to the investigation set
f82c70f54bd7 Uploaded okorol parents: diff changeset	634 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	635 if ($entry =~ m/DIATOM/) {
f82c70f54bd7 Uploaded okorol parents: diff changeset	636 push(@profileSet,"C"); # Add the bacillariophyta profiles to the investigation set
f82c70f54bd7 Uploaded okorol parents: diff changeset	637 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	638 if ($entry =~ m/AMOEB/) {
f82c70f54bd7 Uploaded okorol parents: diff changeset	639 push(@profileSet,"D"); # Add the amoebozoa profiles to the investigation set
f82c70f54bd7 Uploaded okorol parents: diff changeset	640 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	641 if ($entry =~ m/EUGLE/) {
f82c70f54bd7 Uploaded okorol parents: diff changeset	642 push(@profileSet,"E"); # Add the euglenozoa profiles to the investigation set
f82c70f54bd7 Uploaded okorol parents: diff changeset	643 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	644 if ($entry =~ m/FUNG/) {
f82c70f54bd7 Uploaded okorol parents: diff changeset	645 push(@profileSet,"F"); # Add the fungi profiles to the investigation set
f82c70f54bd7 Uploaded okorol parents: diff changeset	646 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	647 if ($entry =~ m/GREEN/) {
f82c70f54bd7 Uploaded okorol parents: diff changeset	648 push(@profileSet,"G"); # Add the green algae profiles to the investigation set
f82c70f54bd7 Uploaded okorol parents: diff changeset	649 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	650 if ($entry =~ m/CHLORO/) {
f82c70f54bd7 Uploaded okorol parents: diff changeset	651 push(@profileSet,"G"); # Add the green algae profiles to the investigation set
f82c70f54bd7 Uploaded okorol parents: diff changeset	652 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	653 if ($entry =~ m/RED-AL/) {
f82c70f54bd7 Uploaded okorol parents: diff changeset	654 push(@profileSet,"H"); # Add the red algae profiles to the investigation set
f82c70f54bd7 Uploaded okorol parents: diff changeset	655 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	656 if ($entry =~ m/RHODO/) {
f82c70f54bd7 Uploaded okorol parents: diff changeset	657 push(@profileSet,"H"); # Add the red algae profiles to the investigation set
f82c70f54bd7 Uploaded okorol parents: diff changeset	658 }
# Translate the remaining organism-group keywords found in $entry into the
# single-letter names of the HMM profile sets to investigate.
# Each keyword is tested on its own and in the order listed, so an entry that
# contains two synonyms for one group (e.g. both BROWN and PHAEOP) adds that
# group's letter to @profileSet twice.
foreach my $keywordAndSet (
    ["BROWN",     "I"],    # brown algae
    ["PHAEOP",    "I"],    # brown algae
    ["LIVER",     "L"],    # liverworts
    ["MARCH",     "L"],    # liverworts
    ["METAZ",     "M"],    # metazoa
    ["ANIMAL",    "M"],    # metazoa
    ["MICROSPOR", "N"],    # microsporidia
    ["OOMYC",     "O"],    # oomycetes
    ["PRYMN",     "P"],    # prymnesiophytes
    ["HAPTO",     "P"],    # prymnesiophytes
    ["RAPHID",    "Q"],    # raphidophytes
    ["RHIZA",     "R"],    # rhizaria
    ["SYNUR",     "S"],    # synurophyceae
    ["TRACHE",    "T"],    # tracheophyta
    ["PLANTS",    "T"],    # tracheophyta
    ["EUSTIG",    "U"],    # eustigmatophytes
    ["APUSO",     "X"],    # apusozoa
    ["PARAB",     "Y"],    # parabasalia
) {
    my ($keyword, $setLetter) = @{$keywordAndSet};
    # Add this group's profiles to the investigation set when its keyword occurs in the entry
    push(@profileSet, $setLetter) if ($entry =~ m/$keyword/);
}
f82c70f54bd7 Uploaded okorol parents: diff changeset	713 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	714 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	715 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	716
## Count the models in every selected HMM-file and, when $reset is 1, rebuild
## the binary index files that hmmscan needs.
foreach $set (@profileSet) { # For each set of profiles in the full profile set for investigation
    $hmmPath = $profileDB . "/" . $set . ".hmm"; # Determine the path to the HMM-file

    # The path is handed to the shell three times below. Wrap it in single quotes
    # (escaping any embedded single quote) so that a profile directory containing
    # spaces or shell metacharacters is passed through as one literal argument.
    my $shellPath = $hmmPath;
    $shellPath =~ s/'/'\\''/g;
    $shellPath = "'" . $shellPath . "'";

    chomp($modelCount = `grep -c "//" $shellPath`); # Count the number of lines containing "//" (one per model) in the HMM-file
    push(@modelCount,$modelCount); # Add the number of models in this HMM-file to the list of model counts
    if ($reset == 1) {
        # The glob suffix stays outside the quotes so the shell still expands it
        `rm -f $shellPath.h3* 2> /dev/null`; # Delete old pressed HMM-files
        `hmmpress $shellPath 2> /dev/null`; # Prepare the HMM-file for searching
        ## Redirecting stderr is a quick and dirty solution to get rid of the messages... Could be made more elegant
    }
}
f82c70f54bd7 Uploaded okorol parents: diff changeset	727
## Clean-up input files and create complementary strand if needed
## Get the current time and output an info message
$now = localtime;
if ($silent == 0) {
    print STDERR "$now : Checking and handling input sequence data (should not take long)...\n";
}

## Open the summary file for writing
if ($out_sum == 1) { # If summary output is on
    # Append to the summary file. The three-argument form keeps the mode separate
    # from the (user-derived) file name, and a failure is reported instead of
    # silently producing no summary: later prints to SUMMARY would otherwise
    # just vanish.
    open (SUMMARY, ">>", "$output.summary.txt")
        or warn "Could not open summary file '$output.summary.txt' for appending: $!\n";
}
f82c70f54bd7 Uploaded okorol parents: diff changeset	739
## Read the input FASTA data, either from the file named by $input or, when no
## file was given, from standard input. For every entry the cleaned sequence is
## written to $tempDir/main.fasta and, if $complement is 1, its reverse
## complement to $tempDir/complement.fasta. The cleaned sequences, the original
## header lines and the order of the sequence IDs are also kept in %sequenceDB,
## %headers and @sequenceOrder.
##
## Both input sources go through the same loop, so they are cleaned identically
## (carriage returns are stripped for standard input too) and the sequence ID is
## always the first whitespace-delimited word of the header line.
my $readingFromFile = ($input ne "") ? 1 : 0;
my $sequenceSource;
if ($readingFromFile) { # If an input file is given
    ## Read from file
    open (SEQUENCES, "<", $input)
        or die "Could not open input file '$input' for reading: $!\n";
    $sequenceSource = \*SEQUENCES;
} else { # If no input file is supplied, then read from stdin instead
    $input = "$tempDir/main.fasta"; # Set up a temporary input file path
    $sequenceSource = \*STDIN;
}

# Temporary files for the cleaned main strand and complementary strand.
# The complement file is created even when it is not going to be filled.
open (MAIN, ">", "$tempDir/main.fasta")
    or die "Could not create temporary file '$tempDir/main.fasta': $!\n";
open (COMPLEMENT, ">", "$tempDir/complement.fasta")
    or die "Could not create temporary file '$tempDir/complement.fasta': $!\n";

$inputSequenceCount = 0; # Reset input sequence counter
while ($sequence = <$sequenceSource>) { # Repeat for every line of the input
    chomp($sequence); # Truncate any potential line feeds
    $sequence =~ tr/\r\n//d; # Remove all carriage return and new line characters
    if (substr($sequence,0,1) eq ">") { # If a new FASTA entry is found in the input
        $inputSequenceCount++; # Add one to the input sequence counter

        # Flush the entry collected so far before starting the new one. For the
        # very first header nothing has been collected yet, so this writes an
        # empty line and records an entry under an empty ID; that is kept as-is
        # because code elsewhere in the file may expect it.
        print MAIN $mainSeq . "\n"; # Write the previous main DNA sequence to the main sequence file
        print MAIN $sequence . " main\n"; # Write the definition line of the new sequence to the main sequence file
        $sequenceDB{"$sequenceID"} = $mainSeq; # Add sequence to sequence database
        $headers{"$sequenceID"} = $header; # Add the header to the header database
        push(@sequenceOrder,$sequenceID); # Add this sequence ID to the ordered list of sequences

        # List assignment is required here: in scalar context split() returns
        # the number of fields rather than the first field.
        ($sequenceID) = split(" ",substr($sequence,1));
        $header = $sequence; # Save the sequence header
        $mainSeq = ""; # Empty the main sequence entry
        if ($complement == 1) { # If the complementary file should be written
            $complementSeq = reverse($complementSeq); # Reverse the complementary DNA sequence
            print COMPLEMENT $complementSeq . "\n"; # Write the previous complementary DNA sequence to the complementary sequence file
            print COMPLEMENT $sequence . " complement\n"; # Write the definition line of the new sequence to the complement sequence file
            $complementSeq = ""; # Empty the complementary sequence entry
        }
    } else { # If this line is just a continuation of the current DNA sequence
        $mseq = $sequence; # Store this part of the DNA sequence in the intermediate variable $mseq
        $mseq =~ s/[ .-]//g; # Remove any gap characters present in this sequence part (good if input was an alignment)
        $mseq =~ tr/a-z/A-Z/; # Make all letters uppercase
        $mseq =~ s/[^A-Z]//g; # Remove all non-alphabetic characters
        $mseq =~ tr/U/T/; # Exchanges U:s for T:s (Uracil to Thymine, good if input was RNA sequence)
        $mainSeq = $mainSeq . $mseq; # Add the intermediate DNA sequence to the end of the main DNA sequence entry

        if ($complement == 1) { # If the complementary file should be written
            # Start from the already cleaned main-strand fragment; the only
            # difference from cleaning the raw line again is that U has become
            # T, and both complement to A.
            $cseq = $mseq;
            $cseq =~ tr/ACGTURYSWKMBDHVN/TGCAAYRWSMKVHDBN/; # Replace all characters with its complementary base
            $complementSeq = $complementSeq . $cseq; # Add the intermediate DNA sequence to the end of the complementary DNA sequence entry
        }
    }
}

## When the end of the input is reached
print MAIN $mainSeq . "\n"; # Write the last main DNA sequence to the main sequence file
if ($complement == 1) { # If the complementary file should be written
    $complementSeq = reverse($complementSeq); # Reverse the complementary DNA sequence
    print COMPLEMENT $complementSeq . "\n"; # Write the last complementary DNA sequence to the complementary sequence file
}
$sequenceDB{"$sequenceID"} = $mainSeq; # Add sequence to sequence database
$headers{"$sequenceID"} = $header; # Add the header to the header database
push(@sequenceOrder,$sequenceID); # Add this sequence ID to the ordered list of sequences
$mainSeq = ""; # Empty the main sequence entry
$complementSeq = ""; # Empty the complementary sequence entry
if ($readingFromFile) {
    close (SEQUENCES); # Close the sequence input file
}
# Buffered write errors only surface at close, and hmmscan reads these files next
close (COMPLEMENT)
    or die "Could not finish writing '$tempDir/complement.fasta': $!\n";
close (MAIN)
    or die "Could not finish writing '$tempDir/main.fasta': $!\n";
f82c70f54bd7 Uploaded okorol parents: diff changeset	852
# When summary output was requested, record how many FASTA entries were read from the input
print SUMMARY "Number of sequences in input file: \t$inputSequenceCount\n" if ($out_sum == 1);
f82c70f54bd7 Uploaded okorol parents: diff changeset	856
f82c70f54bd7 Uploaded okorol parents: diff changeset	857
## Perform HMM-scan
## Unless a pre-computed hmmscan output file was supplied in $hmmscan, every
## selected profile set is searched against the cleaned main-strand file and,
## if $complement is 1, against the complementary-strand file. The searches run
## one after another, or in forked child processes when $multi_thread is on.
if ($hmmscan eq "") { # If a pre-computed hmmscan output file is not supplied
    # "--max" tells HMMER to turn its heuristic filtering off; empty leaves it on
    $heurMax = ($heuristics == 0) ? "--max" : "";

    # Search threshold: an E-value cutoff when one is given, otherwise a score cutoff
    my $cutoffOption = ($search_eval ne "") ? "-E $search_eval" : "-T $search_score";

    # The strands to search, as [temporary file base name, strand code for hmmerSearch]
    my @strandJobs = (["main", "M"]);
    push(@strandJobs, ["complement", "C"]) if ($complement == 1);

    if ($multi_thread == 0) { # If multi-threading is off
        ## Get the current time and output info message
        $now = localtime;
        if ($silent == 0) {
            print STDERR "$now : Comparing sequences to HMM database (this may take a long while)...\n";
        }
        foreach $set (@profileSet) { # Go sequentially through all profile sets to search for
            $hmmPath = $profileDB . "/" . $set . ".hmm"; # Set the path to the HMM-file of the current set
            foreach my $strandJob (@strandJobs) { # Main strand first, then the complement (if requested)
                my ($strandName, $strandCode) = @{$strandJob};
                hmmerSearch("hmmscan --cpu $cpu $heurMax $cutoffOption $hmmPath $tempDir/$strandName.fasta","$tempDir/$strandName.$set.hmmscan",$strandCode,$set); # Call HMMER
            }
        }
    } else { # If multi-threading is on
        ## Get the current time and output info message
        $now = localtime;
        if ($silent == 0) {
            print STDERR "$now : Doing paralellised comparison to HMM database (this may take a long while)...\n";
        }

        ## Determining number of cpus per thread
        if ($complement == 1) { # If the complementary file should be scanned
            $hmmcpu = int(0.5 * $cpu / scalar(@profileSet)); # Assign X CPUs to each thread, X = 0.5 * (TOTAL_CPUs_USED) / (TOTAL_NUMBER_OF_PROFILE_SETS)
        } else { # If the complementary file should not be scanned
            $hmmcpu = int($cpu / scalar(@profileSet)); # Assign X CPUs to each thread, X = (TOTAL_CPUs_USED) / (TOTAL_NUMBER_OF_PROFILE_SETS)
        }
        if ($hmmcpu < 1) { # If the number of CPUs per thread is smaller than 1
            $hmmcpu = 1; # Give each thread at least one CPU to work on
        }

        ## All main strand searches are started first, then the reverse strand searches
        $cpuCount = 0;
        foreach my $strandJob (@strandJobs) {
            my ($strandName, $strandCode) = @{$strandJob};
            foreach $set (@profileSet) { # Go through each profile set to investigate
                $hmmPath = $profileDB . "/" . $set . ".hmm"; # Set the path to the HMM-file of the current set
                my $searchCommand = "hmmscan --cpu $hmmcpu $heurMax $cutoffOption $hmmPath $tempDir/$strandName.fasta";
                my $searchOutput = "$tempDir/$strandName.$set.hmmscan";

                if ($cpuCount < $cpu) { # Still room for another child process
                    $cpuCount++;
                } else { # All slots taken
                    $deceasedPID = wait(); # Wait until a PID is finished, and gather its number
                }
                $pid = fork(); # Fork off a copy of this process for this set

                if (!defined($pid)) {
                    # fork() failed. Without this check the undefined value would be
                    # treated like the child's 0 and the parent itself would run a
                    # single search and then exit. Run the search in this process
                    # instead and carry on.
                    warn "Could not fork a process for profile set $set ($strandName strand): $! -- running this search sequentially\n";
                    hmmerSearch($searchCommand,$searchOutput,$strandCode,$set);
                } elsif ($pid != 0) { # If this is the parent process
                    push(@pids,$pid); # Add the new process ID to the list of active process IDs
                } else { # If this is the new child process
                    hmmerSearch($searchCommand,$searchOutput,$strandCode,$set); # Call HMMER
                    ## Stop child process...
                    exit; # Exits the child process
                }
            }
        }

        ## Reap every remaining child; wait() returns -1 once none are left.
        ## A single wait() per iteration makes the message below appear exactly
        ## once, whatever the number of children.
        while (($deceasedPID = wait()) > -1) {
            # print STDERR " $now : PID $deceasedPID finished.\n"; # Print finished PID
        }
        $now = localtime; # Get the current time
        print STDERR " $now : All processes finished.\n"; # Print that all PIDs have finished
        $now = localtime; # Get current time
        print STDERR "$now : Parallel HMM-scan finished.\n"; # Print informative finishing message
    }
} else { # If a pre-computed hmmscan file is supplied then
    ## Get the current time and output that the hmmscan step is skipped
    $now = localtime;
    if ($silent == 0) {
        print STDERR "$now : Skipping hmmscan! Using $hmmscan as input for the analysis instead.\n";
    }
}
f82c70f54bd7 Uploaded okorol parents: diff changeset	975
f82c70f54bd7 Uploaded okorol parents: diff changeset	976 ## Analyse HMM-scan output
f82c70f54bd7 Uploaded okorol parents: diff changeset	977 ## Get the current time and report that the analysis step is starting
f82c70f54bd7 Uploaded okorol parents: diff changeset	978 $now = localtime;
f82c70f54bd7 Uploaded okorol parents: diff changeset	979 if ($silent == 0) { # Only report progress when silent mode is off
f82c70f54bd7 Uploaded okorol parents: diff changeset	980 print STDERR "$now : Analysing results of HMM-scan (this might take quite some time)...\n";
f82c70f54bd7 Uploaded okorol parents: diff changeset	981 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	982
f82c70f54bd7 Uploaded okorol parents: diff changeset	983 ## Set up the optional output files; each is created (truncated) only if its switch is on. NOTE(review): the results of these open() calls are not checked
f82c70f54bd7 Uploaded okorol parents: diff changeset	984 if ($out_table == 1) { # If table output is on
f82c70f54bd7 Uploaded okorol parents: diff changeset	985 open (TABLE, ">$output.hmmer.table"); # Create a table output file
f82c70f54bd7 Uploaded okorol parents: diff changeset	986 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	987 if ($out_graph == 1) { # If graphical output is on
f82c70f54bd7 Uploaded okorol parents: diff changeset	988 open (GRAPH, ">$output.graph"); # Create a graph output file
f82c70f54bd7 Uploaded okorol parents: diff changeset	989 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	990 if ($out_not == 1) { # If not-found output is on
f82c70f54bd7 Uploaded okorol parents: diff changeset	991 open (NOTFOUND, ">$tempDir/$output\_hmmer_no_detections.txt"); # Create a HMMER not-found output file inside the temporary directory
f82c70f54bd7 Uploaded okorol parents: diff changeset	992 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	993 ## For each profile set: parse the main-strand and (if enabled) complementary-strand hmmscan output, keep the lowest-E-value hit per profile for every query, and store the accepted queries in %allHits
f82c70f54bd7 Uploaded okorol parents: diff changeset	994 $setI = 0; # Set the profile set indicator to zero
f82c70f54bd7 Uploaded okorol parents: diff changeset	995 foreach $set (@profileSet) { # Go through all the profile sets to be investigated
f82c70f54bd7 Uploaded okorol parents: diff changeset	996 for ($co = 0; $co <= 1; $co++) { # Do main ($co == 0) and complementary ($co == 1) strand analysis in order
f82c70f54bd7 Uploaded okorol parents: diff changeset	997 if ($co > 0) { # If main strand analysis is finished
f82c70f54bd7 Uploaded okorol parents: diff changeset	998 if ($complement == 1) { # If complementary strand should be analysed
f82c70f54bd7 Uploaded okorol parents: diff changeset	999 open (HMMOUTPUT, "$tempDir/complement.$set.hmmscan"); # Open hmmscan output for reading
f82c70f54bd7 Uploaded okorol parents: diff changeset	1000 open (SEQUENCES, "$tempDir/complement.fasta"); # Open complementary sequence file for reading
f82c70f54bd7 Uploaded okorol parents: diff changeset	1001 if ($out_table == 1) { # If table output is on, write a header for this set
f82c70f54bd7 Uploaded okorol parents: diff changeset	1002 print TABLE "***********************************************************\n";
f82c70f54bd7 Uploaded okorol parents: diff changeset	1003 print TABLE "$set matches on complementary strand:\n";
f82c70f54bd7 Uploaded okorol parents: diff changeset	1004 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1005 if ($out_graph == 1) { # If graphical output is on, write a header for this set
f82c70f54bd7 Uploaded okorol parents: diff changeset	1006 print GRAPH "***********************************************************\n";
f82c70f54bd7 Uploaded okorol parents: diff changeset	1007 print GRAPH "$set matches on complementary strand:\n";
f82c70f54bd7 Uploaded okorol parents: diff changeset	1008 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1009 } else { # If complementary strand should not be analysed
f82c70f54bd7 Uploaded okorol parents: diff changeset	1010 last; # Exit this loop
f82c70f54bd7 Uploaded okorol parents: diff changeset	1011 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1012 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1013 if ($co == 0) { # If main strand analysis is not finished
f82c70f54bd7 Uploaded okorol parents: diff changeset	1014 open (HMMOUTPUT, "$tempDir/main.$set.hmmscan"); # Open hmmscan output for reading
f82c70f54bd7 Uploaded okorol parents: diff changeset	1015 open (SEQUENCES, "$tempDir/main.fasta"); # Open main sequence file for reading
f82c70f54bd7 Uploaded okorol parents: diff changeset	1016 if ($out_table == 1) { # If table output is on, write a header for this set
f82c70f54bd7 Uploaded okorol parents: diff changeset	1017 print TABLE "***********************************************************\n";
f82c70f54bd7 Uploaded okorol parents: diff changeset	1018 print TABLE "$set matches on main strand:\n";
f82c70f54bd7 Uploaded okorol parents: diff changeset	1019 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1020 if ($out_graph == 1) { # If graphical output is on, write a header for this set
f82c70f54bd7 Uploaded okorol parents: diff changeset	1021 print GRAPH "***********************************************************\n";
f82c70f54bd7 Uploaded okorol parents: diff changeset	1022 print GRAPH "$set matches on main strand:\n";
f82c70f54bd7 Uploaded okorol parents: diff changeset	1023 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1024 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1025 ## Read and analyse hmmscan output file: a "## New query:" line starts a record, tab-separated hit lines follow, and a "//" line ends the record
f82c70f54bd7 Uploaded okorol parents: diff changeset	1026 while ($line = <HMMOUTPUT>) { # Read in the hmmscan output file, line by line
f82c70f54bd7 Uploaded okorol parents: diff changeset	1027 chomp($line); # Remove any potential line feeds
f82c70f54bd7 Uploaded okorol parents: diff changeset	1028 if (substr($line,0,13) eq "## New query:") { # If this line begins with "## New query:", then this is a new entry
f82c70f54bd7 Uploaded okorol parents: diff changeset	1029 undef %hits; # Empty the hits hash
f82c70f54bd7 Uploaded okorol parents: diff changeset	1030 undef %evals; # Empty the e-value hash
f82c70f54bd7 Uploaded okorol parents: diff changeset	1031 undef %scores; # Empty the score hash
f82c70f54bd7 Uploaded okorol parents: diff changeset	1032 $querytemp = substr($line,14); # Extract everything from this line, except for the start ("## New query:")
f82c70f54bd7 Uploaded okorol parents: diff changeset	1033 ($query,$length) = split('\t',$querytemp); # Split into query name and length (any further fields are discarded)
f82c70f54bd7 Uploaded okorol parents: diff changeset	1034 if ($co == 0) { # If main strand analysis
f82c70f54bd7 Uploaded okorol parents: diff changeset	1035 $DNA = $sequenceDB{"$query"}; # Use the stored sequence for this query unchanged
f82c70f54bd7 Uploaded okorol parents: diff changeset	1036 } else { # If complementary strand
f82c70f54bd7 Uploaded okorol parents: diff changeset	1037 $cseq = $sequenceDB{"$query"}; # Store the DNA sequence in the intermediate variable $cseq
f82c70f54bd7 Uploaded okorol parents: diff changeset	1038 $cseq =~ tr/a-z/A-Z/; # Make all letters uppercase (tr takes plain ranges; brackets would only be literal characters)
f82c70f54bd7 Uploaded okorol parents: diff changeset	1039 $cseq =~ s/[^A-Z]//g; # Remove all non-alphabetic characters
f82c70f54bd7 Uploaded okorol parents: diff changeset	1040 $cseq =~ tr/ACGTURYSWKMBDHVN/TGCAAYRWSMKVHDBN/; # Replace all characters with its complementary base
f82c70f54bd7 Uploaded okorol parents: diff changeset	1041 $DNA = reverse($cseq); # Reverse the complemented sequence to obtain the reverse complement
f82c70f54bd7 Uploaded okorol parents: diff changeset	1042 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1043 } else {
f82c70f54bd7 Uploaded okorol parents: diff changeset	1044
f82c70f54bd7 Uploaded okorol parents: diff changeset	1045 if ($line ne "//") {
f82c70f54bd7 Uploaded okorol parents: diff changeset	1046 ## Find domain annotations...
f82c70f54bd7 Uploaded okorol parents: diff changeset	1047 ($query,$matchProfile,$length,$domNo,$sign,$score,$bias,$cE,$iE,$hmmFrom,$hmmTo,$hmmends,$queryFrom,$queryTo,$queryends,$envFrom,$envTo,$envends,$acc) = split('\t',$line); # Split the line into a collection of stat variables
f82c70f54bd7 Uploaded okorol parents: diff changeset	1048
f82c70f54bd7 Uploaded okorol parents: diff changeset	1049 $useQueryFrom = $queryFrom; # Start position used for this hit; moved towards the sequence start below when the profile match is truncated
f82c70f54bd7 Uploaded okorol parents: diff changeset	1050 if ($hmmFrom > 1) { # If the HMM-profile is not matched from the beginning
f82c70f54bd7 Uploaded okorol parents: diff changeset	1051 $hmmDiff = $hmmFrom - 1; # Number of profile positions missing before the match
f82c70f54bd7 Uploaded okorol parents: diff changeset	1052 if ($useQueryFrom > $hmmDiff) {
f82c70f54bd7 Uploaded okorol parents: diff changeset	1053 $useQueryFrom = $queryFrom - $hmmDiff; # Extend the start backwards by the missing profile positions
f82c70f54bd7 Uploaded okorol parents: diff changeset	1054 } else {
f82c70f54bd7 Uploaded okorol parents: diff changeset	1055 $useQueryFrom = 1; # Never extend beyond the first position of the sequence
f82c70f54bd7 Uploaded okorol parents: diff changeset	1056 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1057 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1058
f82c70f54bd7 Uploaded okorol parents: diff changeset	1059 if (uc($anchor) eq "HMM") { # If the anchor option is "HMM" (case-insensitive), use the matched profile span
f82c70f54bd7 Uploaded okorol parents: diff changeset	1060 $anchorLen = $hmmTo - $hmmFrom + 1;
f82c70f54bd7 Uploaded okorol parents: diff changeset	1061 } else { # Otherwise use the anchor option value itself
f82c70f54bd7 Uploaded okorol parents: diff changeset	1062 $anchorLen = $anchor;
f82c70f54bd7 Uploaded okorol parents: diff changeset	1063 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1064
f82c70f54bd7 Uploaded okorol parents: diff changeset	1065
f82c70f54bd7 Uploaded okorol parents: diff changeset	1066 $query_profile_match = $query . ":" . $matchProfile; # Key identifying this query/profile combination
f82c70f54bd7 Uploaded okorol parents: diff changeset	1067 $profileExists = 0; # Assume that the newly found match profile ($matchProfile) is not already found for this sequence
f82c70f54bd7 Uploaded okorol parents: diff changeset	1068 if (exists($hits{$query_profile_match})) { # If a hit is already stored for this query/profile combination
f82c70f54bd7 Uploaded okorol parents: diff changeset	1069 ($hitFrom,$hitTo,$hitProfile,$hitScore,$hitE) = split('\t',$hits{$query_profile_match}); # Split the entry in list into stat variables
f82c70f54bd7 Uploaded okorol parents: diff changeset	1070 if ($iE < $hitE) { # If the new match profile has a smaller E-value than the one from the list
f82c70f54bd7 Uploaded okorol parents: diff changeset	1071 $hits{$query_profile_match} = "$useQueryFrom\t$envTo\t$matchProfile\t$score\t$iE\t$anchorLen"; # Replace the data in the hit list with the data for the newly found match profile
f82c70f54bd7 Uploaded okorol parents: diff changeset	1072 $evals{$query_profile_match} = $iE; # Replace the E-value in the hit list with the E-value for the newly found match profile
f82c70f54bd7 Uploaded okorol parents: diff changeset	1073 $scores{$query_profile_match} = $score; # Replace the score in the hit list with the score for the newly found match profile
f82c70f54bd7 Uploaded okorol parents: diff changeset	1074 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1075 $profileExists = 1; # Indicate that this match profile was found in the hit list
f82c70f54bd7 Uploaded okorol parents: diff changeset	1076 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1077 if ($profileExists == 0) { # If the match profile was not found in the hit list
f82c70f54bd7 Uploaded okorol parents: diff changeset	1078 if (($iE <= $E) && ($score >= $S)) { # If this hit lives up to the minimal score and E-value cutoffs
f82c70f54bd7 Uploaded okorol parents: diff changeset	1079 $hits{$query_profile_match} = "$useQueryFrom\t$envTo\t$matchProfile\t$score\t$iE\t$anchorLen"; # Add the data for the newly found match profile
f82c70f54bd7 Uploaded okorol parents: diff changeset	1080 $evals{$query_profile_match} = $iE; # Add the E-value for the newly found match profile
f82c70f54bd7 Uploaded okorol parents: diff changeset	1081 $scores{$query_profile_match} = $score; # Add the score for the newly found match profile
f82c70f54bd7 Uploaded okorol parents: diff changeset	1082 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1083 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1084 } else { # If the line only contains "//", the end of this sequence's hmmscan entry is reached
f82c70f54bd7 Uploaded okorol parents: diff changeset	1085 ## Save analysis results
f82c70f54bd7 Uploaded okorol parents: diff changeset	1086 @sortedKeys = sort {$hits{$a} <=> $hits{$b}} keys(%hits); # Sort the hit keys numerically ascending (smallest first); the numeric value of a hit string is its leading start position
f82c70f54bd7 Uploaded okorol parents: diff changeset	1087 undef @sortedHits;
f82c70f54bd7 Uploaded okorol parents: diff changeset	1088 undef @scores;
f82c70f54bd7 Uploaded okorol parents: diff changeset	1089 undef @evals;
f82c70f54bd7 Uploaded okorol parents: diff changeset	1090 foreach $key (@sortedKeys) {
f82c70f54bd7 Uploaded okorol parents: diff changeset	1091 push(@sortedHits, $hits{$key}); # Add the hit to the list of hits numerically ascending (smallest first)
f82c70f54bd7 Uploaded okorol parents: diff changeset	1092 push(@scores, $scores{$key}); # Add the score to the scores array
f82c70f54bd7 Uploaded okorol parents: diff changeset	1093 push(@evals, $evals{$key}); # Add the E-value to the evals array
f82c70f54bd7 Uploaded okorol parents: diff changeset	1094 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1095
f82c70f54bd7 Uploaded okorol parents: diff changeset	1096 ## If the number of hits >= N, the min eval <= E and the max score >= S then include query sequence
f82c70f54bd7 Uploaded okorol parents: diff changeset	1097 ## OR if a single domain satisfies the thresholds and this is allowed, include it!
f82c70f54bd7 Uploaded okorol parents: diff changeset	1098 if ( ((scalar(@sortedHits) >= $N) && (min(@evals) <= $E) && (max(@scores) >= $S)) ||
f82c70f54bd7 Uploaded okorol parents: diff changeset	1099 ((scalar(@sortedHits) > 0) && ($allow_single_E >= 0) && (min(@evals) <= $allow_single_E) && (max(@scores) >= $allow_single_score)) ) {
f82c70f54bd7 Uploaded okorol parents: diff changeset	1100 if ($debug == 1) { # If debugging mode is on
f82c70f54bd7 Uploaded okorol parents: diff changeset	1101 print STDERR $query . " :\t" . scalar(@sortedHits) . "\t" . min(@evals) . "\t" . max(@scores) . "\n"; # Print some top hit statistics
f82c70f54bd7 Uploaded okorol parents: diff changeset	1102 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1103
f82c70f54bd7 Uploaded okorol parents: diff changeset	1104 ## Save some total stats to be able to determine origin of ITS sequence
f82c70f54bd7 Uploaded okorol parents: diff changeset	1105 if (scalar(@evals) > 0) { # If there are any E-values stored
f82c70f54bd7 Uploaded okorol parents: diff changeset	1106 $averageE = sum(@evals) / scalar(@evals); # Calculate the average E-value for this profile set
f82c70f54bd7 Uploaded okorol parents: diff changeset	1107 $averageScore = sum(@scores) / scalar(@scores); # Calculate the average score for this profile set
f82c70f54bd7 Uploaded okorol parents: diff changeset	1108 $numberOfDomains = scalar(@sortedHits); # Calculate the number of domains matched on this sequence
f82c70f54bd7 Uploaded okorol parents: diff changeset	1109
f82c70f54bd7 Uploaded okorol parents: diff changeset	1110 #$scoreSum = sum(@scores) / @modelCount[$setI]; # Calculate score sum as: sum / (no. of profiles of this given type)
f82c70f54bd7 Uploaded okorol parents: diff changeset	1111 $scoreSum = sum(@scores) / 4; # Calculate score sum as: sum / 4 (fixed divisor; the per-set model count variant above is disabled)
f82c70f54bd7 Uploaded okorol parents: diff changeset	1112
f82c70f54bd7 Uploaded okorol parents: diff changeset	1113 $saveThis = "$query\t$set\t$co\t$numberOfDomains\t$averageE\t$averageScore\t$scoreSum\t$DNA\t"; # Collect the variables to save for this sequence and this profile set
f82c70f54bd7 Uploaded okorol parents: diff changeset	1114 foreach $hit (@sortedHits) { # Go through the list of hits and add specific information to save from each hit
f82c70f54bd7 Uploaded okorol parents: diff changeset	1115 ($hitFrom,$hitTo,$hitProfile,$hitScore,$hitE,$hitanchorlen) = split('\t',$hit); # Extract information from this hit
f82c70f54bd7 Uploaded okorol parents: diff changeset	1116 $saveThis = $saveThis . "$hitFrom;$hitTo;$hitProfile;$hitScore;$hitE;$hitanchorlen\t"; # Add information to the list of variables to save
f82c70f54bd7 Uploaded okorol parents: diff changeset	1117 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1118 #push(@allHits, $saveThis); # Add this information to the collection of all hits for this sequence, across all profile sets
f82c70f54bd7 Uploaded okorol parents: diff changeset	1119 if (exists($allHits{$query})) { # If this query already has saved results, append the new record on its own line
f82c70f54bd7 Uploaded okorol parents: diff changeset	1120 $allHits{$query} = $allHits{$query} . "\n" . $saveThis;
f82c70f54bd7 Uploaded okorol parents: diff changeset	1121 } else { # Otherwise start the saved results for this query
f82c70f54bd7 Uploaded okorol parents: diff changeset	1122 $allHits{$query} = $saveThis;
f82c70f54bd7 Uploaded okorol parents: diff changeset	1123 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1124 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1125
f82c70f54bd7 Uploaded okorol parents: diff changeset	1126 if ($out_table == 1) { # If table output is on
f82c70f54bd7 Uploaded okorol parents: diff changeset	1127 print TABLE $query . "\t" . $length . "\t"; # Print query and length information to table
f82c70f54bd7 Uploaded okorol parents: diff changeset	1128 foreach $hit (@sortedHits) { # Go through each hit in the hit list
f82c70f54bd7 Uploaded okorol parents: diff changeset	1129 ($hitFrom,$hitTo,$hitProfile,$hitScore,$hitE,$hitanchorlen) = split('\t',$hit); # Extract data corresponding to this hit
f82c70f54bd7 Uploaded okorol parents: diff changeset	1130 print TABLE "$hitFrom - $hitTo: $hitProfile ($hitScore, $hitE)\t"; # Print hit information to table
f82c70f54bd7 Uploaded okorol parents: diff changeset	1131 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1132 print TABLE "\n"; # Print new line
f82c70f54bd7 Uploaded okorol parents: diff changeset	1133 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1134
f82c70f54bd7 Uploaded okorol parents: diff changeset	1135 if ($out_graph == 1) { # If graphical output is on
f82c70f54bd7 Uploaded okorol parents: diff changeset	1136 print GRAPH ">> " . $query . "\t" . $length . " bp\n"; # Print a sequence header
f82c70f54bd7 Uploaded okorol parents: diff changeset	1137 $insertPoint = 0; # Set the domain insert point to beginning of line
f82c70f54bd7 Uploaded okorol parents: diff changeset	1138 $hi = 0; # Set hit number to zero
f82c70f54bd7 Uploaded okorol parents: diff changeset	1139 foreach $hit (@sortedHits) { # Go through the hit list
f82c70f54bd7 Uploaded okorol parents: diff changeset	1140 ($hitFrom,$hitTo,$hitProfile,$hitScore,$hitE,$anchorLen) = split('\t',$hit); # Split the hit into stat variables
f82c70f54bd7 Uploaded okorol parents: diff changeset	1141 if ($graph_scale == 0) { # If the graph scale is scaled individually to 100% for each sequence
f82c70f54bd7 Uploaded okorol parents: diff changeset	1142 $pFrom = $hitFrom / $length * 100; # Set the profile start on graph relative to its position in the sequence
f82c70f54bd7 Uploaded okorol parents: diff changeset	1143 $pTo = $hitTo / $length * 100; # Set the profile end on graph relative to its position in the sequence
f82c70f54bd7 Uploaded okorol parents: diff changeset	1144 $pEnd = 100; # Set the end of the sequence graph to be at 100
f82c70f54bd7 Uploaded okorol parents: diff changeset	1145 } else { # If the scale is the same for all sequences
f82c70f54bd7 Uploaded okorol parents: diff changeset	1146 $pFrom = $hitFrom * $graph_scale; # Set the profile start on graph scaled to the parameter given
f82c70f54bd7 Uploaded okorol parents: diff changeset	1147 $pTo = $hitTo * $graph_scale; # Set the profile end on graph scaled to the parameter given
f82c70f54bd7 Uploaded okorol parents: diff changeset	1148 $pEnd = $length * $graph_scale; # Set the end of the sequence graph to be at the end of the sequence scaled to the parameter given
f82c70f54bd7 Uploaded okorol parents: diff changeset	1149 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1150 for ($insertPoint = $insertPoint; $insertPoint <= $pFrom; $insertPoint++) { # Go forward through the sequence, moving the insert point one step at a time until the beginning of the next profile is reached
f82c70f54bd7 Uploaded okorol parents: diff changeset	1151 print GRAPH "-"; # Print a "-"
f82c70f54bd7 Uploaded okorol parents: diff changeset	1152 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1153 print GRAPH substr($hitProfile,2,3); # When the profile is reached, print its name
f82c70f54bd7 Uploaded okorol parents: diff changeset	1154 $insertPoint = $insertPoint + 3; # Move the insert point three steps forward, to account for the inserted name
f82c70f54bd7 Uploaded okorol parents: diff changeset	1155 ($nextHitStart,$nextHitEnd,$nextProfile) = split('\t',$sortedHits[$hi + 1]); # Check where the next hit in the list is located (single element access; undefined after the last hit)
f82c70f54bd7 Uploaded okorol parents: diff changeset	1156 if (($nextHitStart <= $hitTo) && ($nextHitStart > 0)) { # If the next hit in the list overlaps with this profile
f82c70f54bd7 Uploaded okorol parents: diff changeset	1157 if ($graph_scale == 0) { # If the scale is relative
f82c70f54bd7 Uploaded okorol parents: diff changeset	1158 $pTo = $nextHitStart / $length * 100 - 1; # Change the profile end on the graph to be where this next profile starts
f82c70f54bd7 Uploaded okorol parents: diff changeset	1159 } else { # If the scale is the same for all sequences
f82c70f54bd7 Uploaded okorol parents: diff changeset	1160 $pTo = $nextHitStart * $graph_scale - 1; # Change the profile end on the graph to be where this next profile starts
f82c70f54bd7 Uploaded okorol parents: diff changeset	1161 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1162 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1163 for ($insertPoint = $insertPoint; $insertPoint <= $pTo; $insertPoint++) { # Go forward through the sequence, moving the insert point one step at a time until the end of the current profile is reached
f82c70f54bd7 Uploaded okorol parents: diff changeset	1164 print GRAPH "="; # Print a "="
f82c70f54bd7 Uploaded okorol parents: diff changeset	1165 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1166 if (($nextHitStart <= $hitTo) && ($nextHitStart > 0)) { # If the next hit in the list overlaps with this profile
f82c70f54bd7 Uploaded okorol parents: diff changeset	1167 print GRAPH ">"; # Print a ">" to indicate the profile overlap
f82c70f54bd7 Uploaded okorol parents: diff changeset	1168 $insertPoint++; # Move the insert point one additional step forward to account for the ">" inserted
f82c70f54bd7 Uploaded okorol parents: diff changeset	1169 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1170 $hi++; # Increase the hit number by one
f82c70f54bd7 Uploaded okorol parents: diff changeset	1171 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1172 for ($insertPoint = $insertPoint; $insertPoint <= $pEnd; $insertPoint++) { # If there are no more profile matches to the sequence, go forward through the sequence, moving the insert point one step at a time until the end of the sequence is reached
f82c70f54bd7 Uploaded okorol parents: diff changeset	1173 print GRAPH "-"; # Print a "-"
f82c70f54bd7 Uploaded okorol parents: diff changeset	1174 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1175 print GRAPH "\n"; # Print a new line, indicating the end of this sequence entry
f82c70f54bd7 Uploaded okorol parents: diff changeset	1176 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1177 } else { # If this sequence didn't find any good-enough profile matches
f82c70f54bd7 Uploaded okorol parents: diff changeset	1178 if ($out_not == 1) { # If not-found output is on
f82c70f54bd7 Uploaded okorol parents: diff changeset	1179 print NOTFOUND $query . "\n"; # Print the name of this query to the not-found list
f82c70f54bd7 Uploaded okorol parents: diff changeset	1180 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1181 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1182 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1183 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1184 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1185 close (SEQUENCES); # Close the input sequence file
f82c70f54bd7 Uploaded okorol parents: diff changeset	1186 close (HMMOUTPUT); # Close the hmmscan output file
f82c70f54bd7 Uploaded okorol parents: diff changeset	1187 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1188 $setI++; # Add one to the profile set indicator
f82c70f54bd7 Uploaded okorol parents: diff changeset	1189 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1190
f82c70f54bd7 Uploaded okorol parents: diff changeset	1191 ## Close the optional output files; each handle is closed only if its switch is on
f82c70f54bd7 Uploaded okorol parents: diff changeset	1192 if ($out_table == 1) { # If table output is on, close the table file
f82c70f54bd7 Uploaded okorol parents: diff changeset	1193 close (TABLE);
f82c70f54bd7 Uploaded okorol parents: diff changeset	1194 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1195 if ($out_graph == 1) { # If graphical output is on, close the graph file
f82c70f54bd7 Uploaded okorol parents: diff changeset	1196 close (GRAPH);
f82c70f54bd7 Uploaded okorol parents: diff changeset	1197 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1198 if ($out_not == 1) { # If not-found output is on, close the not-found file
f82c70f54bd7 Uploaded okorol parents: diff changeset	1199 close (NOTFOUND);
f82c70f54bd7 Uploaded okorol parents: diff changeset	1200 #$profileCount = scalar(@profileSet); # Count the number of profile sets
f82c70f54bd7 Uploaded okorol parents: diff changeset	1201 #if ($complement == 1) { # If complementary strand was scanned
f82c70f54bd7 Uploaded okorol parents: diff changeset	1202 # $profileCount = $profileCount * 2; # Double the number of profile sets that was investigated (and thus the number of not-founds that could at max be found)
f82c70f54bd7 Uploaded okorol parents: diff changeset	1203 #}
f82c70f54bd7 Uploaded okorol parents: diff changeset	1204 #`sort $tempDir/hmmer_no_detections.txt \| uniq -c \| grep " $profileCount " \| sed "s/ $profileCount //" > $output\_no_detections.txt`; # Sort the not-found list, count the number of profile sets having no matches for each query. Save those that have only non-matches to the hmmer-not-found file
f82c70f54bd7 Uploaded okorol parents: diff changeset	1205 } # The commented-out lines above are a disabled shell-based way of producing the final not-found list
f82c70f54bd7 Uploaded okorol parents: diff changeset	1206
f82c70f54bd7 Uploaded okorol parents: diff changeset	1207
f82c70f54bd7 Uploaded okorol parents: diff changeset	1208 ## Create total collected output and FASTA output
f82c70f54bd7 Uploaded okorol parents: diff changeset	1209
f82c70f54bd7 Uploaded okorol parents: diff changeset	1210 if ($out_results == 1) {
f82c70f54bd7 Uploaded okorol parents: diff changeset	1211 open (RESULTS, ">$output.extraction.results"); # Create a results file
f82c70f54bd7 Uploaded okorol parents: diff changeset	1212 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1213 open (RAWOUT, ">$tempDir/ITSx_output.raw"); # Create a raw output file for ALL data
f82c70f54bd7 Uploaded okorol parents: diff changeset	1214 open (PROBLEM, ">$output.problematic.txt"); # Create a file for problematic entries
f82c70f54bd7 Uploaded okorol parents: diff changeset	1215 $foundProblem = 0;
f82c70f54bd7 Uploaded okorol parents: diff changeset	1216 if ($out_pos == 1) {
f82c70f54bd7 Uploaded okorol parents: diff changeset	1217 open (POS, ">$output.positions.txt"); # Create a positions file
f82c70f54bd7 Uploaded okorol parents: diff changeset	1218 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1219 if ($out_fasta == 1) { # If FASTA output should be written
f82c70f54bd7 Uploaded okorol parents: diff changeset	1220 open (FASTA, ">$output.full.fasta"); # Create a FASTA output file for found sequences
f82c70f54bd7 Uploaded okorol parents: diff changeset	1221 if ($allow_reorder == 0) { # If reordering of domains is not allowed
f82c70f54bd7 Uploaded okorol parents: diff changeset	1222 open (CHIMERA, ">$output.chimeric.fasta"); # Create a FASTA file for potential chimera sequences with profile matches in the wrong order
f82c70f54bd7 Uploaded okorol parents: diff changeset	1223 $foundChimera = 0;
f82c70f54bd7 Uploaded okorol parents: diff changeset	1224 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1225 if ($out_partial > 0) {
f82c70f54bd7 Uploaded okorol parents: diff changeset	1226 open (FULLPARTIAL, ">$output.full_and_partial.fasta");
f82c70f54bd7 Uploaded okorol parents: diff changeset	1227 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1228 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1229
f82c70f54bd7 Uploaded okorol parents: diff changeset	1230 if ($out_joined == 1) { # If SSU FASTA output should be written
f82c70f54bd7 Uploaded okorol parents: diff changeset	1231 open (JOINED, ">$output.joined.fasta");
f82c70f54bd7 Uploaded okorol parents: diff changeset	1232 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1233 if ($out_ssu == 1) { # If SSU FASTA output should be written
f82c70f54bd7 Uploaded okorol parents: diff changeset	1234 open (SSU, ">$output.SSU.fasta");
f82c70f54bd7 Uploaded okorol parents: diff changeset	1235 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1236 if ($out_lsu == 1) { # If LSU FASTA output should be written
f82c70f54bd7 Uploaded okorol parents: diff changeset	1237 open (LSU, ">$output.LSU.fasta");
f82c70f54bd7 Uploaded okorol parents: diff changeset	1238 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1239 if ($out_58S == 1) { # If 5.8S FASTA output should be written
f82c70f54bd7 Uploaded okorol parents: diff changeset	1240 open (MID, ">$output.5_8S.fasta");
f82c70f54bd7 Uploaded okorol parents: diff changeset	1241 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1242 if ($out_its1 == 1) { # If ITS1 FASTA output should be written
f82c70f54bd7 Uploaded okorol parents: diff changeset	1243 open (ITS1, ">$output.ITS1.fasta");
f82c70f54bd7 Uploaded okorol parents: diff changeset	1244 if ($out_partial > 0) {
f82c70f54bd7 Uploaded okorol parents: diff changeset	1245 open (ITS1PARTIAL, ">$output.ITS1.full_and_partial.fasta");
f82c70f54bd7 Uploaded okorol parents: diff changeset	1246 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1247 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1248 if ($out_its2 == 1) { # If ITS2 FASTA output should be written
f82c70f54bd7 Uploaded okorol parents: diff changeset	1249 open (ITS2, ">$output.ITS2.fasta");
f82c70f54bd7 Uploaded okorol parents: diff changeset	1250 if ($out_partial > 0) {
f82c70f54bd7 Uploaded okorol parents: diff changeset	1251 open (ITS2PARTIAL, ">$output.ITS2.full_and_partial.fasta");
f82c70f54bd7 Uploaded okorol parents: diff changeset	1252 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1253 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1254 if ($out_concat == 1) { # If concatenated output should be written
f82c70f54bd7 Uploaded okorol parents: diff changeset	1255 open (CONCAT, ">$output.concat.fasta");
f82c70f54bd7 Uploaded okorol parents: diff changeset	1256 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1257
f82c70f54bd7 Uploaded okorol parents: diff changeset	1258 undef @sortedHits; # Empty the array of sorted hits
f82c70f54bd7 Uploaded okorol parents: diff changeset	1259 ## Gather the hit lines of every sequence ID, in the order given by @sequenceOrder, into @sortedHits; IDs without any hit are written to the not-found report
f82c70f54bd7 Uploaded okorol parents: diff changeset	1260 # @sortedHits = sort @allHits; # Sort the full list of hits in alphabetical order (to be able to analyse all sequences with same ID at once)
f82c70f54bd7 Uploaded okorol parents: diff changeset	1261 if ($out_not == 1) { # If not-found output is on
f82c70f54bd7 Uploaded okorol parents: diff changeset	1262 open (NOTFOUND, ">", "${output}_no_detections.txt") or die "Could not open ${output}_no_detections.txt for writing: $!\n"; # Create a not-found output file (checked three-argument open)
f82c70f54bd7 Uploaded okorol parents: diff changeset	1263 $noDetect = 0; # Number of sequence IDs written to the not-found file
f82c70f54bd7 Uploaded okorol parents: diff changeset	1264 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1265 foreach $sequenceID (@sequenceOrder) { # Sort the full list of hits in their original order (to be able to analyse all sequences with same ID at once)
f82c70f54bd7 Uploaded okorol parents: diff changeset	1266 if ($sequenceID ne "") {
f82c70f54bd7 Uploaded okorol parents: diff changeset	1267 $countsInList = 0; # Number of hit lines found for this sequence ID
f82c70f54bd7 Uploaded okorol parents: diff changeset	1268 if (exists($allHits{$sequenceID})) {
f82c70f54bd7 Uploaded okorol parents: diff changeset	1269 @allHits = split('\n',$allHits{$sequenceID}); # One hit per newline-separated line
f82c70f54bd7 Uploaded okorol parents: diff changeset	1270 } else {
f82c70f54bd7 Uploaded okorol parents: diff changeset	1271 undef @allHits;
f82c70f54bd7 Uploaded okorol parents: diff changeset	1272 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1273 foreach $line (@allHits) {
f82c70f54bd7 Uploaded okorol parents: diff changeset	1274 @item = split('\t',$line); # Split the line into an array
f82c70f54bd7 Uploaded okorol parents: diff changeset	1275 if ($item[0] eq $sequenceID) { # If this item corresponds to the current sequence ID
f82c70f54bd7 Uploaded okorol parents: diff changeset	1276 push(@sortedHits,$line); # Add it to the sorted list of hits
f82c70f54bd7 Uploaded okorol parents: diff changeset	1277 $countsInList++;
f82c70f54bd7 Uploaded okorol parents: diff changeset	1278 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1279 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1280 if ($countsInList == 0) { # If no matches were found
f82c70f54bd7 Uploaded okorol parents: diff changeset	1281 if ($out_not == 1) { # If not-found output is on
f82c70f54bd7 Uploaded okorol parents: diff changeset	1282 print NOTFOUND "$sequenceID\n"; # Output the sequence ID
f82c70f54bd7 Uploaded okorol parents: diff changeset	1283 $noDetect++;
f82c70f54bd7 Uploaded okorol parents: diff changeset	1284 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1285 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1286 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1287 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1288 if ($out_not == 1) { # If not-found output is on
f82c70f54bd7 Uploaded okorol parents: diff changeset	1289 close (NOTFOUND);
f82c70f54bd7 Uploaded okorol parents: diff changeset	1290 if ($noDetect == 0) { # Nothing was written, so do not leave an empty report behind
f82c70f54bd7 Uploaded okorol parents: diff changeset	1291 unlink("${output}_no_detections.txt"); # Delete without a shell, so spaces or metacharacters in $output cannot break or redirect the removal; failure is ignored, as before
f82c70f54bd7 Uploaded okorol parents: diff changeset	1292 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1293 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1294
f82c70f54bd7 Uploaded okorol parents: diff changeset	1295 ## Start every ITS tally from scratch before the hit list is scanned
f82c70f54bd7 Uploaded okorol parents: diff changeset	1296 @itsCounts = (); # Per-type ITS counters
f82c70f54bd7 Uploaded okorol parents: diff changeset	1297 ($itsChimeric, $itsMain, $itsCompl) = (0, 0, 0); # Chimeric, main-strand and complementary-strand counters
f82c70f54bd7 Uploaded okorol parents: diff changeset	1300
f82c70f54bd7 Uploaded okorol parents: diff changeset	1301 $sortedHits[scalar(@sortedHits)] = "--END--"; # Append a terminating sentinel so that the last real entry is also saved
f82c70f54bd7 Uploaded okorol parents: diff changeset	1302 $lc = 1; # Line counter starts at one
f82c70f54bd7 Uploaded okorol parents: diff changeset	1303 foreach $line (@sortedHits) { # Go through the list of found hits
f82c70f54bd7 Uploaded okorol parents: diff changeset	1304 print RAWOUT "$line\n"; # Write the raw data associated with this hit to the raw data output file
f82c70f54bd7 Uploaded okorol parents: diff changeset	1305 @item = split('\t',$line); # Split the line into an array
f82c70f54bd7 Uploaded okorol parents: diff changeset	1306 ## If this sequence ID is the same as the saved ones, then add it, else empty the array sequence ID and save
f82c70f54bd7 Uploaded okorol parents: diff changeset	1307 if ((@seqID[0] ne @item[0]) && (@item[0] ne "") \|\| ($lc > scalar(@sortedHits))) { # If this sequence ID is not the same as the last one and is non-empty, or if the end of the list has been reached
f82c70f54bd7 Uploaded okorol parents: diff changeset	1308 ## Save profile-type which is most likely...
f82c70f54bd7 Uploaded okorol parents: diff changeset	1309 if ($priority eq "sum") { # If the sum-of-scores algorithm should be used to determine the most likely profile-type
f82c70f54bd7 Uploaded okorol parents: diff changeset	1310 ## Reset variables to unrealistically high or low values
f82c70f54bd7 Uploaded okorol parents: diff changeset	1311 $best = 0;
f82c70f54bd7 Uploaded okorol parents: diff changeset	1312 $bestCount = 0;
f82c70f54bd7 Uploaded okorol parents: diff changeset	1313 $bestEval = 1000;
f82c70f54bd7 Uploaded okorol parents: diff changeset	1314 $bestScore = -1000;
f82c70f54bd7 Uploaded okorol parents: diff changeset	1315 $bestSum = -1000;
f82c70f54bd7 Uploaded okorol parents: diff changeset	1316
f82c70f54bd7 Uploaded okorol parents: diff changeset	1317 for ($i = 0; $i < scalar(@seqScoreSum); $i++) { # Go through all sum-of-scores entries
f82c70f54bd7 Uploaded okorol parents: diff changeset	1318 if (@seqScoreSum[$i] > $bestSum) { # If the current value is larger than the previous top value
f82c70f54bd7 Uploaded okorol parents: diff changeset	1319 $best = $i; # Set the best value to be the current value
f82c70f54bd7 Uploaded okorol parents: diff changeset	1320 ## Set all other best variables to those corresponding to the current value
f82c70f54bd7 Uploaded okorol parents: diff changeset	1321 $bestCount = @seqDomCounts[$i];
f82c70f54bd7 Uploaded okorol parents: diff changeset	1322 $bestEval = @seqAvgE[$i];
f82c70f54bd7 Uploaded okorol parents: diff changeset	1323 $bestScore = @seqAvgScore[$i];
f82c70f54bd7 Uploaded okorol parents: diff changeset	1324 $bestSum = @seqScoreSum[$i];
f82c70f54bd7 Uploaded okorol parents: diff changeset	1325 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1326 if (@seqScoreSum[$i] == $bestSum) { # If the current value is equal to the previous top value
f82c70f54bd7 Uploaded okorol parents: diff changeset	1327 if (@seqDomCounts[$i] > $bestCount) { # If the current domain count is larger than the previous top domain count
f82c70f54bd7 Uploaded okorol parents: diff changeset	1328 $best = $i; # Set the best value to be the current value
f82c70f54bd7 Uploaded okorol parents: diff changeset	1329 ## Set all other best variables to those corresponding to the current value
f82c70f54bd7 Uploaded okorol parents: diff changeset	1330 $bestCount = @seqDomCounts[$i];
f82c70f54bd7 Uploaded okorol parents: diff changeset	1331 $bestEval = @seqAvgE[$i];
f82c70f54bd7 Uploaded okorol parents: diff changeset	1332 $bestScore = @seqAvgScore[$i];
f82c70f54bd7 Uploaded okorol parents: diff changeset	1333 $bestSum = @seqScoreSum[$i];
f82c70f54bd7 Uploaded okorol parents: diff changeset	1334 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1335 if (@seqDomCounts[$i] == $bestCount) { # If the current domain count is equal to the previous top domain count
f82c70f54bd7 Uploaded okorol parents: diff changeset	1336 if (@seqAvgE[$i] < $bestEval) { # If the current E-value is smaller than the previous top E-value
f82c70f54bd7 Uploaded okorol parents: diff changeset	1337 $best = $i; # Set the best value to be the current value
f82c70f54bd7 Uploaded okorol parents: diff changeset	1338 ## Set all other best variables to those corresponding to the current value
f82c70f54bd7 Uploaded okorol parents: diff changeset	1339 $bestCount = @seqDomCounts[$i];
f82c70f54bd7 Uploaded okorol parents: diff changeset	1340 $bestEval = @seqAvgE[$i];
f82c70f54bd7 Uploaded okorol parents: diff changeset	1341 $bestScore = @seqAvgScore[$i];
f82c70f54bd7 Uploaded okorol parents: diff changeset	1342 $bestSum = @seqScoreSum[$i];
f82c70f54bd7 Uploaded okorol parents: diff changeset	1343 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1344 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1345 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1346 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1347 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1348 if ($priority eq "domains") { # If the number of found domains should be used to determine the most likely profile-type
f82c70f54bd7 Uploaded okorol parents: diff changeset	1349 ## Reset variables to unrealistically high or low values
f82c70f54bd7 Uploaded okorol parents: diff changeset	1350 $best = 0;
f82c70f54bd7 Uploaded okorol parents: diff changeset	1351 $bestCount = 0;
f82c70f54bd7 Uploaded okorol parents: diff changeset	1352 $bestEval = 1000;
f82c70f54bd7 Uploaded okorol parents: diff changeset	1353 $bestScore = -1000;
f82c70f54bd7 Uploaded okorol parents: diff changeset	1354
f82c70f54bd7 Uploaded okorol parents: diff changeset	1355 for ($i = 0; $i < scalar(@seqDomCounts); $i++) { # Go through all domain count entries
f82c70f54bd7 Uploaded okorol parents: diff changeset	1356 if (@seqDomCounts[$i] > $bestCount) { # If the current domain count is larger than the previous top domain count
f82c70f54bd7 Uploaded okorol parents: diff changeset	1357 $best = $i; # Set the best value to be the current value
f82c70f54bd7 Uploaded okorol parents: diff changeset	1358 ## Set all other best variables to those corresponding to the current value
f82c70f54bd7 Uploaded okorol parents: diff changeset	1359 $bestCount = @seqDomCounts[$i];
f82c70f54bd7 Uploaded okorol parents: diff changeset	1360 $bestEval = @seqAvgE[$i];
f82c70f54bd7 Uploaded okorol parents: diff changeset	1361 $bestScore = @seqAvgScore[$i];
f82c70f54bd7 Uploaded okorol parents: diff changeset	1362 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1363 if (@seqDomCounts[$i] == $bestCount) { # If the current domain count is equal to the previous top domain count
f82c70f54bd7 Uploaded okorol parents: diff changeset	1364 if (@seqAvgE[$i] < $bestEval) { # If the current E-value is smaller than the previous top E-value
f82c70f54bd7 Uploaded okorol parents: diff changeset	1365 $best = $i; # Set the best value to be the current value
f82c70f54bd7 Uploaded okorol parents: diff changeset	1366 ## Set all other best variables to those corresponding to the current value
f82c70f54bd7 Uploaded okorol parents: diff changeset	1367 $bestCount = @seqDomCounts[$i];
f82c70f54bd7 Uploaded okorol parents: diff changeset	1368 $bestEval = @seqAvgE[$i];
f82c70f54bd7 Uploaded okorol parents: diff changeset	1369 $bestScore = @seqAvgScore[$i];
f82c70f54bd7 Uploaded okorol parents: diff changeset	1370 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1371 if (@seqAvgE[$i] == $bestEval) { # If the current E-value is equal to the previous top E-value
f82c70f54bd7 Uploaded okorol parents: diff changeset	1372 if (@seqAvgScore[$i] > $bestScore) { # If the current average score is larger than the previous top average score
f82c70f54bd7 Uploaded okorol parents: diff changeset	1373 $best = $i; # Set the best value to be the current value
f82c70f54bd7 Uploaded okorol parents: diff changeset	1374 ## Set all other best variables to those corresponding to the current value
f82c70f54bd7 Uploaded okorol parents: diff changeset	1375 $bestCount = @seqDomCounts[$i];
f82c70f54bd7 Uploaded okorol parents: diff changeset	1376 $bestEval = @seqAvgE[$i];
f82c70f54bd7 Uploaded okorol parents: diff changeset	1377 $bestScore = @seqAvgScore[$i];
f82c70f54bd7 Uploaded okorol parents: diff changeset	1378 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1379 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1380 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1381 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1382 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1383 if ($priority eq "eval") { # If the average E-value should be used to determine the most likely profile-type
f82c70f54bd7 Uploaded okorol parents: diff changeset	1384 ## Reset variables to unrealistically high or low values
f82c70f54bd7 Uploaded okorol parents: diff changeset	1385 $best = 0;
f82c70f54bd7 Uploaded okorol parents: diff changeset	1386 $bestCount = 0;
f82c70f54bd7 Uploaded okorol parents: diff changeset	1387 $bestEval = 1000;
f82c70f54bd7 Uploaded okorol parents: diff changeset	1388 $bestScore = -1000;
f82c70f54bd7 Uploaded okorol parents: diff changeset	1389
f82c70f54bd7 Uploaded okorol parents: diff changeset	1390 for ($i = 0; $i < scalar(@seqDomCounts); $i++) { # Go through all domain counts entries
f82c70f54bd7 Uploaded okorol parents: diff changeset	1391 if (@seqAvgE[$i] < $bestEval) { # If the current E-value is smaller than the previous top E-value
f82c70f54bd7 Uploaded okorol parents: diff changeset	1392 $best = $i; # Set the best value to be the current value
f82c70f54bd7 Uploaded okorol parents: diff changeset	1393 ## Set all other best variables to those corresponding to the current value
f82c70f54bd7 Uploaded okorol parents: diff changeset	1394 $bestCount = @seqDomCounts[$i];
f82c70f54bd7 Uploaded okorol parents: diff changeset	1395 $bestEval = @seqAvgE[$i];
f82c70f54bd7 Uploaded okorol parents: diff changeset	1396 $bestScore = @seqAvgScore[$i];
f82c70f54bd7 Uploaded okorol parents: diff changeset	1397 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1398 if (@seqAvgE[$i] == $bestEval) { # If the current E-value is equal to the previous top E-value
f82c70f54bd7 Uploaded okorol parents: diff changeset	1399 if (@seqAvgScore[$i] > $bestScore) { # If the current average score is larger than the previous top average score
f82c70f54bd7 Uploaded okorol parents: diff changeset	1400 $best = $i; # Set the best value to be the current value
f82c70f54bd7 Uploaded okorol parents: diff changeset	1401 ## Set all other best variables to those corresponding to the current value
f82c70f54bd7 Uploaded okorol parents: diff changeset	1402 $bestCount = @seqDomCounts[$i];
f82c70f54bd7 Uploaded okorol parents: diff changeset	1403 $bestEval = @seqAvgE[$i];
f82c70f54bd7 Uploaded okorol parents: diff changeset	1404 $bestScore = @seqAvgScore[$i];
f82c70f54bd7 Uploaded okorol parents: diff changeset	1405 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1406 if (@seqAvgScore[$i] == $bestScore) { # If the current average score is equal to the previous top average score
f82c70f54bd7 Uploaded okorol parents: diff changeset	1407 if (@seqDomCounts[$i] > $bestCount) { # If the current number of domains is larger than the previous top number of domains
f82c70f54bd7 Uploaded okorol parents: diff changeset	1408 $best = $i; # Set the best value to be the current value
f82c70f54bd7 Uploaded okorol parents: diff changeset	1409 ## Set all other best variables to those corresponding to the current value
f82c70f54bd7 Uploaded okorol parents: diff changeset	1410 $bestCount = @seqDomCounts[$i];
f82c70f54bd7 Uploaded okorol parents: diff changeset	1411 $bestEval = @seqAvgE[$i];
f82c70f54bd7 Uploaded okorol parents: diff changeset	1412 $bestScore = @seqAvgScore[$i];
f82c70f54bd7 Uploaded okorol parents: diff changeset	1413 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1414 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1415 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1416 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1417 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1418 if ($priority eq "score") { # If the average score should be used to determine the most likely profile-type
f82c70f54bd7 Uploaded okorol parents: diff changeset	1419 ## Reset variables to unrealistically high or low values
f82c70f54bd7 Uploaded okorol parents: diff changeset	1420 $best = 0;
f82c70f54bd7 Uploaded okorol parents: diff changeset	1421 $bestCount = 0;
f82c70f54bd7 Uploaded okorol parents: diff changeset	1422 $bestEval = 1000;
f82c70f54bd7 Uploaded okorol parents: diff changeset	1423 $bestScore = -1000;
f82c70f54bd7 Uploaded okorol parents: diff changeset	1424
f82c70f54bd7 Uploaded okorol parents: diff changeset	1425 for ($i = 0; $i < scalar(@seqDomCounts); $i++) { # Go through all domain counts entries
f82c70f54bd7 Uploaded okorol parents: diff changeset	1426 if (@seqAvgScore[$i] > $bestScore) { # If the current average score is larger than the previous top average score
f82c70f54bd7 Uploaded okorol parents: diff changeset	1427 $best = $i; # Set the best value to be the current value
f82c70f54bd7 Uploaded okorol parents: diff changeset	1428 ## Set all other best variables to those corresponding to the current value
f82c70f54bd7 Uploaded okorol parents: diff changeset	1429 $bestCount = @seqDomCounts[$i];
f82c70f54bd7 Uploaded okorol parents: diff changeset	1430 $bestEval = @seqAvgE[$i];
f82c70f54bd7 Uploaded okorol parents: diff changeset	1431 $bestScore = @seqAvgScore[$i];
f82c70f54bd7 Uploaded okorol parents: diff changeset	1432 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1433 if (@seqAvgScore[$i] == $bestScore) { # If the current average score is equal to the previous top average score
f82c70f54bd7 Uploaded okorol parents: diff changeset	1434 if (@seqAvgE[$i] < $bestEval) { # If the current E-value is smaller than the previous top E-value
f82c70f54bd7 Uploaded okorol parents: diff changeset	1435 $best = $i; # Set the best value to be the current value
f82c70f54bd7 Uploaded okorol parents: diff changeset	1436 ## Set all other best variables to those corresponding to the current value
f82c70f54bd7 Uploaded okorol parents: diff changeset	1437 $bestCount = @seqDomCounts[$i];
f82c70f54bd7 Uploaded okorol parents: diff changeset	1438 $bestEval = @seqAvgE[$i];
f82c70f54bd7 Uploaded okorol parents: diff changeset	1439 $bestScore = @seqAvgScore[$i];
f82c70f54bd7 Uploaded okorol parents: diff changeset	1440 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1441 if (@seqAvgE[$i] == $bestEval) { # If the current E-value is equal to the previous top E-value
f82c70f54bd7 Uploaded okorol parents: diff changeset	1442 if (@seqDomCounts[$i] > $bestCount) { # If the current number of domains is larger than the previous top number of domains
f82c70f54bd7 Uploaded okorol parents: diff changeset	1443 $best = $i; # Set the best value to be the current value
f82c70f54bd7 Uploaded okorol parents: diff changeset	1444 ## Set all other best variables to those corresponding to the current value
f82c70f54bd7 Uploaded okorol parents: diff changeset	1445 $bestCount = @seqDomCounts[$i];
f82c70f54bd7 Uploaded okorol parents: diff changeset	1446 $bestEval = @seqAvgE[$i];
f82c70f54bd7 Uploaded okorol parents: diff changeset	1447 $bestScore = @seqAvgScore[$i];
f82c70f54bd7 Uploaded okorol parents: diff changeset	1448 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1449 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1450 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1451 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1452 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1453
f82c70f54bd7 Uploaded okorol parents: diff changeset	1454 if (@seqID[$best] ne "") { # If the sequence ID of the most likely profile is not empty
f82c70f54bd7 Uploaded okorol parents: diff changeset	1455
f82c70f54bd7 Uploaded okorol parents: diff changeset	1456 $allanchorLens = @anchorLens[$best]; # Get the best anchor lengths
f82c70f54bd7 Uploaded okorol parents: diff changeset	1457 @allanchorLens = split(',', $allanchorLens); # Split the anchor lens into an array
f82c70f54bd7 Uploaded okorol parents: diff changeset	1458
f82c70f54bd7 Uploaded okorol parents: diff changeset	1459 $chimeric = 0; # Assume the sequence is not chimeric
f82c70f54bd7 Uploaded okorol parents: diff changeset	1460 if ($allow_reorder == 0) { # If re-order of domain is not allowed
f82c70f54bd7 Uploaded okorol parents: diff changeset	1461 $domain_order = @allSeqDomains[$best]; # Gather the order the domains are found in
f82c70f54bd7 Uploaded okorol parents: diff changeset	1462 @domain_order = split(' ',$domain_order); # Split the list into an array
f82c70f54bd7 Uploaded okorol parents: diff changeset	1463 @sorted_domain_order = sort {$a cmp $b} @domain_order; # Sort the array alphabetically
f82c70f54bd7 Uploaded okorol parents: diff changeset	1464 for ($di = 0; $di <= scalar(@domain_order); $di++) { # Go through the sorted array
f82c70f54bd7 Uploaded okorol parents: diff changeset	1465 if ((@domain_order[$di] ne @sorted_domain_order[$di]) \|\| (@problemCode[$best] =~ m/C/)) { # Check if the order of the arrays differ at any poiny
f82c70f54bd7 Uploaded okorol parents: diff changeset	1466 $chimeric = 1; # If they do differ, mark the sequence as chimeric
f82c70f54bd7 Uploaded okorol parents: diff changeset	1467 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1468 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1469 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1470
f82c70f54bd7 Uploaded okorol parents: diff changeset	1471 $seqDNALength = length(@seqDNA[$best]); # Get the length of the DNA sequence
f82c70f54bd7 Uploaded okorol parents: diff changeset	1472
f82c70f54bd7 Uploaded okorol parents: diff changeset	1473 ## Print sequence and match data...
f82c70f54bd7 Uploaded okorol parents: diff changeset	1474 ## Order of columns in the output file:
f82c70f54bd7 Uploaded okorol parents: diff changeset	1475 ## ID Length Type Main/Compl Domains Avg.Eval Avg.Score Score_sum Start End Start_domain End_domain Chimeric Domain_order
f82c70f54bd7 Uploaded okorol parents: diff changeset	1476 if ($out_results == 1) {
f82c70f54bd7 Uploaded okorol parents: diff changeset	1477 print RESULTS @seqID[$best] . "\t" . $seqDNALength . "\t" . @seqITSType[$best] . "\t" . @seqCompl[$best] . "\t" . @seqDomCounts[$best] . "\t" . @seqAvgE[$best] . "\t" . @seqAvgScore[$best] . "\t" . @seqScoreSum[$best] . "\t" . @dnaStart[$best] . "\t" . @dnaEnd[$best] . "\t" . @startDomain[$best] . "\t" . @endDomain[$best] . "\t"; # Print sequence and match data to the results file
f82c70f54bd7 Uploaded okorol parents: diff changeset	1478 if ($chimeric == 1) { # If the sequence was regarded chimeric
f82c70f54bd7 Uploaded okorol parents: diff changeset	1479 print RESULTS "Chimeric\t"; # Add a chimeric tag to the entry
f82c70f54bd7 Uploaded okorol parents: diff changeset	1480 } else { # If not chimeric
f82c70f54bd7 Uploaded okorol parents: diff changeset	1481 print RESULTS "\t"; # Add an empty column
f82c70f54bd7 Uploaded okorol parents: diff changeset	1482 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1483 $allDomains = @allSeqDomains[$best]; # Get the domain order of the entry
f82c70f54bd7 Uploaded okorol parents: diff changeset	1484 $allDomains =~ tr/ /,/; # Replace spaces with commas in the domain order string
f82c70f54bd7 Uploaded okorol parents: diff changeset	1485 $allDomains = substr($allDomains,0,length($allDomains) - 1); # Remove the last character (a comma)
f82c70f54bd7 Uploaded okorol parents: diff changeset	1486 print RESULTS $allDomains; # Write the domain order to the results file
f82c70f54bd7 Uploaded okorol parents: diff changeset	1487 print RESULTS "\t"; # Write a tab to the results file
f82c70f54bd7 Uploaded okorol parents: diff changeset	1488 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1489
f82c70f54bd7 Uploaded okorol parents: diff changeset	1490 if ($out_pos == 1) {
f82c70f54bd7 Uploaded okorol parents: diff changeset	1491 $out_all_pos = 1;
f82c70f54bd7 Uploaded okorol parents: diff changeset	1492 if ($out_all_pos == 1) { # Output positions of all domains
f82c70f54bd7 Uploaded okorol parents: diff changeset	1493 $seqPartLen = @dnaEnd[$best] - @dnaStart[$best] + 1;
f82c70f54bd7 Uploaded okorol parents: diff changeset	1494 if ($seqPartLen < 0) {
f82c70f54bd7 Uploaded okorol parents: diff changeset	1495 $seqPartLen = $seqPartLen * -1;
f82c70f54bd7 Uploaded okorol parents: diff changeset	1496 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1497 ## Print the positions of all identified domains to the position file
f82c70f54bd7 Uploaded okorol parents: diff changeset	1498 print POS @seqID[$best] . "\t" . $seqDNALength . " bp." . "\t";
f82c70f54bd7 Uploaded okorol parents: diff changeset	1499 if (@problemCode[$best] !~ m/S/) {
f82c70f54bd7 Uploaded okorol parents: diff changeset	1500 print POS "SSU: " . @ssuStart[$best] . "-" . @ssuEnd[$best] . "\t";
f82c70f54bd7 Uploaded okorol parents: diff changeset	1501 } else {
f82c70f54bd7 Uploaded okorol parents: diff changeset	1502 print POS "SSU: Not found\t";
f82c70f54bd7 Uploaded okorol parents: diff changeset	1503 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1504 if ((@problemCode[$best] =~ m/X/) \|\| ((@problemCode[$best] =~ m/[15]/) && (@problemCode[$best] =~ m/S/))) {
f82c70f54bd7 Uploaded okorol parents: diff changeset	1505 print POS "ITS1: Not found\t";
f82c70f54bd7 Uploaded okorol parents: diff changeset	1506 } else {
f82c70f54bd7 Uploaded okorol parents: diff changeset	1507 if (@problemCode[$best] =~ m/O/) {
f82c70f54bd7 Uploaded okorol parents: diff changeset	1508 print POS "ITS1: " . (@ssuEnd[$best] + 1) . "-" . (@ssuEnd[$best] + 1) . "\t";
f82c70f54bd7 Uploaded okorol parents: diff changeset	1509 } else {
f82c70f54bd7 Uploaded okorol parents: diff changeset	1510 print POS "ITS1: " . @its1Start[$best] . "-" . @its1End[$best] . "\t";
f82c70f54bd7 Uploaded okorol parents: diff changeset	1511 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1512 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1513 if (@problemCode[$best] !~ m/[125OP]/) {
f82c70f54bd7 Uploaded okorol parents: diff changeset	1514 print POS "5.8S: " . @midStart[$best] . "-" . @midEnd[$best] . "\t";
f82c70f54bd7 Uploaded okorol parents: diff changeset	1515 } else {
f82c70f54bd7 Uploaded okorol parents: diff changeset	1516 if (@problemCode[$best] =~ m/5/) {
f82c70f54bd7 Uploaded okorol parents: diff changeset	1517 print POS "5.8S: Not found\t";
f82c70f54bd7 Uploaded okorol parents: diff changeset	1518 } else {
f82c70f54bd7 Uploaded okorol parents: diff changeset	1519 if (@problemCode[$best] =~ m/1/) {
f82c70f54bd7 Uploaded okorol parents: diff changeset	1520 print POS "5.8S: No start\t";
f82c70f54bd7 Uploaded okorol parents: diff changeset	1521 } else {
f82c70f54bd7 Uploaded okorol parents: diff changeset	1522 if (@problemCode[$best] =~ m/O/) {
f82c70f54bd7 Uploaded okorol parents: diff changeset	1523 print POS "5.8S: Overlap SSU\t";
f82c70f54bd7 Uploaded okorol parents: diff changeset	1524 } else {
f82c70f54bd7 Uploaded okorol parents: diff changeset	1525 if (@problemCode[$best] =~ m/P/) {
f82c70f54bd7 Uploaded okorol parents: diff changeset	1526 print POS "5.8S: Overlap LSU\t";
f82c70f54bd7 Uploaded okorol parents: diff changeset	1527 } else {
f82c70f54bd7 Uploaded okorol parents: diff changeset	1528 print POS "5.8S: No end\t";
f82c70f54bd7 Uploaded okorol parents: diff changeset	1529 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1530 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1531 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1532 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1533 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1534 if ((@problemCode[$best] =~ m/Y/) \|\|((@problemCode[$best] =~ m/[25]/) && (@problemCode[$best] =~ m/L/))) {
f82c70f54bd7 Uploaded okorol parents: diff changeset	1535 print POS "ITS2: Not found\t";
f82c70f54bd7 Uploaded okorol parents: diff changeset	1536 } else {
f82c70f54bd7 Uploaded okorol parents: diff changeset	1537 if (@problemCode[$best] =~ m/O/) {
f82c70f54bd7 Uploaded okorol parents: diff changeset	1538 print POS "ITS2: " . (@lsuStart[$best] - 1) . "-" . (@lsuStart[$best] - 1) . "\t";
f82c70f54bd7 Uploaded okorol parents: diff changeset	1539 } else {
f82c70f54bd7 Uploaded okorol parents: diff changeset	1540 print POS "ITS2: " . @its2Start[$best] . "-" . @its2End[$best] . "\t";
f82c70f54bd7 Uploaded okorol parents: diff changeset	1541 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1542 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1543 if (@problemCode[$best] !~ m/L/) {
f82c70f54bd7 Uploaded okorol parents: diff changeset	1544 print POS "LSU: " . @lsuStart[$best] . "-" . @lsuEnd[$best] . "\t";
f82c70f54bd7 Uploaded okorol parents: diff changeset	1545 } else {
f82c70f54bd7 Uploaded okorol parents: diff changeset	1546 print POS "LSU: Not found" . "\t";
f82c70f54bd7 Uploaded okorol parents: diff changeset	1547 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1548
f82c70f54bd7 Uploaded okorol parents: diff changeset	1549 if (@problemCode[$best] =~ m/5/) {
f82c70f54bd7 Uploaded okorol parents: diff changeset	1550 print POS "Broken or partial sequence, no 5.8S! ";
f82c70f54bd7 Uploaded okorol parents: diff changeset	1551 } else {
f82c70f54bd7 Uploaded okorol parents: diff changeset	1552 if (@problemCode[$best] =~ m/[12]/) {
f82c70f54bd7 Uploaded okorol parents: diff changeset	1553 print POS "Broken or partial sequence, only partial 5.8S! ";
f82c70f54bd7 Uploaded okorol parents: diff changeset	1554 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1555 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1556 if (@problemCode[$best] =~ m/B/) {
f82c70f54bd7 Uploaded okorol parents: diff changeset	1557 print POS "ITS region too long! ";
f82c70f54bd7 Uploaded okorol parents: diff changeset	1558 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1559 if (@problemCode[$best] =~ m/O/) {
f82c70f54bd7 Uploaded okorol parents: diff changeset	1560 print POS "5.8S seem to overlap with SSU! ";
f82c70f54bd7 Uploaded okorol parents: diff changeset	1561 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1562 if (@problemCode[$best] =~ m/P/) {
f82c70f54bd7 Uploaded okorol parents: diff changeset	1563 print POS "5.8S seem to overlap with LSU! ";
f82c70f54bd7 Uploaded okorol parents: diff changeset	1564 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1565 if (@problemCode[$best] =~ m/C/) {
f82c70f54bd7 Uploaded okorol parents: diff changeset	1566 print POS "Chimeric! ";
f82c70f54bd7 Uploaded okorol parents: diff changeset	1567 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1568 print POS "\n";
f82c70f54bd7 Uploaded okorol parents: diff changeset	1569
f82c70f54bd7 Uploaded okorol parents: diff changeset	1570 } else { # Output only ITS positions
f82c70f54bd7 Uploaded okorol parents: diff changeset	1571 $seqPartLen = @dnaEnd[$best] - @dnaStart[$best] + 1;
f82c70f54bd7 Uploaded okorol parents: diff changeset	1572 print POS @seqID[$best] . "\t" . $seqDNALength . " bp." . "\t" . "ITS1: " . @its1Start[$best] . "-" . @its1End[$best] . "\t" . "ITS2: " . @its2Start[$best] . "-" . @its2End[$best] . "\n"; # Print the positions of the ITS sequences to the position file
f82c70f54bd7 Uploaded okorol parents: diff changeset	1573 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1574 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1575
f82c70f54bd7 Uploaded okorol parents: diff changeset	1576 if (@problem[$best] ne "") {
f82c70f54bd7 Uploaded okorol parents: diff changeset	1577 $foundProblem++;
f82c70f54bd7 Uploaded okorol parents: diff changeset	1578 print PROBLEM @seqID[$best] . "\t" . @problem[$best] . "\n";
f82c70f54bd7 Uploaded okorol parents: diff changeset	1579 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1580
## Set extended type string (the text placed in the FASTA definition line):
## the %profileIndex entry for this entry's ITS type code, followed by
## "ITS sequence".
$extendedType = join(" ", $profileIndex{$seqITSType[$best]}, "ITS sequence");

# Tally this ITS type; the counter array is indexed by the character code of
# the type string's first character.
$itsCounts[ord($seqITSType[$best])]++;
f82c70f54bd7 Uploaded okorol parents: diff changeset	1584
# Choose the strand description used in definition lines and bump the
# matching strand counter. A $seqCompl value of 1 marks domains found on the
# complementary strand; anything else is treated as the main strand.
$extendedStrand = ($seqCompl[$best] == 1) ? "complementary strand" : "main strand";
if ($extendedStrand eq "main strand") {
    $itsMain++;     # Add one to the main strand counter
} else {
    $itsCompl++;    # Add one to the complementary strand counter
}
f82c70f54bd7 Uploaded okorol parents: diff changeset	1592
## Print (extracted) ITS sequence...
# Runs only when FASTA output is on. Non-chimeric entries go to FASTA (unless
# the problem code contains S or L) and, when $out_partial > 0, to FULLPARTIAL.
# Chimeric entries go to CHIMERA and are counted. With $truncate == 0 the whole
# read is written; otherwise the extraction window is computed first and
# stored in $fastaStartPoint / $fastaEndPoint / $fastaLength.
if ($out_fasta == 1) {

    # Definition line for the current entry: the preserved input header when
    # $out_preserve is on, otherwise ">ID|type<sep><type text> (<bp><tail><strand>".
    my $defline = sub {
        my ($typeSep, $bp, $tail) = @_;
        return $headers{$seqID[$best]} . "\n" if ($out_preserve == 1);
        return ">" . $seqID[$best] . "\|" . $seqITSType[$best] . $typeSep . $extendedType . " (" . $bp . $tail . $extendedStrand . "\n";
    };

    # Full/partial ITS output. An entry whose problem code has none of S, L,
    # 1, 2, 5 is labelled "Full"; any other entry is labelled "Partial" and is
    # written only if both ITS1 and ITS2 are longer than $out_partial.
    # $cut selects the extracted window (true) or the whole read (false).
    # NOTE(review): the header reports $seqDNALength even when the window is
    # written -- confirm whether $fastaLength was intended there.
    my $writeFullPartial = sub {
        my ($cut) = @_;
        return unless ($out_partial > 0);

        my $tail;
        if ($problemCode[$best] !~ m/[SL125]/) {
            $tail = " bp) Full ITS region on ";
        } else {
            $its1PartLen = $its1End[$best] - $its1Start[$best] + 1;
            $its2PartLen = $its2End[$best] - $its2Start[$best] + 1;
            return unless (($out_partial < $its1PartLen) && ($out_partial < $its2PartLen));
            $tail = " bp) Partial ITS region on ";
        }

        print FULLPARTIAL $defline->(" ", $seqDNALength, $tail);    # Write FASTA defline
        my $body = $cut ? substr($seqDNA[$best], $fastaStartPoint, $fastaLength) : $seqDNA[$best];
        print FULLPARTIAL $body . "\n";                             # Write DNA sequence
    };

    if ($truncate == 0) {    # The whole sequence is kept in the output file
        if ($chimeric == 0) {    # Not chimeric
            if ($problemCode[$best] !~ m/[SL]/) {
                print FASTA $defline->(" ", $seqDNALength, " bp) on ");    # Write FASTA definition line
                print FASTA $seqDNA[$best] . "\n";                         # Write DNA sequence
            }
            $writeFullPartial->(0);
        } else {                 # Regarded chimeric
            $itsChimeric++;      # Add one to the chimeric counter
            print CHIMERA $defline->(" Chimeric ", length($seqDNA[$best]), " bp) on ");
            print CHIMERA $seqDNA[$best] . "\n";
            $foundChimera++;
        }
    } else {                 # Only the ITS part of the sequence is saved
        # Default window: from the start of the first domain to 10 bp past
        # the end of the last domain.
        $fastaStartPoint = $dnaStart[$best] - 1;
        $fastaEndPoint   = $dnaEnd[$best] + 10;

        if ($seqDomCounts[$best] > 1) {    # More than one domain was found
            my $firstDom = substr($startDomain[$best], 0, 5);
            my $lastDom  = substr($endDomain[$best], 0, 5);

            # A leading SSU/LSU domain moves the start to the end of that domain.
            if ($firstDom eq "1_SSU") {
                $fastaStartPoint = $ssuEnd[$best];
            } elsif ($firstDom eq "4_LSU") {
                $fastaStartPoint = $lsuEnd[$best];
            }

            # A trailing SSU/LSU domain moves the end to just before that domain.
            if ($lastDom eq "1_SSU") {
                $fastaEndPoint = $ssuStart[$best] - 1;
            } elsif ($lastDom eq "4_LSU") {
                $fastaEndPoint = $lsuStart[$best] - 1;
            }
        }

        # Clamp the window to the bounds of the read.
        $fastaStartPoint = 0 if ($fastaStartPoint < 0);
        my $readLen = length($seqDNA[$best]);
        $fastaEndPoint = $readLen if ($fastaEndPoint > $readLen);

        $fastaLength = $fastaEndPoint - $fastaStartPoint + 1;

        if ($chimeric == 0) {    # Not chimeric
            if ($problemCode[$best] !~ m/[SL]/) {
                print FASTA $defline->(" ", $fastaLength, " bp) on ");    # Write FASTA definition line

                my $body;
                if ($anchorLen > 0) {
                    # Widen the window by the first and fourth anchor lengths,
                    # starting at offset 0 when the leading anchor would not
                    # fit in front of the window.
                    my ($lead, $trail) = ($allanchorLens[0], $allanchorLens[3]);
                    if ($fastaStartPoint - $lead > 0) {
                        $body = substr($seqDNA[$best], $fastaStartPoint - $lead, $fastaLength + $lead + $trail);
                    } else {
                        $body = substr($seqDNA[$best], 0, $fastaLength + $fastaStartPoint + $trail);
                    }
                } else {
                    $body = substr($seqDNA[$best], $fastaStartPoint, $fastaLength);
                }
                print FASTA $body . "\n";    # Write DNA sequence
            }
            $writeFullPartial->(1);
        } else {                 # Regarded chimeric
            $itsChimeric++;      # Add one to the chimeric counter
            # NOTE(review): the header reports $fastaLength but the whole read
            # is written below -- confirm which of the two is intended.
            print CHIMERA $defline->(" Chimeric ", $fastaLength, " bp) on ");
            print CHIMERA $seqDNA[$best] . "\n";
            $foundChimera++;
        }
    }
}
f82c70f54bd7 Uploaded okorol parents: diff changeset	1714
# Joined output: one record per entry holding the window described by
# $fastaStartPoint / $fastaLength. Chimeric and non-chimeric entries are
# written alike; only the generated definition line differs (" Chimeric ").
# NOTE(review): in the code visible here, $fastaStartPoint and $fastaLength are
# assigned only in the FASTA branch taken when $out_fasta == 1 and
# $truncate != 0 -- confirm they hold current values on every path that
# reaches this point.
if ($out_joined == 1) {
    if ($out_preserve == 1) {    # Sequence headers are preserved
        print JOINED $headers{$seqID[$best]} . "\n";    # Write FASTA definition line
    } else {
        my $typeSep = ($chimeric == 0) ? " " : " Chimeric ";
        print JOINED ">" . $seqID[$best] . "\|" . $seqITSType[$best] . $typeSep . $extendedType . " (" . $fastaLength . " bp) From domain " . $startDomain[$best] . " to " . $endDomain[$best] . " on " . $extendedStrand . " Found domains: ";    # Write FASTA definition line
        # Domain order, without the last character of the stored domain list
        print JOINED substr($allSeqDomains[$best], 0, length($allSeqDomains[$best]) - 1) . "\n";
    }
    print JOINED substr($seqDNA[$best], $fastaStartPoint, $fastaLength) . "\n";    # Write DNA sequence
}
f82c70f54bd7 Uploaded okorol parents: diff changeset	1734
## Write SSU sequence to file
# Emits one FASTA record for the SSU region of the entry at index $best when
# SSU output is on, the problem code carries no "S" and $only_full is 0.
# Unless $anchor is "HMM" (any letter case), up to $anchorLen bases of
# flanking sequence are written on each side of the region. The length in the
# generated definition line covers the SSU region only, not the flanks.
if (($out_ssu == 1) && ($problemCode[$best] !~ m/S/) && ($only_full == 0)) {
    $seqPartLen = $ssuEnd[$best] - $ssuStart[$best] + 1;
    my $ssuOffset = $ssuStart[$best] - 1;    # substr offset of the first SSU base

    if ($out_preserve == 1) {    # Sequence headers are preserved
        print SSU $headers{$seqID[$best]} . "\n";    # Write FASTA definition line
    } else {
        print SSU ">" . $seqID[$best] . "\|" . $seqITSType[$best] . "\|SSU " . "Extracted SSU sequence (" . $seqPartLen . " bp)\n";    # Write FASTA definition line
    }

    if (uc($anchor) ne "HMM") {
        # Leading flank: $anchorLen bases, or everything before the region
        # when fewer bases are available.
        if ($ssuOffset - $anchorLen > 0) {
            print SSU substr($seqDNA[$best], $ssuOffset - $anchorLen, $anchorLen);
        } else {
            print SSU substr($seqDNA[$best], 0, $ssuOffset);
        }

        print SSU substr($seqDNA[$best], $ssuOffset, $seqPartLen);

        # Trailing flank: $anchorLen bases, or whatever is left of the read
        # when fewer remain. The test looks at the bases remaining after the
        # SSU end (as the ITS1 output does); comparing the SSU length with
        # $anchorLen instead would write the entire rest of the read whenever
        # the region is not longer than the anchor.
        if (length($seqDNA[$best]) - $ssuEnd[$best] - $anchorLen > 0) {
            print SSU substr($seqDNA[$best], $ssuEnd[$best], $anchorLen);
        } else {
            print SSU substr($seqDNA[$best], $ssuEnd[$best]);
        }
    } else {
        print SSU substr($seqDNA[$best], $ssuOffset, $seqPartLen);
    }
    print SSU "\n";    # Terminate the DNA sequence line
}
f82c70f54bd7 Uploaded okorol parents: diff changeset	1764
## Write LSU sequence to file
# Emits one FASTA record for the LSU region of the entry at index $best when
# LSU output is on, $only_full is 0 and the problem code carries no "L".
# Unless $anchor is "HMM" (any letter case), up to $anchorLen bases preceding
# the region are written first. No trailing flank is written for LSU: the
# statements that would add one are disabled in this version of the script.
if (($out_lsu == 1) && ($only_full == 0) && ($problemCode[$best] !~ m/L/)) {
    my $lsuOffset = $lsuStart[$best] - 1;    # substr offset of the first LSU base
    $seqPartLen = $lsuEnd[$best] - $lsuStart[$best] + 1;

    if ($out_preserve == 1) {    # Sequence headers are preserved
        print LSU $headers{$seqID[$best]} . "\n";    # Write FASTA definition line
    } else {
        print LSU ">" . $seqID[$best] . "\|" . $seqITSType[$best] . "\|LSU " . "Extracted LSU sequence (" . $seqPartLen . " bp)\n";    # Write FASTA definition line
    }

    unless (uc($anchor) eq "HMM") {
        # Leading flank: $anchorLen bases, or everything before the region
        # when fewer bases are available.
        if ($lsuOffset - $anchorLen > 0) {
            print LSU substr($seqDNA[$best], $lsuOffset - $anchorLen, $anchorLen);
        } else {
            print LSU substr($seqDNA[$best], 0, $lsuOffset);
        }
    }

    print LSU substr($seqDNA[$best], $lsuOffset, $seqPartLen);
    print LSU "\n";    # Terminate the DNA sequence line
}
f82c70f54bd7 Uploaded okorol parents: diff changeset	1794
## Write 5.8S sequence to file
# Emits one FASTA record for the 5.8S region of the entry at index $best when
# 5.8S output is on and the problem code carries none of "1", "2" or "5".
# Unless $anchor is "HMM" (any letter case), up to $anchorLen bases of
# flanking sequence are written on each side of the region. The length in the
# generated definition line covers the 5.8S region only, not the flanks.
if (($out_58S == 1) && ($problemCode[$best] !~ m/[125]/)) {
    $seqPartLen = $midEnd[$best] - $midStart[$best] + 1;
    my $midOffset = $midStart[$best] - 1;    # substr offset of the first 5.8S base

    if ($out_preserve == 1) {    # Sequence headers are preserved
        print MID $headers{$seqID[$best]} . "\n";    # Write FASTA definition line
    } else {
        print MID ">" . $seqID[$best] . "\|" . $seqITSType[$best] . "\|5.8S " . "Extracted 5.8S sequence (" . $seqPartLen . " bp)\n";    # Write FASTA definition line
    }

    if (uc($anchor) ne "HMM") {
        # Leading flank: $anchorLen bases, or everything before the region
        # when fewer bases are available.
        if ($midOffset - $anchorLen > 0) {
            print MID substr($seqDNA[$best], $midOffset - $anchorLen, $anchorLen);
        } else {
            print MID substr($seqDNA[$best], 0, $midOffset);
        }

        print MID substr($seqDNA[$best], $midOffset, $seqPartLen);

        # Trailing flank: $anchorLen bases, or whatever is left of the read
        # when fewer remain. The test looks at the bases remaining after the
        # 5.8S end (as the ITS1 output does); comparing the 5.8S length with
        # $anchorLen instead would write the entire rest of the read whenever
        # the region is not longer than the anchor.
        if (length($seqDNA[$best]) - $midEnd[$best] - $anchorLen > 0) {
            print MID substr($seqDNA[$best], $midEnd[$best], $anchorLen);
        } else {
            print MID substr($seqDNA[$best], $midEnd[$best]);
        }
    } else {
        print MID substr($seqDNA[$best], $midOffset, $seqPartLen);
    }
    print MID "\n";    # Terminate the DNA sequence line
}
f82c70f54bd7 Uploaded okorol parents: diff changeset	1822
f82c70f54bd7 Uploaded okorol parents: diff changeset	1823 ## Write ITS1 sequence to file
f82c70f54bd7 Uploaded okorol parents: diff changeset	1824 if ($out_its1 == 1) { # If ITS1 output is on
f82c70f54bd7 Uploaded okorol parents: diff changeset	1825 $seqPartLen = @its1End[$best] - @its1Start[$best] + 1;
f82c70f54bd7 Uploaded okorol parents: diff changeset	1826 if ($seqPartLen > 1) {
f82c70f54bd7 Uploaded okorol parents: diff changeset	1827 if ( (($only_full == 0) && (@problemCode[$best] !~ m/[15]/)) \|\| (($only_full == 1) && (@problemCode[$best] !~ m/[S15]/))) {
f82c70f54bd7 Uploaded okorol parents: diff changeset	1828 if ($out_preserve == 1) { # If sequence headers should be preserved
f82c70f54bd7 Uploaded okorol parents: diff changeset	1829 print ITS1 $headers{@seqID[$best]} . "\n"; # Write FASTA definition line
f82c70f54bd7 Uploaded okorol parents: diff changeset	1830 } else {
f82c70f54bd7 Uploaded okorol parents: diff changeset	1831 print ITS1 ">" . @seqID[$best] . "\|" . @seqITSType[$best] . "\|ITS1 " . "Extracted ITS1 sequence " . @its1Start[$best] . "-" . @its1End[$best] . " (" . $seqPartLen . " bp)\n"; # Write FASTA definition line
f82c70f54bd7 Uploaded okorol parents: diff changeset	1832 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1833 if (uc($anchor) ne "HMM") {
f82c70f54bd7 Uploaded okorol parents: diff changeset	1834 if (@its1Start[$best] - 1 - $anchorLen > 0) {
f82c70f54bd7 Uploaded okorol parents: diff changeset	1835 print ITS1 substr(@seqDNA[$best], @its1Start[$best] - 1 - $anchorLen, $anchorLen);
f82c70f54bd7 Uploaded okorol parents: diff changeset	1836 } else {
f82c70f54bd7 Uploaded okorol parents: diff changeset	1837 print ITS1 substr(@seqDNA[$best], 0, @its1Start[$best] - 1);
f82c70f54bd7 Uploaded okorol parents: diff changeset	1838 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1839 print ITS1 substr(@seqDNA[$best], @its1Start[$best] - 1, @its1End[$best] - @its1Start[$best] + 1);
f82c70f54bd7 Uploaded okorol parents: diff changeset	1840 if (length(@seqDNA[$best]) - @its1End[$best] - $anchorLen > 0) {
f82c70f54bd7 Uploaded okorol parents: diff changeset	1841 print ITS1 substr(@seqDNA[$best], @its1End[$best], $anchorLen);
f82c70f54bd7 Uploaded okorol parents: diff changeset	1842 } else {
f82c70f54bd7 Uploaded okorol parents: diff changeset	1843 print ITS1 substr(@seqDNA[$best], @its1End[$best]);
f82c70f54bd7 Uploaded okorol parents: diff changeset	1844 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1845 } else {
f82c70f54bd7 Uploaded okorol parents: diff changeset	1846 if (@its1Start[$best] - 1 - @allanchorLens[0] > 0) {
f82c70f54bd7 Uploaded okorol parents: diff changeset	1847 print ITS1 substr(@seqDNA[$best], @its1Start[$best] - 1 - @allanchorLens[0], @allanchorLens[0]);
f82c70f54bd7 Uploaded okorol parents: diff changeset	1848 } else {
f82c70f54bd7 Uploaded okorol parents: diff changeset	1849 print ITS1 substr(@seqDNA[$best], 0, @its1Start[$best] - 1);
f82c70f54bd7 Uploaded okorol parents: diff changeset	1850 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1851 print ITS1 substr(@seqDNA[$best], @its1Start[$best] - 1, @its1End[$best] - @its1Start[$best] + 1);
f82c70f54bd7 Uploaded okorol parents: diff changeset	1852 if (length(@seqDNA[$best]) - @its1End[$best] - @allanchorLens[1] > 0) {
f82c70f54bd7 Uploaded okorol parents: diff changeset	1853 print ITS1 substr(@seqDNA[$best], @its1End[$best], @allanchorLens[1]);
f82c70f54bd7 Uploaded okorol parents: diff changeset	1854 } else {
f82c70f54bd7 Uploaded okorol parents: diff changeset	1855 print ITS1 substr(@seqDNA[$best], @its1End[$best]);
f82c70f54bd7 Uploaded okorol parents: diff changeset	1856 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1857
f82c70f54bd7 Uploaded okorol parents: diff changeset	1858 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1859 print ITS1 "\n"; # Write DNA sequence
f82c70f54bd7 Uploaded okorol parents: diff changeset	1860 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1861 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1862 if ($out_partial > 0) {
f82c70f54bd7 Uploaded okorol parents: diff changeset	1863 if (@problemCode[$best] !~ m/[S15]/) {
f82c70f54bd7 Uploaded okorol parents: diff changeset	1864 if ($out_preserve == 1) { # If sequence headers should be preserved
f82c70f54bd7 Uploaded okorol parents: diff changeset	1865 print ITS1PARTIAL $headers{@seqID[$best]} . "\n"; # Write FASTA definition line
f82c70f54bd7 Uploaded okorol parents: diff changeset	1866 } else {
f82c70f54bd7 Uploaded okorol parents: diff changeset	1867 print ITS1PARTIAL ">" . @seqID[$best] . "\|" . @seqITSType[$best] . "\|ITS1 " . "Extracted Full ITS1 sequence " . @its1Start[$best] . "-" . @its1End[$best] . " (" . $seqPartLen . " bp)\n"; # Write FASTA definition line
f82c70f54bd7 Uploaded okorol parents: diff changeset	1868 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1869 if (uc($anchor) ne "HMM") {
f82c70f54bd7 Uploaded okorol parents: diff changeset	1870 if (@its1Start[$best] - 1 - $anchorLen > 0) {
f82c70f54bd7 Uploaded okorol parents: diff changeset	1871 print ITS1PARTIAL substr(@seqDNA[$best], @its1Start[$best] - 1 - $anchorLen, $anchorLen);
f82c70f54bd7 Uploaded okorol parents: diff changeset	1872 } else {
f82c70f54bd7 Uploaded okorol parents: diff changeset	1873 print ITS1PARTIAL substr(@seqDNA[$best], 0, @its1Start[$best] - 1);
f82c70f54bd7 Uploaded okorol parents: diff changeset	1874 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1875 print ITS1PARTIAL substr(@seqDNA[$best], @its1Start[$best] - 1, @its1End[$best] - @its1Start[$best] + 1);
f82c70f54bd7 Uploaded okorol parents: diff changeset	1876 if (length(@seqDNA[$best]) - @its1End[$best] - $anchorLen > 0) {
f82c70f54bd7 Uploaded okorol parents: diff changeset	1877 print ITS1PARTIAL substr(@seqDNA[$best], @its1End[$best], $anchorLen);
f82c70f54bd7 Uploaded okorol parents: diff changeset	1878 } else {
f82c70f54bd7 Uploaded okorol parents: diff changeset	1879 print ITS1PARTIAL substr(@seqDNA[$best], @its1End[$best]);
f82c70f54bd7 Uploaded okorol parents: diff changeset	1880 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1881 } else {
f82c70f54bd7 Uploaded okorol parents: diff changeset	1882 if (@its1Start[$best] - 1 - @allanchorLens[0] > 0) {
f82c70f54bd7 Uploaded okorol parents: diff changeset	1883 print ITS1PARTIAL substr(@seqDNA[$best], @its1Start[$best] - 1 - @allanchorLens[0], @allanchorLens[0]);
f82c70f54bd7 Uploaded okorol parents: diff changeset	1884 } else {
f82c70f54bd7 Uploaded okorol parents: diff changeset	1885 print ITS1PARTIAL substr(@seqDNA[$best], 0, @its1Start[$best] - 1);
f82c70f54bd7 Uploaded okorol parents: diff changeset	1886 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1887 print ITS1PARTIAL substr(@seqDNA[$best], @its1Start[$best] - 1, @its1End[$best] - @its1Start[$best] + 1);
f82c70f54bd7 Uploaded okorol parents: diff changeset	1888 if (length(@seqDNA[$best]) - @its1End[$best] - @allanchorLens[1] > 0) {
f82c70f54bd7 Uploaded okorol parents: diff changeset	1889 print ITS1PARTIAL substr(@seqDNA[$best], @its1End[$best], @allanchorLens[1]);
f82c70f54bd7 Uploaded okorol parents: diff changeset	1890 } else {
f82c70f54bd7 Uploaded okorol parents: diff changeset	1891 print ITS1PARTIAL substr(@seqDNA[$best], @its1End[$best]);
f82c70f54bd7 Uploaded okorol parents: diff changeset	1892 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1893
f82c70f54bd7 Uploaded okorol parents: diff changeset	1894 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1895 print ITS1PARTIAL "\n"; # Write DNA sequence
f82c70f54bd7 Uploaded okorol parents: diff changeset	1896 } else {
f82c70f54bd7 Uploaded okorol parents: diff changeset	1897 if ($out_partial < $seqPartLen) {
f82c70f54bd7 Uploaded okorol parents: diff changeset	1898 if ($out_preserve == 1) { # If sequence headers should be preserved
f82c70f54bd7 Uploaded okorol parents: diff changeset	1899 print ITS1PARTIAL $headers{@seqID[$best]} . "\n"; # Write FASTA definition line
f82c70f54bd7 Uploaded okorol parents: diff changeset	1900 } else {
f82c70f54bd7 Uploaded okorol parents: diff changeset	1901 print ITS1PARTIAL ">" . @seqID[$best] . "\|" . @seqITSType[$best] . "\|ITS1 " . "Extracted Partial ITS1 sequence " . @its1Start[$best] . "-" . @its1End[$best] . " (" . $seqPartLen . " bp)\n"; # Write FASTA definition line
f82c70f54bd7 Uploaded okorol parents: diff changeset	1902 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1903 if (uc($anchor) ne "HMM") {
f82c70f54bd7 Uploaded okorol parents: diff changeset	1904 if (@its1Start[$best] - 1 - $anchorLen > 0) {
f82c70f54bd7 Uploaded okorol parents: diff changeset	1905 print ITS1PARTIAL substr(@seqDNA[$best], @its1Start[$best] - 1 - $anchorLen, $anchorLen);
f82c70f54bd7 Uploaded okorol parents: diff changeset	1906 } else {
f82c70f54bd7 Uploaded okorol parents: diff changeset	1907 print ITS1PARTIAL substr(@seqDNA[$best], 0, @its1Start[$best] - 1);
f82c70f54bd7 Uploaded okorol parents: diff changeset	1908 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1909 print ITS1PARTIAL substr(@seqDNA[$best], @its1Start[$best] - 1, @its1End[$best] - @its1Start[$best] + 1);
f82c70f54bd7 Uploaded okorol parents: diff changeset	1910 if (length(@seqDNA[$best]) - @its1End[$best] - $anchorLen > 0) {
f82c70f54bd7 Uploaded okorol parents: diff changeset	1911 print ITS1PARTIAL substr(@seqDNA[$best], @its1End[$best], $anchorLen);
f82c70f54bd7 Uploaded okorol parents: diff changeset	1912 } else {
f82c70f54bd7 Uploaded okorol parents: diff changeset	1913 print ITS1PARTIAL substr(@seqDNA[$best], @its1End[$best]);
f82c70f54bd7 Uploaded okorol parents: diff changeset	1914 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1915 } else {
f82c70f54bd7 Uploaded okorol parents: diff changeset	1916 if (@its1Start[$best] - 1 - @allanchorLens[0] > 0) {
f82c70f54bd7 Uploaded okorol parents: diff changeset	1917 print ITS1PARTIAL substr(@seqDNA[$best], @its1Start[$best] - 1 - @allanchorLens[0], @allanchorLens[0]);
f82c70f54bd7 Uploaded okorol parents: diff changeset	1918 } else {
f82c70f54bd7 Uploaded okorol parents: diff changeset	1919 print ITS1PARTIAL substr(@seqDNA[$best], 0, @its1Start[$best] - 1);
f82c70f54bd7 Uploaded okorol parents: diff changeset	1920 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1921 print ITS1PARTIAL substr(@seqDNA[$best], @its1Start[$best] - 1, @its1End[$best] - @its1Start[$best] + 1);
f82c70f54bd7 Uploaded okorol parents: diff changeset	1922 if (length(@seqDNA[$best]) - @its1End[$best] - @allanchorLens[1] > 0) {
f82c70f54bd7 Uploaded okorol parents: diff changeset	1923 print ITS1PARTIAL substr(@seqDNA[$best], @its1End[$best], @allanchorLens[1]);
f82c70f54bd7 Uploaded okorol parents: diff changeset	1924 } else {
f82c70f54bd7 Uploaded okorol parents: diff changeset	1925 print ITS1PARTIAL substr(@seqDNA[$best], @its1End[$best]);
f82c70f54bd7 Uploaded okorol parents: diff changeset	1926 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1927 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1928 print ITS1PARTIAL "\n"; # Write DNA sequence
f82c70f54bd7 Uploaded okorol parents: diff changeset	1929 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1930 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1931 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1932 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	1933
## Write ITS2 sequence to file
# When ITS2 output is enabled and the ITS2 region of the best hit spans more
# than one base, a FASTA record is written to ITS2 and, when partial output is
# requested, to ITS2PARTIAL as well. Every record consists of a definition
# line, the upstream flank, the ITS2 region, the downstream flank and a
# newline, each emitted by its own print statement.
if ($out_its2 == 1) { # If ITS2 output is on
    $seqPartLen = $its2End[$best] - $its2Start[$best] + 1;
    if ($seqPartLen > 1) {
        # Flank lengths: $anchorLen on both sides, unless the anchor option is
        # "HMM" (case-insensitive), in which case elements 2 and 3 of
        # @allanchorLens are used for the upstream and downstream side.
        my ($upstreamLen, $downstreamLen) = (uc($anchor) ne "HMM")
            ? ($anchorLen, $anchorLen)
            : ($allanchorLens[2], $allanchorLens[3]);

        # Writes one complete record to the given file handle. $description is
        # the free-text part of the generated definition line; it is ignored
        # when the original sequence headers are preserved.
        my $writeRecord = sub {
            my ($fh, $description) = @_;

            if ($out_preserve == 1) { # If sequence headers should be preserved
                print {$fh} $headers{$seqID[$best]} . "\n"; # Write FASTA definition line
            } else {
                print {$fh} ">" . $seqID[$best] . "|" . $seqITSType[$best] . "|ITS2 " . $description . " " . $its2Start[$best] . "-" . $its2End[$best] . " (" . $seqPartLen . " bp)\n"; # Write FASTA definition line
            }

            # Upstream flank: the requested number of bases, or everything
            # before the region when fewer bases than that are available.
            if ($its2Start[$best] - 1 - $upstreamLen > 0) {
                print {$fh} substr($seqDNA[$best], $its2Start[$best] - 1 - $upstreamLen, $upstreamLen);
            } else {
                print {$fh} substr($seqDNA[$best], 0, $its2Start[$best] - 1);
            }

            # The ITS2 region itself (coordinates are 1-based, substr is 0-based)
            print {$fh} substr($seqDNA[$best], $its2Start[$best] - 1, $seqPartLen);

            # Downstream flank: the requested number of bases, or the rest of
            # the sequence when fewer bases than that remain.
            if (length($seqDNA[$best]) - $its2End[$best] - $downstreamLen > 0) {
                print {$fh} substr($seqDNA[$best], $its2End[$best], $downstreamLen);
            } else {
                print {$fh} substr($seqDNA[$best], $its2End[$best]);
            }

            print {$fh} "\n"; # Terminate the DNA sequence line
        };

        my $code = $problemCode[$best];

        # Main ITS2 file: problem codes 2 and 5 always exclude the sequence;
        # code L additionally excludes it when only full sequences are wanted.
        if ( (($only_full == 0) && ($code !~ m/[25]/)) || (($only_full == 1) && ($code !~ m/[L25]/)) ) {
            $writeRecord->(\*ITS2, "Extracted ITS2 sequence");
        }

        # Partial ITS2 file: sequences without any of the codes L, 2 or 5 are
        # labelled "Full"; the others are labelled "Partial" and only written
        # when the region is longer than the partial-output cutoff.
        if ($out_partial > 0) {
            if ($code !~ m/[L25]/) {
                $writeRecord->(\*ITS2PARTIAL, "Extracted Full ITS2 sequence");
            } elsif ($out_partial < $seqPartLen) {
                $writeRecord->(\*ITS2PARTIAL, "Extracted Partial ITS2 sequence");
            }
        }
    }
}
f82c70f54bd7 Uploaded okorol parents: diff changeset	2042
## Output concatenated ITS1 + ITS2 sequences
# A region takes part in the record only when it is at least $concat_minlen
# bases long. The "-----" spacer is always written, so a record that holds
# only ITS1 ends with the spacer and one that holds only ITS2 starts with it.
# Nothing is written when both regions are too short.
if ($out_concat == 1) {
    $seqPartLen1 = $its1End[$best] - $its1Start[$best] + 1;
    $seqPartLen2 = $its2End[$best] - $its2Start[$best] + 1;

    my $its1LongEnough = ($seqPartLen1 >= $concat_minlen);
    my $its2LongEnough = ($seqPartLen2 >= $concat_minlen);

    if ($its1LongEnough || $its2LongEnough) {
        if ($out_preserve == 1) { # If sequence headers should be preserved
            print CONCAT $headers{$seqID[$best]} . "\n"; # Write FASTA definition line
        } else {
            # Region tag and description depend on which regions are included
            my $summary;
            if ($its1LongEnough && $its2LongEnough) {
                $summary = "ITS1+2 " . "Concatenated ITS1 and ITS2 sequences (" . ($seqPartLen1 + $seqPartLen2);
            } elsif ($its1LongEnough) {
                $summary = "ITS1 " . "ITS1 sequence (ITS2 too short) (" . $seqPartLen1;
            } else {
                $summary = "ITS2 " . "ITS2 sequence (ITS1 too short) (" . $seqPartLen2;
            }
            print CONCAT ">" . $seqID[$best] . "|" . $seqITSType[$best] . "|" . $summary . " bp)\n"; # Write FASTA definition line
        }

        if ($its1LongEnough) {
            print CONCAT substr($seqDNA[$best], $its1Start[$best] - 1, $seqPartLen1); # Write ITS1 DNA sequence
        }
        print CONCAT "-----"; # Write spacer
        if ($its2LongEnough) {
            print CONCAT substr($seqDNA[$best], $its2Start[$best] - 1, $seqPartLen2); # Write ITS2 DNA sequence
        }
        print CONCAT "\n"; # Write newline
    }
}
f82c70f54bd7 Uploaded okorol parents: diff changeset	2080
## Print all sequences to the joined file for debugging
# Writes one FASTA record per region (SSU, ITS1, 5.8S, ITS2, LSU, in that
# order) of the best hit to JOINED. $seqPartLen is overwritten for every
# region and holds the LSU length afterwards.
if ($out_joined == 1) {
    # Region tag, start/end coordinate arrays, and whether the generated
    # definition line carries the "start-end" range (only ITS1 does).
    # NOTE(review): the ITS2 line omits the range although the ITS1 line has
    # it; kept as is so the debug output stays unchanged.
    my @regions = (
        [ "SSU",  \@ssuStart,  \@ssuEnd,  0 ],
        [ "ITS1", \@its1Start, \@its1End, 1 ],
        [ "5.8S", \@midStart,  \@midEnd,  0 ],
        [ "ITS2", \@its2Start, \@its2End, 0 ],
        [ "LSU",  \@lsuStart,  \@lsuEnd,  0 ],
    );

    foreach my $region (@regions) {
        my ($tag, $starts, $ends, $showRange) = @{$region};
        my $regionStart = $starts->[$best];
        my $regionEnd   = $ends->[$best];

        $seqPartLen = $regionEnd - $regionStart + 1;

        if ($out_preserve == 1) { # If sequence headers should be preserved
            print JOINED $headers{$seqID[$best]} . "\n"; # Write FASTA definition line
        } else {
            my $range = $showRange ? " " . $regionStart . "-" . $regionEnd : "";
            print JOINED ">" . $seqID[$best] . "|" . $seqITSType[$best] . "|" . $tag . " Extracted " . $tag . " sequence" . $range . " (" . $seqPartLen . " bp)\n"; # Write FASTA definition line
        }

        # Coordinates are 1-based, substr offsets are 0-based
        print JOINED substr($seqDNA[$best], $regionStart - 1, $seqPartLen) . "\n"; # Write DNA sequence
    }
}
f82c70f54bd7 Uploaded okorol parents: diff changeset	2123
f82c70f54bd7 Uploaded okorol parents: diff changeset	2124
if ($out_results == 1) {
    ## Print info on all matches, including those that are not the top one, to the results file...
    # One "Type: domain-count average-E-value average-score" entry per
    # candidate ITS type, separated by ", " and terminated by a newline.
    # The file-level $i is used as the loop counter and is left equal to the
    # number of entries afterwards.
    my $lastIndex = scalar(@seqITSType) - 1;
    for ($i = 0; $i <= $lastIndex; $i++) { # Go through all the possible ITS types...
        print RESULTS $seqITSType[$i] . ": " . $seqDomCounts[$i] . " " . $seqAvgE[$i] . " " . $seqAvgScore[$i]; # Write some info on this type (Type, Domain count, Average E-value, Average score)
        if ($i < $lastIndex) { # If this is not the last domain type
            print RESULTS ", "; # Write a comma
        }
    }
    print RESULTS "\n"; # Write end of line
}
f82c70f54bd7 Uploaded okorol parents: diff changeset	2135 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	2136
## Undefine all used arrays for the next round...
# Each array listed here is emptied with undef, exactly as a separate
# "undef @name;" statement per array would do.
# NOTE(review): @anchorLens is reset here, while the surrounding code reads
# @allanchorLens and resets @hitanchorlens; confirm this is the intended array.
undef @{$_} for (
    \@seqID,        \@seqITSType,  \@seqCompl,
    \@seqDomCounts, \@seqAvgE,     \@seqAvgScore,
    \@seqScoreSum,  \@seqDNA,      \@allSeqDomains,
    \@dnaStart,     \@dnaEnd,
    \@ssuStart,     \@ssuEnd,
    \@lsuStart,     \@lsuEnd,
    \@midStart,     \@midEnd,
    \@its1Start,    \@its1End,
    \@its2Start,    \@its2End,
    \@startDomain,  \@endDomain,
    \@domain_order, \@sorted_domain_order,
    \@problem,      \@problemCode,
    \@anchorLens,
);
f82c70f54bd7 Uploaded okorol parents: diff changeset	2166 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	2167 if (@item[0] ne "") { # Add this entry to the set (regardless if the entry has the same ID as entries already in the set), as long as it is non-empty
f82c70f54bd7 Uploaded okorol parents: diff changeset	2168 push(@seqID, @item[0]); # Add sequence ID
f82c70f54bd7 Uploaded okorol parents: diff changeset	2169 push(@seqITSType, @item[1]); # Add ITS type
f82c70f54bd7 Uploaded okorol parents: diff changeset	2170 push(@seqCompl, @item[2]); # Add main/complementary strand info
f82c70f54bd7 Uploaded okorol parents: diff changeset	2171 push(@seqDomCounts, @item[3]); # Add domain count
f82c70f54bd7 Uploaded okorol parents: diff changeset	2172 push(@seqAvgE, @item[4]); # Add average E-value
f82c70f54bd7 Uploaded okorol parents: diff changeset	2173 push(@seqAvgScore, @item[5]); # Add average score
f82c70f54bd7 Uploaded okorol parents: diff changeset	2174 push(@seqScoreSum, @item[6]); # Add sum-of-scores
f82c70f54bd7 Uploaded okorol parents: diff changeset	2175 push(@seqDNA, @item[7]); # Add DNA sequence
f82c70f54bd7 Uploaded okorol parents: diff changeset	2176
f82c70f54bd7 Uploaded okorol parents: diff changeset	2177 ## Determine first and last domains, and their positions
f82c70f54bd7 Uploaded okorol parents: diff changeset	2178 ## Reset the per-sequence variables to default/placeholder values before the domain hits are scanned
f82c70f54bd7 Uploaded okorol parents: diff changeset	2179 $dnaEnd = length(@item[7]); # Default end of the ITS sequence: the length of the DNA sequence
f82c70f54bd7 Uploaded okorol parents: diff changeset	2180 $startDomain = "***"; # Placeholder meaning that no starting domain has been assigned yet
f82c70f54bd7 Uploaded okorol parents: diff changeset	2181 $endDomain = "***"; # Placeholder meaning that no ending domain has been assigned yet
f82c70f54bd7 Uploaded okorol parents: diff changeset	2182 $dnaStart = 1; # Default start of the ITS sequence: the first position of the sequence
f82c70f54bd7 Uploaded okorol parents: diff changeset	2183 $allDomains = ""; # Space-separated list of all domains found in this sequence
f82c70f54bd7 Uploaded okorol parents: diff changeset	2184 $ssuStart = 1; # Default start of the SSU sequence
f82c70f54bd7 Uploaded okorol parents: diff changeset	2185 $ssuEnd = 0; # Stays 0 unless a 1_SSU hit sets it
f82c70f54bd7 Uploaded okorol parents: diff changeset	2186 $midStart = 1; # Default start of the 5.8S sequence
f82c70f54bd7 Uploaded okorol parents: diff changeset	2187 $midEnd = 0; # Stays 0 unless a 3_End hit sets it
f82c70f54bd7 Uploaded okorol parents: diff changeset	2188 $lsuStart = 1; # Default start of the LSU sequence
f82c70f54bd7 Uploaded okorol parents: diff changeset	2189 $lsuEnd = 0; # Stays 0 unless a 4_LSU hit sets it
f82c70f54bd7 Uploaded okorol parents: diff changeset	2190 $its1Start = 1; # Default start of the ITS1 sequence: the first position of the sequence
f82c70f54bd7 Uploaded okorol parents: diff changeset	2191 $its1End = length(@item[7]); # Default end of the ITS1 sequence: the length of the DNA sequence
f82c70f54bd7 Uploaded okorol parents: diff changeset	2192 $its2Start = 1; # Default start of the ITS2 sequence: the first position of the sequence
f82c70f54bd7 Uploaded okorol parents: diff changeset	2193 $its2End = length(@item[7]); # Default end of the ITS2 sequence: the length of the DNA sequence
f82c70f54bd7 Uploaded okorol parents: diff changeset	2194 $problem = ""; # Problem description in text form (empty while no problem has been noted)
f82c70f54bd7 Uploaded okorol parents: diff changeset	2195 $problemCode = ""; # Problem description in code form (empty while no problem has been noted)
f82c70f54bd7 Uploaded okorol parents: diff changeset	2196
f82c70f54bd7 Uploaded okorol parents: diff changeset	2197 $ssuFound = 0; # Flag: set to 1 when a 1_SSU hit is seen
f82c70f54bd7 Uploaded okorol parents: diff changeset	2198 $lsuFound = 0; # Flag: set to 1 when a 4_LSU hit is seen
f82c70f54bd7 Uploaded okorol parents: diff changeset	2199 $midSFound = 0; # Flag: set to 1 when a 2_5.8 hit (start of 5.8S) is seen
f82c70f54bd7 Uploaded okorol parents: diff changeset	2200 $midEFound = 0; # Flag: set to 1 when a 3_End hit (end of 5.8S) is seen
f82c70f54bd7 Uploaded okorol parents: diff changeset	2201 $order = ""; # Digits 1-4, appended in the order in which the domain hits are listed
f82c70f54bd7 Uploaded okorol parents: diff changeset	2202 undef @hitanchorlens; # Clear the per-domain anchor lengths
f82c70f54bd7 Uploaded okorol parents: diff changeset	2203
f82c70f54bd7 Uploaded okorol parents: diff changeset	2204 for ($i = 8; $i < scalar(@item); $i++) { # Go through the list of found domains in this sequence (items 8 and onwards)
f82c70f54bd7 Uploaded okorol parents: diff changeset	2205 ($hitStart,$hitEnd,$hitProfile,$hitScore,$hitEval,$hitanchorlen) = split(';',@item[$i]); # Separate the hit stats into variables
f82c70f54bd7 Uploaded okorol parents: diff changeset	2206 $allDomains = $allDomains . $hitProfile . " "; # Add found domain to the list of all domains
f82c70f54bd7 Uploaded okorol parents: diff changeset	2207
f82c70f54bd7 Uploaded okorol parents: diff changeset	2208 # if ($hitStart < $dnaStart) { # If this domain is the first one so far
f82c70f54bd7 Uploaded okorol parents: diff changeset	2209 # $dnaStart = $hitStart; # Set the start of the ITS sequence to this domain's start
f82c70f54bd7 Uploaded okorol parents: diff changeset	2210 # $startDomain = $hitProfile; # Set this domain as the starting domain
f82c70f54bd7 Uploaded okorol parents: diff changeset	2211 # }
f82c70f54bd7 Uploaded okorol parents: diff changeset	2212 # if ($hitEnd > $dnaEnd) { # If this domain is the last one so far
f82c70f54bd7 Uploaded okorol parents: diff changeset	2213 # $dnaEnd = $hitEnd; # Set the end of the ITS sequence to this domain's end
f82c70f54bd7 Uploaded okorol parents: diff changeset	2214 # $endDomain = $hitProfile; # Set this domain as the ending domain
f82c70f54bd7 Uploaded okorol parents: diff changeset	2215 # }
f82c70f54bd7 Uploaded okorol parents: diff changeset	2216
f82c70f54bd7 Uploaded okorol parents: diff changeset	2217 if (substr($hitProfile,0,5) eq "1_SSU" ) { # If this domain is the SSU's end
f82c70f54bd7 Uploaded okorol parents: diff changeset	2218 $dnaStart = $hitStart; # Set the start of the ITS sequence to this domain's start
f82c70f54bd7 Uploaded okorol parents: diff changeset	2219 $startDomain = $hitProfile; # Set this domain as the starting domain
f82c70f54bd7 Uploaded okorol parents: diff changeset	2220 $ssuStart = 1; # Set the start of the SSU sequence to the start of the sequence
f82c70f54bd7 Uploaded okorol parents: diff changeset	2221 $ssuEnd = $hitEnd; # Set the end of the SSU sequence to this domain's end
f82c70f54bd7 Uploaded okorol parents: diff changeset	2222 $its1Start = $hitEnd + 1; # Set the start of the ITS1 sequence to right after this domain's end
f82c70f54bd7 Uploaded okorol parents: diff changeset	2223 if ($midEFound == 0) { # Only if the end of the 5.8S has not been seen so far
f82c70f54bd7 Uploaded okorol parents: diff changeset	2224 $its2Start = $hitEnd + 1; # Set the start of the ITS2 sequence to right after this domain's end
f82c70f54bd7 Uploaded okorol parents: diff changeset	2225 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	2226 $ssuFound = 1; # Remember that the SSU's end was found
f82c70f54bd7 Uploaded okorol parents: diff changeset	2227 $order = $order . "1"; # Record the position of this domain in the hit order
f82c70f54bd7 Uploaded okorol parents: diff changeset	2228 @hitanchorlens[1] = $hitanchorlen; # Store the anchor length of this hit in slot 1
f82c70f54bd7 Uploaded okorol parents: diff changeset	2229 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	2230
f82c70f54bd7 Uploaded okorol parents: diff changeset	2231 if (substr($hitProfile,0,5) eq "2_5.8" ) { # If this domain is the 5.8S's start
f82c70f54bd7 Uploaded okorol parents: diff changeset	2232 if ($startDomain eq "***") { # If no starting domain has been assigned yet
f82c70f54bd7 Uploaded okorol parents: diff changeset	2233 $dnaStart = 1; # Set the start of the ITS sequence to the start of the sequence
f82c70f54bd7 Uploaded okorol parents: diff changeset	2234 $startDomain = $hitProfile; # Set this domain as the starting domain
f82c70f54bd7 Uploaded okorol parents: diff changeset	2235 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	2236 $its1End = $hitStart - 1; # Set the end of the ITS1 sequence to right before this domain's start
f82c70f54bd7 Uploaded okorol parents: diff changeset	2237 if ($midEFound == 0) { # Only if the end of the 5.8S has not been seen so far
f82c70f54bd7 Uploaded okorol parents: diff changeset	2238 $its2Start = $hitEnd + 1; # Set the start of the ITS2 sequence to right after this domain's end
f82c70f54bd7 Uploaded okorol parents: diff changeset	2239 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	2240 $midStart = $hitStart; # Set the start of the 5.8S sequence to this domain's start
f82c70f54bd7 Uploaded okorol parents: diff changeset	2241 $midSFound = 1; # Remember that the 5.8S's start was found
f82c70f54bd7 Uploaded okorol parents: diff changeset	2242 $order = $order . "2"; # Record the position of this domain in the hit order
f82c70f54bd7 Uploaded okorol parents: diff changeset	2243 @hitanchorlens[2] = $hitanchorlen; # Store the anchor length of this hit in slot 2
f82c70f54bd7 Uploaded okorol parents: diff changeset	2244 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	2245
f82c70f54bd7 Uploaded okorol parents: diff changeset	2246 if (substr($hitProfile,0,5) eq "3_End" ) { # If this domain is the 5.8S's end
f82c70f54bd7 Uploaded okorol parents: diff changeset	2247 if ($startDomain eq "***") { # If no starting domain has been assigned yet
f82c70f54bd7 Uploaded okorol parents: diff changeset	2248 $dnaStart = 1; # Set the start of the ITS sequence to the start of the sequence
f82c70f54bd7 Uploaded okorol parents: diff changeset	2249 $startDomain = $hitProfile; # Set this domain as the starting domain
f82c70f54bd7 Uploaded okorol parents: diff changeset	2250 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	2251 if ($midSFound == 0) { # Only if the start of the 5.8S has not been seen so far
f82c70f54bd7 Uploaded okorol parents: diff changeset	2252 $its1End = $hitStart - 1; # Set the end of the ITS1 sequence to right before this domain's start
f82c70f54bd7 Uploaded okorol parents: diff changeset	2253 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	2254 $its2Start = $hitEnd + 1; # Set the start of the ITS2 sequence to right after this domain's end
f82c70f54bd7 Uploaded okorol parents: diff changeset	2255 $midEnd = $hitEnd; # Set the end of the 5.8S sequence to this domain's end
f82c70f54bd7 Uploaded okorol parents: diff changeset	2256 $midEFound = 1; # Remember that the 5.8S's end was found
f82c70f54bd7 Uploaded okorol parents: diff changeset	2257 $order = $order . "3"; # Record the position of this domain in the hit order
f82c70f54bd7 Uploaded okorol parents: diff changeset	2258 @hitanchorlens[3] = $hitanchorlen; # Store the anchor length of this hit in slot 3
f82c70f54bd7 Uploaded okorol parents: diff changeset	2259 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	2260
f82c70f54bd7 Uploaded okorol parents: diff changeset	2261 if (substr($hitProfile,0,5) eq "4_LSU" ) { # If this domain is the LSU's start
f82c70f54bd7 Uploaded okorol parents: diff changeset	2262 if ($startDomain eq "***") { # If no starting domain has been assigned yet
f82c70f54bd7 Uploaded okorol parents: diff changeset	2263 $dnaStart = 1; # Set the start of the ITS sequence to the start of the sequence
f82c70f54bd7 Uploaded okorol parents: diff changeset	2264 $startDomain = $hitProfile; # Set this domain as the starting domain
f82c70f54bd7 Uploaded okorol parents: diff changeset	2265 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	2266 $dnaEnd = $hitEnd; # Set the end of the ITS sequence to this domain's end
f82c70f54bd7 Uploaded okorol parents: diff changeset	2267 $endDomain = $hitProfile; # Set this domain as the ending domain
f82c70f54bd7 Uploaded okorol parents: diff changeset	2268 $lsuStart = $hitStart; # Set the start of the LSU sequence to this domain's start
f82c70f54bd7 Uploaded okorol parents: diff changeset	2269 $lsuEnd = length(@item[7]); # Set the end of the LSU sequence to the end of the sequence
f82c70f54bd7 Uploaded okorol parents: diff changeset	2270 $its2End = $hitStart - 1; # Set the end of the ITS2 sequence to right before this domain's start
f82c70f54bd7 Uploaded okorol parents: diff changeset	2271 if ($midSFound == 0) { # Only if the start of the 5.8S has not been seen so far
f82c70f54bd7 Uploaded okorol parents: diff changeset	2272 $its1End = $hitStart - 1; # Set the end of the ITS1 sequence to right before this domain's start
f82c70f54bd7 Uploaded okorol parents: diff changeset	2273 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	2274 $lsuFound = 1; # Remember that the LSU's start was found
f82c70f54bd7 Uploaded okorol parents: diff changeset	2275 $order = $order . "4"; # Record the position of this domain in the hit order
f82c70f54bd7 Uploaded okorol parents: diff changeset	2276 @hitanchorlens[4] = $hitanchorlen; # Store the anchor length of this hit in slot 4
f82c70f54bd7 Uploaded okorol parents: diff changeset	2277 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	2278 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	2279
f82c70f54bd7 Uploaded okorol parents: diff changeset	2280 if ($ssuFound == 0) { # If no 1_SSU hit was seen for this sequence
f82c70f54bd7 Uploaded okorol parents: diff changeset	2281 if ($problem ne "") { # If a problem has already been noted
f82c70f54bd7 Uploaded okorol parents: diff changeset	2282 $problem = $problem . "; "; # Separate this problem from the earlier ones
f82c70f54bd7 Uploaded okorol parents: diff changeset	2283 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	2284 $problem = $problem . "End of SSU sequence not found";
f82c70f54bd7 Uploaded okorol parents: diff changeset	2285 $problemCode = $problemCode . "S"; # Code S: end of SSU not found
f82c70f54bd7 Uploaded okorol parents: diff changeset	2286 $order = "1" . $order; # Prepend a 1 for the missing SSU, so that the order string still starts with 1
f82c70f54bd7 Uploaded okorol parents: diff changeset	2287 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	2288
f82c70f54bd7 Uploaded okorol parents: diff changeset	2289 if ($lsuFound == 0) { # If no 4_LSU hit was seen for this sequence
f82c70f54bd7 Uploaded okorol parents: diff changeset	2290 if ($problem ne "") { # If a problem has already been noted
f82c70f54bd7 Uploaded okorol parents: diff changeset	2291 $problem = $problem . "; "; # Separate this problem from the earlier ones
f82c70f54bd7 Uploaded okorol parents: diff changeset	2292 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	2293 $problem = $problem . "Start of LSU sequence not found";
f82c70f54bd7 Uploaded okorol parents: diff changeset	2294 $problemCode = $problemCode . "L"; # Code L: start of LSU not found
f82c70f54bd7 Uploaded okorol parents: diff changeset	2295 $order = $order . "4"; # Append a 4 for the missing LSU, so that the order string still ends with 4
f82c70f54bd7 Uploaded okorol parents: diff changeset	2296 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	2297
f82c70f54bd7 Uploaded okorol parents: diff changeset	2298 if (($midSFound == 0) && ($midEFound == 0)) { # If neither the start nor the end of the 5.8S was seen
f82c70f54bd7 Uploaded okorol parents: diff changeset	2299 if ($problem ne "") { # If a problem has already been noted
f82c70f54bd7 Uploaded okorol parents: diff changeset	2300 $problem = $problem . "; "; # Separate this problem from the earlier ones
f82c70f54bd7 Uploaded okorol parents: diff changeset	2301 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	2302 $problem = $problem . "The 5.8S sequence was not found at all";
f82c70f54bd7 Uploaded okorol parents: diff changeset	2303 $problemCode = $problemCode . "5"; # Code 5: no 5.8S hit at all
f82c70f54bd7 Uploaded okorol parents: diff changeset	2304 } else { # Else, at least one end of the 5.8S was seen
f82c70f54bd7 Uploaded okorol parents: diff changeset	2305 if ($midSFound == 0) { # If only the start of the 5.8S is missing
f82c70f54bd7 Uploaded okorol parents: diff changeset	2306 if ($problem ne "") { # If a problem has already been noted
f82c70f54bd7 Uploaded okorol parents: diff changeset	2307 $problem = $problem . "; "; # Separate this problem from the earlier ones
f82c70f54bd7 Uploaded okorol parents: diff changeset	2308 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	2309 $problem = $problem . "Start of 5.8S sequence not found";
f82c70f54bd7 Uploaded okorol parents: diff changeset	2310 $problemCode = $problemCode . "1"; # Code 1: start of 5.8S not found
f82c70f54bd7 Uploaded okorol parents: diff changeset	2311 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	2312 if ($midEFound == 0) { # If only the end of the 5.8S is missing
f82c70f54bd7 Uploaded okorol parents: diff changeset	2313 if ($problem ne "") { # If a problem has already been noted
f82c70f54bd7 Uploaded okorol parents: diff changeset	2314 $problem = $problem . "; "; # Separate this problem from the earlier ones
f82c70f54bd7 Uploaded okorol parents: diff changeset	2315 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	2316 $problem = $problem . "End of 5.8S sequence not found";
f82c70f54bd7 Uploaded okorol parents: diff changeset	2317 $problemCode = $problemCode . "2"; # Code 2: end of 5.8S not found
f82c70f54bd7 Uploaded okorol parents: diff changeset	2318 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	2319 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	2320
f82c70f54bd7 Uploaded okorol parents: diff changeset	2321 if ($dnaEnd - $dnaStart > 1500) { # If the ITS region spans more than 1500 positions
f82c70f54bd7 Uploaded okorol parents: diff changeset	2322 if ($problem ne "") { # If a problem has already been noted
f82c70f54bd7 Uploaded okorol parents: diff changeset	2323 $problem = $problem . "; "; # Separate this problem from the earlier ones
f82c70f54bd7 Uploaded okorol parents: diff changeset	2324 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	2325 $problem = $problem . "ITS region is suspiciously long (> 1500 bp)";
f82c70f54bd7 Uploaded okorol parents: diff changeset	2326 $problemCode = $problemCode . "B"; # Code B: suspiciously long ITS region
f82c70f54bd7 Uploaded okorol parents: diff changeset	2327 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	2328
f82c70f54bd7 Uploaded okorol parents: diff changeset	2329 if (length($order) == 4) { # If the order string has exactly four entries
f82c70f54bd7 Uploaded okorol parents: diff changeset	2330 if ($order ne "1234") { # If those entries are not in the expected 1-2-3-4 order
f82c70f54bd7 Uploaded okorol parents: diff changeset	2331 if ($problem ne "") { # If a problem has already been noted
f82c70f54bd7 Uploaded okorol parents: diff changeset	2332 $problem = $problem . "; "; # Separate this problem from the earlier ones
f82c70f54bd7 Uploaded okorol parents: diff changeset	2333 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	2334 $problem = $problem . "Domains found in wrong order, sequence may be chimeric";
f82c70f54bd7 Uploaded okorol parents: diff changeset	2335 $problemCode = $problemCode . "C"; # Code C: possibly chimeric
f82c70f54bd7 Uploaded okorol parents: diff changeset	2336 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	2337 } else { # Else, the order string has fewer or more than four entries
f82c70f54bd7 Uploaded okorol parents: diff changeset	2338 if ((substr($order,0,1) ne "1") \|\| (substr($order,-1) ne "4")) { # If the order string does not start with 1 or does not end with 4
f82c70f54bd7 Uploaded okorol parents: diff changeset	2339 if ($problem ne "") { # If a problem has already been noted
f82c70f54bd7 Uploaded okorol parents: diff changeset	2340 $problem = $problem . "; "; # Separate this problem from the earlier ones
f82c70f54bd7 Uploaded okorol parents: diff changeset	2341 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	2342 $problem = $problem . "Domains found in wrong order, sequence may be chimeric";
f82c70f54bd7 Uploaded okorol parents: diff changeset	2343 $problemCode = $problemCode . "C"; # Code C: possibly chimeric
f82c70f54bd7 Uploaded okorol parents: diff changeset	2344 } else { # Else, the outer domains are in place
f82c70f54bd7 Uploaded okorol parents: diff changeset	2345 if (($midSFound == 1) && ($midEFound == 1)) { # If both ends of the 5.8S were seen
f82c70f54bd7 Uploaded okorol parents: diff changeset	2346 if ($order !~ m/23/) { # If the 5.8S start (2) is not directly followed by the 5.8S end (3)
f82c70f54bd7 Uploaded okorol parents: diff changeset	2347 if ($problem ne "") { # If a problem has already been noted
f82c70f54bd7 Uploaded okorol parents: diff changeset	2348 $problem = $problem . "; "; # Separate this problem from the earlier ones
f82c70f54bd7 Uploaded okorol parents: diff changeset	2349 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	2350 $problem = $problem . "Domains found in wrong order, sequence may be chimeric";
f82c70f54bd7 Uploaded okorol parents: diff changeset	2351 $problemCode = $problemCode . "C"; # Code C: possibly chimeric
f82c70f54bd7 Uploaded okorol parents: diff changeset	2352 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	2353 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	2354 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	2355 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	2356
f82c70f54bd7 Uploaded okorol parents: diff changeset	2357 if ($its1End - $its1Start < 0) { # ITS1 would end before it starts (e.g. 5.8S overlaps SSU)
f82c70f54bd7 Uploaded okorol parents: diff changeset	2358 if (($midSFound == 1) && ($ssuFound == 1)) { # If both the SSU's end and the 5.8S's start were found
f82c70f54bd7 Uploaded okorol parents: diff changeset	2359 $problemCode = $problemCode . "OC"; # Code OC: SSU/5.8S overlap, possibly chimeric
f82c70f54bd7 Uploaded okorol parents: diff changeset	2360 $its1Start = 0; # Blank out the ITS1 start
f82c70f54bd7 Uploaded okorol parents: diff changeset	2361 $its1End = 0; # Blank out the ITS1 end
f82c70f54bd7 Uploaded okorol parents: diff changeset	2362 if ($problem ne "") { # If a problem has already been noted
f82c70f54bd7 Uploaded okorol parents: diff changeset	2363 $problem = $problem . "; "; # Separate this problem from the earlier ones
f82c70f54bd7 Uploaded okorol parents: diff changeset	2364 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	2365 $problem = $problem . "SSU seems to overlap 5.8S, sequence may be chimeric";
f82c70f54bd7 Uploaded okorol parents: diff changeset	2366 } else { # Else, at least one of the two flanking domains is missing
f82c70f54bd7 Uploaded okorol parents: diff changeset	2367 $problemCode = $problemCode . "X"; # Code X: no ITS1 sequence
f82c70f54bd7 Uploaded okorol parents: diff changeset	2368 $its1Start = 0; # Blank out the ITS1 start
f82c70f54bd7 Uploaded okorol parents: diff changeset	2369 $its1End = 0; # Blank out the ITS1 end
f82c70f54bd7 Uploaded okorol parents: diff changeset	2370 if ($problem ne "") { # If a problem has already been noted
f82c70f54bd7 Uploaded okorol parents: diff changeset	2371 $problem = $problem . "; "; # Separate this problem from the earlier ones
f82c70f54bd7 Uploaded okorol parents: diff changeset	2372 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	2373 $problem = $problem . "No ITS1 sequence";
f82c70f54bd7 Uploaded okorol parents: diff changeset	2374 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	2375 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	2376 if ($its2End - $its2Start < 0) { # ITS2 would end before it starts (e.g. 5.8S overlaps LSU)
f82c70f54bd7 Uploaded okorol parents: diff changeset	2377 if (($midEFound == 1) && ($lsuFound == 1)) { # If both the 5.8S's end and the LSU's start were found
f82c70f54bd7 Uploaded okorol parents: diff changeset	2378 $problemCode = $problemCode . "PC"; # Code PC: LSU/5.8S overlap, possibly chimeric
f82c70f54bd7 Uploaded okorol parents: diff changeset	2379 $its2Start = 0; # Blank out the ITS2 start
f82c70f54bd7 Uploaded okorol parents: diff changeset	2380 $its2End = 0; # Blank out the ITS2 end
f82c70f54bd7 Uploaded okorol parents: diff changeset	2381 if ($problem ne "") { # If a problem has already been noted
f82c70f54bd7 Uploaded okorol parents: diff changeset	2382 $problem = $problem . "; "; # Separate this problem from the earlier ones
f82c70f54bd7 Uploaded okorol parents: diff changeset	2383 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	2384 $problem = $problem . "LSU seems to overlap 5.8S, sequence may be chimeric";
f82c70f54bd7 Uploaded okorol parents: diff changeset	2385 } else { # Else, at least one of the two flanking domains is missing
f82c70f54bd7 Uploaded okorol parents: diff changeset	2386 $problemCode = $problemCode . "Y"; # Code Y: no ITS2 sequence
f82c70f54bd7 Uploaded okorol parents: diff changeset	2387 $its2Start = 0; # Blank out the ITS2 start
f82c70f54bd7 Uploaded okorol parents: diff changeset	2388 $its2End = 0; # Blank out the ITS2 end
f82c70f54bd7 Uploaded okorol parents: diff changeset	2389 if ($problem ne "") { # If a problem has already been noted
f82c70f54bd7 Uploaded okorol parents: diff changeset	2390 $problem = $problem . "; "; # Separate this problem from the earlier ones
f82c70f54bd7 Uploaded okorol parents: diff changeset	2391 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	2392 $problem = $problem . "No ITS2 sequence";
f82c70f54bd7 Uploaded okorol parents: diff changeset	2393 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	2394 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	2395
f82c70f54bd7 Uploaded okorol parents: diff changeset	2396
f82c70f54bd7 Uploaded okorol parents: diff changeset	2397 if (($problemCode =~ m/[15]/) && ($problemCode =~ m/[S]/)) { # Sequence lacks all indications of ITS1 (code S together with code 1 or 5)
f82c70f54bd7 Uploaded okorol parents: diff changeset	2398 $its1Start = 0; # Blank out the ITS1 start
f82c70f54bd7 Uploaded okorol parents: diff changeset	2399 $its1End = 0; # Blank out the ITS1 end
f82c70f54bd7 Uploaded okorol parents: diff changeset	2400 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	2401 if (($problemCode =~ m/[25]/) && ($problemCode =~ m/[L]/)) { # Sequence lacks all indications of ITS2 (code L together with code 2 or 5)
f82c70f54bd7 Uploaded okorol parents: diff changeset	2402 $its2Start = 0; # Blank out the ITS2 start
f82c70f54bd7 Uploaded okorol parents: diff changeset	2403 $its2End = 0; # Blank out the ITS2 end
f82c70f54bd7 Uploaded okorol parents: diff changeset	2404 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	2405
f82c70f54bd7 Uploaded okorol parents: diff changeset	2406
f82c70f54bd7 Uploaded okorol parents: diff changeset	2407 $allhitanchorlens = @hitanchorlens[1] . "," . @hitanchorlens[2] . "," . @hitanchorlens[3] . "," . @hitanchorlens[4]; # Join the anchor lengths of slots 1-4 (SSU, 5.8S start, 5.8S end, LSU) with commas
f82c70f54bd7 Uploaded okorol parents: diff changeset	2408
f82c70f54bd7 Uploaded okorol parents: diff changeset	2409 push(@allSeqDomains,$allDomains); # Add list of all domains
f82c70f54bd7 Uploaded okorol parents: diff changeset	2410 push(@dnaStart, $dnaStart); # Add start of the ITS sequence
f82c70f54bd7 Uploaded okorol parents: diff changeset	2411 push(@startDomain, $startDomain); # Add start domain
f82c70f54bd7 Uploaded okorol parents: diff changeset	2412 push(@dnaEnd, $dnaEnd); # Add end of the ITS sequence
f82c70f54bd7 Uploaded okorol parents: diff changeset	2413 push(@endDomain, $endDomain); # Add end domain
f82c70f54bd7 Uploaded okorol parents: diff changeset	2414 push(@ssuStart, $ssuStart); # Add start of the SSU sequence
f82c70f54bd7 Uploaded okorol parents: diff changeset	2415 push(@ssuEnd, $ssuEnd); # Add end of the SSU sequence
f82c70f54bd7 Uploaded okorol parents: diff changeset	2416 push(@midStart, $midStart); # Add start of the 5.8S sequence
f82c70f54bd7 Uploaded okorol parents: diff changeset	2417 push(@midEnd, $midEnd); # Add end of the 5.8S sequence
f82c70f54bd7 Uploaded okorol parents: diff changeset	2418 push(@lsuStart, $lsuStart); # Add start of the LSU sequence
f82c70f54bd7 Uploaded okorol parents: diff changeset	2419 push(@lsuEnd, $lsuEnd); # Add end of the LSU sequence
f82c70f54bd7 Uploaded okorol parents: diff changeset	2420 push(@its1Start, $its1Start); # Add start of the ITS1 sequence
f82c70f54bd7 Uploaded okorol parents: diff changeset	2421 push(@its1End, $its1End); # Add end of the ITS1 sequence
f82c70f54bd7 Uploaded okorol parents: diff changeset	2422 push(@its2Start, $its2Start); # Add start of the ITS2 sequence
f82c70f54bd7 Uploaded okorol parents: diff changeset	2423 push(@its2End, $its2End); # Add end of the ITS2 sequence
f82c70f54bd7 Uploaded okorol parents: diff changeset	2424 push(@problem, $problem); # Add potential problem info
f82c70f54bd7 Uploaded okorol parents: diff changeset	2425 push(@problemCode, $problemCode); # Add potential problem info in code form
f82c70f54bd7 Uploaded okorol parents: diff changeset	2426 push(@anchorLens, $allhitanchorlens); # Add the anchor lengths
f82c70f54bd7 Uploaded okorol parents: diff changeset	2427 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	2428 $lc++; # Increase the line count by one
f82c70f54bd7 Uploaded okorol parents: diff changeset	2429 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	2430
f82c70f54bd7 Uploaded okorol parents: diff changeset	2431 ## Save results to the summary file
f82c70f54bd7 Uploaded okorol parents: diff changeset	2432 if ($out_sum == 1) { # If summary file should be written
f82c70f54bd7 Uploaded okorol parents: diff changeset	2433 $itsTotal = 0; # Reset the total ITS sum
f82c70f54bd7 Uploaded okorol parents: diff changeset	2434 foreach $typeCount (@itsCounts) { # Add ITSs from all different origins
f82c70f54bd7 Uploaded okorol parents: diff changeset	2435 $itsTotal += $typeCount; # Add the number of ITSs for this origin
f82c70f54bd7 Uploaded okorol parents: diff changeset	2436 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	2437 ## Write info on the found ITS sequences to the summary file
f82c70f54bd7 Uploaded okorol parents: diff changeset	2438 print SUMMARY "Sequences detected as ITS by ITSx:\t$itsTotal\n"; # Total over all origins
f82c70f54bd7 Uploaded okorol parents: diff changeset	2439 print SUMMARY " On main strand: \t$itsMain\n"; # Number found on the main strand
f82c70f54bd7 Uploaded okorol parents: diff changeset	2440 print SUMMARY " On complementary strand:\t$itsCompl\n"; # Number found on the complementary strand
f82c70f54bd7 Uploaded okorol parents: diff changeset	2441 if ($allow_reorder == 0) { # If re-ordering of domains is not allowed
f82c70f54bd7 Uploaded okorol parents: diff changeset	2442 print SUMMARY "Sequences detected as chimeric by ITSx:\t$itsChimeric\n"; # Write the number of reported chimeric sequences to the summary file
f82c70f54bd7 Uploaded okorol parents: diff changeset	2443 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	2444 ## Write info on the found ITS sequence types to the summary file
f82c70f54bd7 Uploaded okorol parents: diff changeset	2445 print SUMMARY "ITS sequences by preliminary origin:\n";
f82c70f54bd7 Uploaded okorol parents: diff changeset	2446 print SUMMARY " Alveolates: \t" . int(@itsCounts[ord("A")]) . "\n"; # Counts are indexed by the character code of the origin letter; int() prints an unset count as 0
f82c70f54bd7 Uploaded okorol parents: diff changeset	2447 print SUMMARY " Amoebozoa: \t" . int(@itsCounts[ord("D")]) . "\n";
f82c70f54bd7 Uploaded okorol parents: diff changeset	2448 print SUMMARY " Bacillariophyta: \t" . int(@itsCounts[ord("C")]) . "\n";
f82c70f54bd7 Uploaded okorol parents: diff changeset	2449 print SUMMARY " Brown algae: \t" . int(@itsCounts[ord("I")]) . "\n";
f82c70f54bd7 Uploaded okorol parents: diff changeset	2450 print SUMMARY " Bryophytes: \t" . int(@itsCounts[ord("B")]) . "\n";
f82c70f54bd7 Uploaded okorol parents: diff changeset	2451 print SUMMARY " Euglenozoa: \t" . int(@itsCounts[ord("E")]) . "\n";
f82c70f54bd7 Uploaded okorol parents: diff changeset	2452 print SUMMARY " Eustigmatophytes:\t" . int(@itsCounts[ord("U")]) . "\n";
f82c70f54bd7 Uploaded okorol parents: diff changeset	2453 print SUMMARY " Fungi: \t" . int(@itsCounts[ord("F")]) . "\n";
f82c70f54bd7 Uploaded okorol parents: diff changeset	2454 print SUMMARY " Green algae: \t" . int(@itsCounts[ord("G")]) . "\n";
f82c70f54bd7 Uploaded okorol parents: diff changeset	2455 print SUMMARY " Liverworts: \t" . int(@itsCounts[ord("L")]) . "\n";
f82c70f54bd7 Uploaded okorol parents: diff changeset	2456 print SUMMARY " Metazoa: \t" . int(@itsCounts[ord("M")]) . "\n";
f82c70f54bd7 Uploaded okorol parents: diff changeset	2457 print SUMMARY " Microsporidia: \t" . int(@itsCounts[ord("N")]) . "\n";
f82c70f54bd7 Uploaded okorol parents: diff changeset	2458 print SUMMARY " Oomycetes: \t" . int(@itsCounts[ord("O")]) . "\n";
f82c70f54bd7 Uploaded okorol parents: diff changeset	2459 print SUMMARY " Prymnesiophytes: \t" . int(@itsCounts[ord("P")]) . "\n";
f82c70f54bd7 Uploaded okorol parents: diff changeset	2460 print SUMMARY " Raphidophytes: \t" . int(@itsCounts[ord("Q")]) . "\n";
f82c70f54bd7 Uploaded okorol parents: diff changeset	2461 print SUMMARY " Red algae: \t" . int(@itsCounts[ord("H")]) . "\n";
f82c70f54bd7 Uploaded okorol parents: diff changeset	2462 print SUMMARY " Rhizaria: \t" . int(@itsCounts[ord("R")]) . "\n";
f82c70f54bd7 Uploaded okorol parents: diff changeset	2463 print SUMMARY " Synurophyceae: \t" . int(@itsCounts[ord("S")]) . "\n";
f82c70f54bd7 Uploaded okorol parents: diff changeset	2464 print SUMMARY " Tracheophyta: \t" . int(@itsCounts[ord("T")]) . "\n";
f82c70f54bd7 Uploaded okorol parents: diff changeset	2465 print SUMMARY "-----------------------------------------------------------------\n";
f82c70f54bd7 Uploaded okorol parents: diff changeset	2466 close (SUMMARY); # Close the summary file
f82c70f54bd7 Uploaded okorol parents: diff changeset	2467 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	2468
f82c70f54bd7 Uploaded okorol parents: diff changeset	2469 if ($out_results == 1) { # If results output is on
f82c70f54bd7 Uploaded okorol parents: diff changeset	2470 close (RESULTS); # Close the results file
f82c70f54bd7 Uploaded okorol parents: diff changeset	2471 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	2472 close (RAWOUT); # Close the raw output file
f82c70f54bd7 Uploaded okorol parents: diff changeset	2473 close (PROBLEM); # Close the file for problematic entries
f82c70f54bd7 Uploaded okorol parents: diff changeset	2474 if ($foundProblem == 0) { # If no problematic entry was flagged
f82c70f54bd7 Uploaded okorol parents: diff changeset	2475 `rm $output.problematic.txt 2> /dev/null`; # Delete the problematic-entries file through the shell (NOTE(review): $output is interpolated unquoted into the command)
f82c70f54bd7 Uploaded okorol parents: diff changeset	2476 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	2477 if ($out_pos == 1) { # If positions output is on
f82c70f54bd7 Uploaded okorol parents: diff changeset	2478 close (POS); # Close the positions file
f82c70f54bd7 Uploaded okorol parents: diff changeset	2479 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	2480 if ($out_fasta == 1) { # If FASTA output is on
f82c70f54bd7 Uploaded okorol parents: diff changeset	2481 close (FASTA); # Close the FASTA output file
f82c70f54bd7 Uploaded okorol parents: diff changeset	2482 if ($allow_reorder == 0) { # If re-ordering of domains is not allowed
f82c70f54bd7 Uploaded okorol parents: diff changeset	2483 close (CHIMERA); # Close the chimera file
f82c70f54bd7 Uploaded okorol parents: diff changeset	2484 if ($foundChimera == 0) { # If no chimeric sequence was flagged
f82c70f54bd7 Uploaded okorol parents: diff changeset	2485 `rm $output.chimeric.fasta 2> /dev/null`; # Delete the chimera file through the shell (NOTE(review): $output is interpolated unquoted into the command)
f82c70f54bd7 Uploaded okorol parents: diff changeset	2486 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	2487 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	2488 if ($out_partial > 0) { # If partial output is on
f82c70f54bd7 Uploaded okorol parents: diff changeset	2489 close FULLPARTIAL; # Close the FULLPARTIAL handle
f82c70f54bd7 Uploaded okorol parents: diff changeset	2490 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	2491 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	2492 if ($out_ssu == 1) { # If SSU output is on
f82c70f54bd7 Uploaded okorol parents: diff changeset	2493 close (SSU); # Close the SSU file
f82c70f54bd7 Uploaded okorol parents: diff changeset	2494 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	2495 if ($out_lsu == 1) { # If LSU output is on
f82c70f54bd7 Uploaded okorol parents: diff changeset	2496 close (LSU); # Close the LSU file
f82c70f54bd7 Uploaded okorol parents: diff changeset	2497 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	2498 if ($out_58S == 1) { # If 5.8S output is on
f82c70f54bd7 Uploaded okorol parents: diff changeset	2499 close (MID); # Close the 5.8S file
f82c70f54bd7 Uploaded okorol parents: diff changeset	2500 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	2501 if ($out_its1 == 1) { # If ITS1 output is on
f82c70f54bd7 Uploaded okorol parents: diff changeset	2502 close (ITS1); # Close the ITS1 file
f82c70f54bd7 Uploaded okorol parents: diff changeset	2503 if ($out_partial > 0) { # If partial output is on
f82c70f54bd7 Uploaded okorol parents: diff changeset	2504 close ITS1PARTIAL; # Close the ITS1PARTIAL handle
f82c70f54bd7 Uploaded okorol parents: diff changeset	2505 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	2506 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	2507 if ($out_its2 == 1) { # If ITS2 output is on
f82c70f54bd7 Uploaded okorol parents: diff changeset	2508 close (ITS2); # Close the ITS2 file
f82c70f54bd7 Uploaded okorol parents: diff changeset	2509 if ($out_partial > 0) { # If partial output is on
f82c70f54bd7 Uploaded okorol parents: diff changeset	2510 close ITS2PARTIAL; # Close the ITS2PARTIAL handle
f82c70f54bd7 Uploaded okorol parents: diff changeset	2511 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	2512 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	2513 if ($out_concat == 1) { # If concatenated output is on
f82c70f54bd7 Uploaded okorol parents: diff changeset	2514 close CONCAT; # Close the CONCAT handle
f82c70f54bd7 Uploaded okorol parents: diff changeset	2515 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	2516
f82c70f54bd7 Uploaded okorol parents: diff changeset	2517 if ($out_not == 1) { # If not-found output is on
f82c70f54bd7 Uploaded okorol parents: diff changeset	2518 open (NOTFOUND, "$output\_no_detections.txt"); # Open the not-found list for reading (NOTE(review): the result of open is not checked)
f82c70f54bd7 Uploaded okorol parents: diff changeset	2519 while ($line = <NOTFOUND>) { # Read all entries from file
f82c70f54bd7 Uploaded okorol parents: diff changeset	2520 chomp($line); # Remove newline char
f82c70f54bd7 Uploaded okorol parents: diff changeset	2521 push(@nodetectionlist,$line); # Add to non-detection list
f82c70f54bd7 Uploaded okorol parents: diff changeset	2522 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	2523 close NOTFOUND; # Close the not-found list
f82c70f54bd7 Uploaded okorol parents: diff changeset	2524
f82c70f54bd7 Uploaded okorol parents: diff changeset	2525 open (NOTFOUND, ">$output\_no_detections.fasta"); # Create a not-found FASTA output file (NOTE(review): the result of open is not checked)
f82c70f54bd7 Uploaded okorol parents: diff changeset	2526 foreach $seqID (@nodetectionlist) { # For all non-detections
f82c70f54bd7 Uploaded okorol parents: diff changeset	2527 $seq = $sequenceDB{"$seqID"}; # Get sequence from sequence database
f82c70f54bd7 Uploaded okorol parents: diff changeset	2528 print NOTFOUND ">$seqID\n"; # Print not found sequence ID
f82c70f54bd7 Uploaded okorol parents: diff changeset	2529 print NOTFOUND $seq . "\n"; # Print not found sequence
f82c70f54bd7 Uploaded okorol parents: diff changeset	2530 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	2531 close NOTFOUND; # Close the not-found FASTA output file
f82c70f54bd7 Uploaded okorol parents: diff changeset	2532 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	2533
f82c70f54bd7 Uploaded okorol parents: diff changeset	2534 ## Clean up and finish
f82c70f54bd7 Uploaded okorol parents: diff changeset	2535
f82c70f54bd7 Uploaded okorol parents: diff changeset	2536 if ($pipeline == 0) { # If ITSx is not called from the pipeline mode (i.e. from ITSx)
f82c70f54bd7 Uploaded okorol parents: diff changeset	2537 if ($save_raw == 1) { # If raw data should be saved
f82c70f54bd7 Uploaded okorol parents: diff changeset	2538 `mv $tempDir $output\_ITSx_raw_output`; # Change the name of the temporary directory to ..._ITSx_raw_output (NOTE(review): runs through the shell with unquoted paths)
f82c70f54bd7 Uploaded okorol parents: diff changeset	2539 } else { # Else, discard the raw data
f82c70f54bd7 Uploaded okorol parents: diff changeset	2540 `rm -rf $tempDir`; # Remove the temporary directory (NOTE(review): runs through the shell with an unquoted path)
f82c70f54bd7 Uploaded okorol parents: diff changeset	2541 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	2542 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	2543
f82c70f54bd7 Uploaded okorol parents: diff changeset	2544 ## Get the current time and output a finished message
f82c70f54bd7 Uploaded okorol parents: diff changeset	2545 $now = localtime; # Current local time as a string (localtime in scalar context)
f82c70f54bd7 Uploaded okorol parents: diff changeset	2546 if ($silent == 0) { # If not running in silent mode
f82c70f54bd7 Uploaded okorol parents: diff changeset	2547 print STDERR "$now : Extraction finished!\n";
f82c70f54bd7 Uploaded okorol parents: diff changeset	2548 print STDERR "-----------------------------------------------------------------\n";
f82c70f54bd7 Uploaded okorol parents: diff changeset	2549 print STDERR "Thank you for using ITSx!\n";
f82c70f54bd7 Uploaded okorol parents: diff changeset	2550 print STDERR "Please report bugs or unsupported lineages to itsx\@microbiology.se\n";
f82c70f54bd7 Uploaded okorol parents: diff changeset	2551 print STDERR "\n";
f82c70f54bd7 Uploaded okorol parents: diff changeset	2552 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	2553
f82c70f54bd7 Uploaded okorol parents: diff changeset	2554 ## Write the end time to the summary file
f82c70f54bd7 Uploaded okorol parents: diff changeset	2555 if ($pipeline == 0) { # If not running in pipeline mode
f82c70f54bd7 Uploaded okorol parents: diff changeset	2556 if ($out_sum == 1) { # If summary output is on
f82c70f54bd7 Uploaded okorol parents: diff changeset	2557 open (SUMMARY, ">>$output.summary.txt"); # Append to the summary file
f82c70f54bd7 Uploaded okorol parents: diff changeset	2558 print SUMMARY "ITSx run finished at $now.\n"; # Write ending time for the analysis
f82c70f54bd7 Uploaded okorol parents: diff changeset	2559 close (SUMMARY); # Close summary file
f82c70f54bd7 Uploaded okorol parents: diff changeset	2560 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	2561 }
f82c70f54bd7 Uploaded okorol parents: diff changeset	2562
## hmmerSearch(command, outputFile, strand, profileSet)
##
## Runs the given HMMER command line, reads its report from a pipe and writes
## a condensed, tab-separated version of the report to outputFile.
##
##   command     - complete command line to execute; its standard output is parsed
##   outputFile  - path of the condensed report to create (overwritten)
##   strand      - "M" selects the "main strand" wording of the progress message,
##                 any other value the "complementary strand" wording
##   profileSet  - key into %profileIndex, used only for the progress message
##
## For every "Query:" record a "## New query:" header line is written. With
## $maxCount == 0 every reported domain line (lines containing "<digit> ! ")
## is written; otherwise one entry is kept per region (1_SSU, 2_5.8, 3_End,
## 4_LSU), replaced whenever a later hit has a higher score, and the kept
## entries are written when the record terminator "//" is reached.
##
## Apart from the split() placeholders, the scalars and arrays assigned here
## are deliberately left as package globals, as in the rest of this script,
## since code outside this sub may read them.
## NOTE(review): %profileIndex and $silent are defined elsewhere in the file.
sub hmmerSearch {
  ($hmmerCommand, $outputFile, $strand, $profileSet) = @_;

  # Region prefix of the HMM name => [slot in @bestScore/@bestEntry, hit counter]
  my %regionInfo = (
    "1_SSU" => [1, \$SSUCount],
    "2_5.8" => [2, \$startCount],
    "3_End" => [3, \$endCount],
    "4_LSU" => [4, \$LSUCount],
  );
  my @regionOrder = ("1_SSU", "2_5.8", "3_End", "4_LSU"); # Output order of the kept entries

  # Failures are reported but not fatal, so that the caller's flow is unchanged
  my $outputOpened = open(my $outputFH, '>', $outputFile);
  if (!$outputOpened) {
    print STDERR "Warning: hmmerSearch could not create '$outputFile': $!\n";
  }
  my $hmmerOpened = open(my $hmmerFH, '-|', $hmmerCommand);
  if (!$hmmerOpened) {
    print STDERR "Warning: hmmerSearch could not run '$hmmerCommand': $!\n";
  }

  $totalHitCount = 0;
  $hitCount = 0;
  $SSUCount = 0;
  $LSUCount = 0;
  $startCount = 0;
  $endCount = 0;
  $maxCount = 1;

  # Test for definedness rather than the return value of chomp(), so that a
  # final line without a trailing newline is still processed
  while (defined($line = <$hmmerFH>)) {
    chomp($line);

    if (substr($line,0,6) eq "Query:") { # Start of a new query record
      $hitCount = 0;
      $SSUCount = 0;
      $LSUCount = 0;
      $startCount = 0;
      $endCount = 0;
      undef @bestScore;
      undef @bestEntry;

      # First whitespace-separated field is the query ID; the digits of the
      # last field (e.g. "[L=500]") give the query length
      my @queryFields = split(' ', substr($line,7));
      $query = @queryFields ? $queryFields[0] : "";
      $queryLength = @queryFields ? $queryFields[-1] : "";
      $queryLength =~ s/[^0-9]//g;

      print $outputFH "## New query:\t$query\t$queryLength\n";
    }

    if (substr($line,0,12) eq "Description:") {
      $desc = $line;
    }

    if (substr($line,0,3) eq ">> ") { # Header of the hits against one HMM
      (undef, $hmmerSubjectName) = split(' ', $line);
    }

    if ($line =~ m/[0-9] ! /) { # Domain line
      $stats = $line;
      # One or more spaces => one tab. A pattern that can match the empty
      # string would insert a tab between every single character instead.
      $stats =~ s/ +/\t/g;
      # The line starts with spaces, so the first field is empty
      (undef, $no, $excl, $score) = split(/\t/, $stats);
      $hitCount++;
      $totalHitCount++;

      # $stats begins with a tab, hence no separator after $queryLength
      my $entry = "$query\t$hmmerSubjectName\t$queryLength$stats\n";

      if ($maxCount == 0) { # No limit: report every hit
        print $outputFH $entry;
      } else {
        my $region = $regionInfo{substr($hmmerSubjectName,0,5)};
        if ($region) {
          my ($slot, $countRef) = @{$region};
          ${$countRef}++;
          # The first $maxCount hits are stored unconditionally, later hits
          # only when they score higher than the stored one
          if ((${$countRef} <= $maxCount) || ($score > $bestScore[$slot])) {
            $bestScore[$slot] = $score;
            $bestEntry[$slot] = $entry;
          }
        }
      }
    }

    if (substr($line,0,2) eq "//") { # End of the query record
      if ($maxCount > 0) {
        foreach my $prefix (@regionOrder) {
          my ($slot, $countRef) = @{$regionInfo{$prefix}};
          if (${$countRef} > 0) {
            print $outputFH $bestEntry[$slot];
          }
        }
      }
      print $outputFH "//\n";
    }
  }

  # Closing the pipe waits for the command and exposes its exit status
  if ($hmmerOpened) {
    if (!close($hmmerFH)) {
      print STDERR "Warning: '$hmmerCommand' did not finish cleanly (status $?)\n";
    }
  }
  if ($outputOpened) {
    if (!close($outputFH)) {
      print STDERR "Warning: hmmerSearch could not finish writing '$outputFile': $!\n";
    }
  }

  $now = localtime;
  if ($silent == 0) {
    if ($strand eq "M") {
      print STDERR " $now : " . ucfirst($profileIndex{$profileSet}) . " analysis of main strand finished.\n"; # Print finished type
    } else {
      print STDERR " $now : " . ucfirst($profileIndex{$profileSet}) . " analysis of complementary strand finished.\n"; # Print finished type
    }
  }
}
f82c70f54bd7 Uploaded okorol parents: diff changeset	2690
f82c70f54bd7 Uploaded okorol parents: diff changeset	2691
f82c70f54bd7 Uploaded okorol parents: diff changeset	2692 ## Please send beers, pizzas, cakes, fruit pies, job positions and other types of feedback to:
f82c70f54bd7 Uploaded okorol parents: diff changeset	2693 ## johan.bengtsson [at] microbiology.se
f82c70f54bd7 Uploaded okorol parents: diff changeset	2694 ## Looking forward to hearing from you.... visit my website: www.microbiology.se for info on my research
f82c70f54bd7 Uploaded okorol parents: diff changeset	2695 ## //Johan Bengtsson, 2012-2014

Mercurial > repos > okorol > itsx

annotate ITSx @ 9:3610c6312b85 draft default tip