annotate MITObim_1.8.pl @ 8:e93ae4176127 draft

Uploaded a set of tools
author lijing
date Thu, 02 Nov 2017 12:48:54 -0400
parents a03d23c6ab95
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
6
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
1 #! /usr/bin/perl
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
2 #
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
3 # MITObim - mitochondrial baiting and iterative mapping
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
4 # wrapper script version 1.8
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
5 # Author: Christoph Hahn, 2012-2016
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
6 # christoph.hahn@nhm.uio.no
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
7
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
8 use strict;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
9 use warnings;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
10 use Getopt::Long;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
11 use Cwd qw(abs_path);
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
12 use File::Copy;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
13 use List::Util qw< min max >;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
14 use POSIX qw(strftime);
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
15 use POSIX qw(ceil);
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
16 use File::Path 'rmtree';
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
17
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
18 my $startiteration = 1;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
19 my $enditeration = 1;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
20
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
21 my ($quick, $noshow, $help, $strainname, $paired, $mode, $refname, $readpool, $maf, $proofreading, $readlength, $insertsize, $MM, $trim, $trimoverhang, $k_bait, $clean, $clean_interval, $min_contig_cov) = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 0, 0, 31, 0, 2, 0);
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
22 my ($miramode, $key, $val, $exit, $current_contiglength, $current_number_of_reads, $current_number_of_contigs);
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
23 my $splitting = 0;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
24 my $platform = "solexa";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
25 my $platform_settings;# = "SOLEXA";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
26 my $shme = "";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
27 my $trim_off = "";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
28 my $redirect_temp = "";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
29 my $NFS_warn_only = "";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
30 my ($mirapath, $mira, $miraconvert, $mirabait) = ("", "mira", "miraconvert", "mirabait");
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
31 my (@reads, @output, @path, @current_contig_stats, @contiglengths, @number_of_reads, @number_of_contigs);
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
32 my %hash;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
33 my $PROGRAM = "\nMITObim - mitochondrial baiting and iterative mapping\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
34 my $VERSION = "version 1.8\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
35 my $AUTHOR = "author: Christoph Hahn, (c) 2012-2016\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
36 my $cite = "\nif you found MITObim useful, please cite:
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
37 Hahn C, Bachmann L and Chevreux B. (2013) Reconstructing mitochondrial genomes directly from genomic next-generation sequencing reads -
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
38 a baiting and iterative mapping approach. Nucl. Acids Res. 41(13):e129. doi: 10.1093/nar/gkt371\n\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
39 my $USAGE = "\nusage: ./MITObim.pl <parameters>
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
40 \nparameters:
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
41 -start <int> iteration to start with, default=1
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
42 -end <int> iteration to end with, default=1
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
43 -sample <string> sampleID as used in initial MIRA assembly
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
44 -ref <string> referenceID as used in initial MIRA assembly
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
45 -readpool <FILE> readpool in fastq format (*.gz is also allowed)
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
46 -maf <FILE> maf file from previous MIRA assembly
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
47 \noptional:
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
48 --quick <FILE> starts process with initial baiting using provided fasta reference
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
49 --kbait <int> set kmer for baiting stringency (default: 31)
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
50 --platform specify sequencing platform (default: 'solexa'; other options: 'iontor', '454', 'pacbio')
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
51 --denovo runs MIRA in denovo mode (default: mapping)
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
52 --pair finds pairs after baiting (relies on /1 and /2 header convention for read pairs) (default: no)
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
53 --verbose show detailed output of MIRA modules (default: no)
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
54 --split split reference at positions with more than 5N (default: no)
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
55 --help shows this helpful information
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
56 --clean retain only the last 2 iteration directories (default: no)
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
57 --trimreads trim data (default: no; we recommend to trim beforehand and feed MITObim with pre trimmed data)
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
58 --trimoverhang trim overhang up- and downstream of reference (default: no)
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
59 --missmatch <int> number of allowed missmatches in mapping - only for illumina data (default: 15% of avg. read length)
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
60 --min_cov <int> minimum average coverage of contigs to be retained (default: off)
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
61 --mirapath <string> full path to MIRA binaries (only needed if MIRA is not in PATH)
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
62 --redirect_tmp redirect temporary output to this location (useful in case you are running MITObim on an NFS mount)
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
63 --NFS_warn_only allow MIRA to run on NFS mount without aborting - warn only (expert option - see MIRA documentation 'check_nfs')
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
64 \nexamples:
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
65 ./MITObim.pl -start 1 -end 5 -sample StrainX -ref reference-mt -readpool illumina_readpool.fastq -maf initial_assembly.maf
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
66 ./MITObim.pl -end 10 --quick reference.fasta -sample StrainY -ref reference-mt -readpool illumina_readpool.fastq\n\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
67 # --proofread applies proofreading (atm only to be used if starting the process from a single short seed reference)
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
68 # --readlength <int> read length of illumina library, default=150, relevant only for proofreading
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
69 # --insert <int> insert size of illumina library, default=300, relevant only for proofreading
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
70
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
71 my $command = $0;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
72 for (@ARGV){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
73 $command .= " $_";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
74 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
75
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
76 GetOptions ( "start=i" => \$startiteration,
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
77 "end=i" => \$enditeration,
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
78 "kbait=i" => \$k_bait,
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
79 "quick=s" => \$quick,
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
80 "verbose!" => \$noshow,
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
81 "sample=s" => \$strainname,
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
82 "paired" => \$paired,
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
83 "denovo" => \$mode,
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
84 "ref=s" => \$refname,
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
85 "readpool=s" => \$readpool,
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
86 "help!" => \$help,
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
87 "clean!" => \$clean,
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
88 "mirapath=s" => \$mirapath,
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
89 "maf=s" => \$maf,
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
90 # "proofreading!" => \$proofreading,
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
91 "trimreads!" => \$trim,
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
92 "trimoverhang!" => \$trimoverhang,
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
93 "missmatch=i" => \$MM,
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
94 "platform=s" => \$platform,
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
95 # "readlength=i" => \$readlength,
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
96 # "insertsize=i" => \$insertsize,
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
97 "split!" => \$splitting,
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
98 "min_cov=i" => \$min_contig_cov,
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
99 "redirect_tmp=s" => \$redirect_temp,
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
100 "NFS_warn_only!" => \$NFS_warn_only) or die "Incorrect usage!\n$USAGE";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
101
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
102
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
103 print $PROGRAM;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
104 print $VERSION;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
105 print $AUTHOR;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
106
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
107 print $USAGE and exit if $help;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
108 print $USAGE and exit if !$readpool;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
109 unless ($quick){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
110 print $USAGE and exit if !$maf;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
111 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
112 print $USAGE and exit if !$refname;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
113
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
114 $readpool=abs_path($readpool);
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
115 unless (-e $readpool){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
116 print "Cant find the readpool. Is the path correct?\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
117 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
118 if ($maf){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
119 $maf=abs_path($maf);
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
120 unless (-e $maf){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
121 print "Cant find *.maf file. Is the path correct?\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
122 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
123 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
124
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
125 ($platform, $platform_settings) = &set_platform($platform);
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
126
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
127 if ($quick){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
128 $quick=abs_path($quick);
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
129 unless (-e $quick){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
130 print "quick option selected but is the path to the file correct?\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
131 exit 1;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
132 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
133 print "\nquick option selected! -maf option will be ignored (if given)\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
134 $maf = 0;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
135 $startiteration = 0;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
136 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
137 print $USAGE and exit if ($startiteration > $enditeration);
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
138 unless (((-e $maf)||($quick)) && (-e $readpool)){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
139 print "\nAre readpool AND maf/reference files there?\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
140 exit 1;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
141 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
142 if ($mirapath){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
143 if (-e "$mirapath/mira"){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
144 print "found executables in the path specified by the user - good!\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
145 $mira = "$mirapath/mira";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
146 $miraconvert = "$mirapath/miraconvert";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
147 $mirabait = "$mirapath/mirabait";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
148 }else{
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
149 print "somethings wrong with the path to mira.\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
150 exit 1;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
151 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
152 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
153
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
154
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
155 ##if not given otherwise, readlength and insertsize are set to default. automatic readlength and insertsize detection will be implemented in time.
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
156 #if (!$readlength){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
157 # $readlength = 150;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
158 #}
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
159 #if (!$insertsize){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
160 # $insertsize = 300;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
161 #}
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
162 if (!$mode){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
163 $miramode = "mapping";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
164 }else {
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
165 $miramode = "denovo";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
166 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
167
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
168 if (!$trim){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
169 $trim_off = "\"--noclipping -CL:pec=no\"";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
170 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
171
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
172 my $out_command = $command; $out_command =~ s/\S+galaxy\///g;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
173 print "\nFull command run:\n$out_command\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
174 print "\nAll paramters seem to make sense:\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
175 print "startiteration: $startiteration\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
176 print "enditeration: $enditeration\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
177 print "sample: $strainname\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
178 print "refname: $refname\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
179 my $out_readpool = $readpool; $out_readpool =~ s/.*galaxy\///;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
180 print "readpool: $out_readpool\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
181 print "maf: $maf\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
182 my $out_quick = $quick; $out_quick =~ s/.*galaxy\///;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
183 print "quick: $out_quick\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
184 print "paired: $paired (off=0, on=1)\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
185 print "assembly mode: $mode (mapping=0, denovo=1)\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
186 print "verbose: $noshow (off=0, on=1)\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
187 print "split: $splitting (off=0, on=1)\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
188 print "minimum avg. coverage: $min_contig_cov (off=0)\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
189 print "clean: $clean (off=0, on=1)\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
190 print "trim reads: $trim (off=0, on=1)\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
191 print "trim overhang: $trimoverhang (no=0, yes=1)\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
192 print "platform: $platform\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
193 print "kmer baiting: $k_bait\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
194 #print "proofread: $proofreading (off=0, on=1)\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
195
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
196 if ($proofreading){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
197 print "\nproofreading is not yet enabled in this beta version of MITObim 1.8 - it is currently being optimized for MIRA4. \nplease refer to MITObim 1.6 if you wish to use this option. Sorry for the inconvenience!\n\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
198 exit;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
199
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
200 print "proofreading: on\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
201 print "readlength: $readlength\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
202 print "insertsize: $insertsize\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
203 $MM = 0;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
204 print "number of allowed missmatches in proofreading assembly: $MM\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
205 $shme = "-AL:shme=$MM";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
206 }elsif ((!$proofreading) && (!$mode) && ($platform eq "solexa")){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
207 if ($MM == -1){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
208 print "number of missmatches in mapping assembly: default (15% of average read length loaded)\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
209 $shme = "";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
210 }else {
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
211 print "number of missmatches in mapping assembly: $MM\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
212 $shme = "-AL:shme=$MM";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
213 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
214 print "proofreading: off\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
215 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
216
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
217 if (!$trimoverhang){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
218 $trimoverhang = "-SB:tor=no";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
219 }else {
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
220 $trimoverhang = "-SB:tor=yes";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
221 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
222
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
223 if ($redirect_temp) {
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
224 $redirect_temp="-DI:trt=$redirect_temp"
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
225 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
226
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
227 print "\nStarting MITObim \n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
228
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
229 my @iteration = ($startiteration .. $enditeration);
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
230 foreach (@iteration){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
231 chomp;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
232 my $currentiteration = $_;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
233 mkdir "iteration$currentiteration" or die "MITObim will not overwrite an existing directory: iteration$currentiteration\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
234 chdir "iteration$currentiteration" or die $!;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
235 print "\n==============\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
236 print " ITERATION $currentiteration\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
237 print "==============\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
238 print strftime("%b %e %H:%M:%S", localtime) . "\n\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
239 # if (($proofreading) && ($currentiteration != 0)){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
240 if ($proofreading){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
241 $shme = "-AL:shme=0";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
242 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
243
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
244 if ($maf){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
245 print "\nrecover backbone by running miraconvert on maf file\n\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
246 if ($currentiteration<2){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
247 @output= qx($miraconvert -f maf -t fasta -A "$platform_settings -CO:fnicpst=yes" $maf tmp);
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
248 }else{
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
249 @output= qx($miraconvert -f maf -t fasta -y $min_contig_cov -A "$platform_settings -CO:fnicpst=yes" $maf tmp);
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
250 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
251 $exit = $? >> 8;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
252 unless (!$noshow){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
253 print "@output\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
254 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
255 unless ($exit == 0){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
256 if (!$noshow){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
257 print "@output\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
258 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
259 print "\nmiraconvert seems to have failed - see detailed output above\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
260 exit;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
261 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
262 # if ( ((($mode) && ($currentiteration > 1)) && (!$quick)) || ((($mode) && ($currentiteration >= 1)) && ($quick)) ){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
263 # open(FH1,"<tmp_$strainname.unpadded.fasta") or die "$!";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
264 # }else{
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
265 open(FH1,"<tmp_$strainname.unpadded.fasta") or die "$!\nIs the sampleID identical as in the initial MIRA assembly?\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
266 # }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
267 open(FH2,">backbone_it$currentiteration\_initial_$refname.fna") or die $!;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
268 while (<FH1>) {
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
269 $_ =~ s/x/N/g;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
270 print FH2 $_;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
271 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
272 close(FH1);
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
273 close(FH2);
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
274 unlink glob ("tmp*");
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
275 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
276 MIRABAIT:
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
277 unless ($maf){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
278 print "\nquick option baits reads from provided reference in iteration 0\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
279 copy("$quick", "backbone_it$currentiteration\_initial_$refname.fna") or die "copy failed: $!";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
280 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
281 &check_ref_length("backbone_it$currentiteration\_initial_$refname.fna","temp_baitfile.fasta",29800,$k_bait);
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
282
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
283 if ($splitting){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
284 &remove_unmapped_contigs("temp_baitfile_backup.fasta","temp_baitfile.fasta");
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
285 if (-e "temp_baitfile_backup.fasta"){ #this file has only been created if splitting contigs was necessary
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
286 copy "temp_baitfile.fasta","backbone_it$currentiteration\_initial_$refname.fna";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
287 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
288 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
289 print "\nfishing readpool using mirabait (k = $k_bait)\n\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
290
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
291 # @output = (`mirabait -k $k_bait -n 1 temp_baitfile.fasta $readpool $strainname-$refname\_in.$platform 2>&1`);
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
292 @output = qx($mirabait -k $k_bait -n 1 temp_baitfile.fasta $readpool $strainname-readpool-it$currentiteration);
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
293 $exit = $? >> 8;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
294 unless (!$noshow){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
295 print "@output\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
296 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
297 if (!-s "$strainname-readpool-it$currentiteration.fastq"){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
298 print "\nyour readpool does not contain any reads with reasonable match (k = $k_bait) to your reference - Maybe you ll want to different settings or even a different reference?\n\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
299 exit;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
300 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
301 unless ($exit == 0){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
302 if (!$noshow){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
303 print "@output\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
304 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
305 print "\nmirabait seems to have failed - see detailed output above\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
306 exit;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
307 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
308
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
309 FINDPAIRS:
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
310
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
311 unless (!$paired){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
312 print "\nfinding pairs for baited reads\n\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
313 copy ("$strainname-readpool-it$currentiteration.fastq", "$strainname-readpool-it$currentiteration-se.fastq") or die "copy failed: $!";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
314 open(FH1,"<$strainname-readpool-it$currentiteration.fastq") or die $!;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
315 open(FH2,">list");
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
316 my $index=1;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
317 while (<FH1>) {
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
318 if ($index % 8 ==1 || $index % 8 ==5) {
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
319 chomp;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
320 $_ =~ s/@//g;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
321 ($key, $val) = split /\//;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
322 $hash{$key} .= exists $hash{$key} ? ",$val" : $val;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
323 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
324 $index++;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
325 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
326
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
327 for (keys %hash){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
328 $_ =~ s/$/\/1/g;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
329 print FH2 "$_\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
330 $_ =~ s/1$/2/g;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
331 print FH2 "$_\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
332 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
333 close(FH1);
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
334 close(FH2);
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
335 # @output = (`miraconvert -f fastq -t fastq -n list $readpool $strainname-$refname\_in.$platform 2>&1`);
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
336 @output = qx($miraconvert -f fastq -t fastq -n list $readpool $strainname-readpool-it$currentiteration );
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
337 $exit = $? >> 8;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
338 unless (!$noshow){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
339 print "@output\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
340 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
341 unless ($exit == 0){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
342 if (!$noshow){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
343 print "@output\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
344 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
345 print "\nmiraconvert seems to have failed - see detailed output above\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
346 exit;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
347 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
348 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
349 unlink("list");
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
350
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
351 MIRA:
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
352 print "\nrunning $miramode assembly using MIRA\n\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
353 &create_manifest($currentiteration,$strainname,$refname,$miramode,$trim_off,$platform_settings,$shme,$paired,$trimoverhang,"$strainname-readpool-it$currentiteration.fastq","backbone_it$currentiteration\_initial_$refname.fna", $redirect_temp, $NFS_warn_only);
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
354 @output = qx($mira manifest.conf );
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
355
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
356 $exit = $? >> 8;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
357 unless (!$noshow){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
358 print "@output\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
359 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
360 unless ($exit == 0){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
361 if (!$noshow){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
362 print "@output\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
363 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
364 print "\nMIRA seems to have failed - see detailed output above\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
365 exit;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
366 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
367
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
368 if ($redirect_temp) {
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
369 my $tmp_link = readlink("$strainname-$refname\_assembly/$strainname-$refname\_d_tmp");
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
370 rmtree ("$strainname-$refname\_assembly/$strainname-$refname\_d_tmp") or die $!;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
371 mkdir "$strainname-$refname\_assembly/$strainname-$refname\_d_tmp/" or die $1;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
372 my @files = glob("$tmp_link/*");
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
373 for (@files) {
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
374 move ("$_", "$strainname-$refname\_assembly/$strainname-$refname\_d_tmp/") or die $!;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
375 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
376 rmtree ("$tmp_link") or die $!;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
377 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
378
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
379 @path = abs_path;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
380 push (@path, "/$strainname-$refname\_assembly/$strainname-$refname\_d_results/$strainname-$refname\_out.maf");
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
381 $maf = join("",@path);
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
382 unless (-e $maf){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
383 print "maf file is not there \n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
384 exit;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
385 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
386
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
387 # $current_contiglength = &get_contig_length("$strainname-$refname\_assembly/$strainname-$refname\_d_info/$strainname-$refname\_info_contigstats.txt");
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
388 # $current_number_of_reads = (&get_number_of_reads("$strainname-$refname\_assembly/$strainname-$refname\_d_info/$strainname-$refname\_info_contigstats.txt") - 1);
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
389
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
390 @current_contig_stats = &get_contig_stats("$strainname-$refname\_assembly/$strainname-$refname\_d_info/$strainname-$refname\_info_contigstats.txt");
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
391 if (((scalar @current_contig_stats > 3) || ($current_contig_stats[0] > 1)) && ($proofreading)) {
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
392 print "assembly consists of more than one contigs - this is atm not permitted in proofreading mode. Sorry!\n\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
393 exit 1;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
394 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
395
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
396 PROOFREAD:
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
397 # if (($proofreading) && ($currentiteration >= 1)){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
398 if ($proofreading){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
399 print "proofreading option is currently disabled in this beta version of MITObim 1.7 - sorry for the inconvenience!\n\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
400 exit;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
401 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
402
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
403 $current_number_of_contigs = shift @current_contig_stats;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
404 $current_number_of_reads = shift @current_contig_stats;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
405 if (!$mode){ #in mapping assemblies the reference is counted as one read
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
406 $current_number_of_reads -= $current_number_of_contigs;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
407 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
408 push (@number_of_contigs, $current_number_of_contigs);
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
409 push (@number_of_reads, $current_number_of_reads);
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
410 print "readpool contains $current_number_of_reads reads\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
411 print "assembly contains $current_number_of_contigs contig(s)\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
412 if (scalar @current_contig_stats == 1){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
413 print "contig length: $current_contig_stats[0]\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
414 }elsif (scalar @current_contig_stats == 3){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
415 print "min contig length: ".$current_contig_stats[0]." bp\nmax contig length: ".$current_contig_stats[1]." bp\navg contig length: ".sprintf("%.0f", $current_contig_stats[2])." bp\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
416 print "find details on individual contigs in: ". abs_path . "/$strainname-$refname\_assembly/$strainname-$refname\_d_info/$strainname-$refname\_info_contigstats.txt\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
417 # printf("%1f",$current_contig_stats[2])."\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
418 }else {
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
419 print "somethings wrong with your contig stats. Sorry!\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
420 exit 1;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
421 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
422
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
423 if ($clean){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
424 &clean($clean_interval, $currentiteration);
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
425 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
426 # my $path=abs_path; print "Now at $path\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
427 system("cp $strainname-$refname\_assembly/$strainname-$refname\_d_results/$strainname-$refname\_out_AllStrains.unpadded.fasta ../finaloutput.fasta");
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
428 system("sed -i 's/_bb.*/_mitobim_out_$currentiteration\_iterations/' ../finaloutput.fasta");
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
429 if ($number_of_reads[-2]){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
430 if ($number_of_reads[-2] >= $number_of_reads[-1]){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
431 print "\nMITObim has reached a stationary read number after $currentiteration iterations!!\n$cite";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
432 print strftime("%b %e %H:%M:%S", localtime) . "\n\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
433 exit;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
434 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
435 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
436 chdir ".." or die "Failed to go to parent directory: $!";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
437 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
438 print "\nsuccessfully completed $enditeration iterations with MITObim! " . strftime("%b %e %H:%M:%S", localtime) . "\n$cite";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
439
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
440 #
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
441 #
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
442 ###SUBROUTINES
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
443 #
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
444 #
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
445 #
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
446
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
447 sub set_platform{
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
448 my @platforms = ('solexa', '454', 'iontorrent', 'pacbio');
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
449 my %pf_settings = ("solexa" => "SOLEXA_SETTINGS", "454" => "454_SETTINGS", "iontorrent" => "IONTOR_SETTINGS", "pacbio" => "PCBIOHQ_SETTINGS -CO:mrpg=5");
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
450 my $user_setting = shift;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
451 my @user_choice;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
452 for (@platforms){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
453 if ( $_ =~ /^$user_setting/ ){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
454 push(@user_choice, $_);
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
455 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
456 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
457
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
458 if (scalar(@user_choice) > 1){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
459 print "\nYour platform choice was ambiguous: $user_setting could be: @user_choice - Please try again\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
460 exit;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
461
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
462 } elsif (scalar(@user_choice) == 0){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
463 print "\nPlease specify a valid sequencing platform - not specifying anything will default into: $platforms[0]\n\navailable options are:\n@platforms\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
464 exit;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
465 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
466
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
467 return ($user_choice[0], $pf_settings{$user_choice[0]})
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
468 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
469
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
470 sub get_contig_stats{
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
471 my $contig = $_[0];
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
472 my @array;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
473 my @contiglength;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
474 my (@readnumber, @stats);
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
475 my $readssum = 0;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
476 open (CONTIGSTATS,"<$contig") or die $!;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
477 while (<CONTIGSTATS>){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
478 unless ($_ =~ /#/){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
479 @array = split /\t/;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
480 push (@contiglength, $array[1]);
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
481 push (@readnumber, $array[3]);
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
482 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
483 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
484 close (CONTIGSTATS);
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
485 if (scalar @readnumber == 1){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
486 push (@stats, (scalar @readnumber, $readnumber[0], $contiglength[0])); #@stats contains: number of contigs, total number of reads used to build the contigs, length of contig
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
487 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
488 elsif (scalar @readnumber > 1){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
489 $readssum += $_ for @readnumber;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
490 my $minlength = min @contiglength;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
491 my $maxlength = max @contiglength;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
492 my @avglength = &standard_deviation(@contiglength);
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
493 push (@stats, (scalar @readnumber, $readssum, $minlength, $maxlength, $avglength[0])); #@stats contains: number of contigs, total number of reads used to build the contigs, minimal, maximal, avg length of contigs
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
494 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
495 return @stats;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
496 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
497
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
498
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
499 #sub get_contig_length{
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
500 # my $contig = $_[0];
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
501 # my @contiglength;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
502 # open (CONTIGSTATS,"<$contig") or die $!;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
503 # while (<CONTIGSTATS>){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
504 # unless ($_ =~ /#/){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
505 # @contigslength = split /\t/;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
506 # }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
507 # }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
508 # close (CONTIGSTATS);
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
509 # return $contiglength[1];
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
510 #}
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
511
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
512 #sub get_number_of_reads{
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
513 # my $contig = $_[0];
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
514 # my @contigstats;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
515 # open (CONTIGSTATS,"<$contig") or die $!;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
516 # while (<CONTIGSTATS>){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
517 # unless ($_ =~ /#/){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
518 # @contigstats = split /\t/;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
519 # }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
520 # }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
521 # close (CONTIGSTATS);
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
522 # return $contigstats[3];
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
523 #}
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
524
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
525 sub proofread {
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
526 my $zero_MM = $_[0];
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
527 my $readtaglist_FH = $_[1];
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
528 my $contiglength = $_[2];
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
529 my $elevated_cov_lower_start = $_[3];
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
530 my $elevated_cov_lower_end = $_[4];
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
531 my $elevated_cov_upper_start = $_[5];
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
532 my $elevated_cov_upper_end = $_[6];
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
533 my $lower_limit = $_[7];
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
534 # my $lower_limit = 200;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
535 my $lower_main_limit = $_[8];
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
536 # my $lower_main_limit = 500;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
537 my $upper_limit = $contiglength - $lower_limit;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
538 my $upper_main_limit = $contiglength - $lower_main_limit;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
539 my @readtaglist;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
540 my $ref;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
541 my $junk;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
542 my $current_id;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
543 my %count;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
544 my @readid_good;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
545 my @readid_bad;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
546 my @reads;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
547 my @readlist;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
548 my @readlist_good;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
549 my @readlist_proofread;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
550 my @total_readlist;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
551 my @singleton;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
552 my @singletons;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
553 my @taglist;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
554 my @taglist_line;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
555 my @readtaglist_lower;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
556 my @readtaglist_upper;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
557 my @read_ids_lower;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
558 my @read_ids_all;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
559 my @read_ids_upper;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
560 my %ids =();
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
561 my @unsorted;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
562 my $min;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
563 my $max;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
564 my $tag;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
565
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
566
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
567 print "\nlower limit: $lower_limit\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
568 print "upper limit: $upper_limit\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
569 print "lower main limit: $lower_main_limit\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
570 print "upper main limit: $upper_main_limit\n\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
571 open (TAGLIST,"<$readtaglist_FH") or die $!;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
572 while (<TAGLIST>){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
573 push (@readtaglist, "$_");
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
574 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
575 close (TAGLIST);
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
576
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
577 for (@readtaglist){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
578 @taglist_line = split /\t/;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
579 unless ($taglist_line[0] =~ /#/){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
580 $ref = join ("\t", $taglist_line[0], $taglist_line[6]);
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
581 push (@read_ids_all, $ref);
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
582
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
583 if (($taglist_line[1] <= $lower_limit) || ($taglist_line[2] <= $lower_limit)){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
584 # if ((($taglist_line[1] <= $lower_limit) || (($taglist_line[1] >= $coverage_limits_lower[0])&&($taglist_line[1] <= $coverage_limits_lower[1]))) || ($taglist_line[2] <= $lower_limit)){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
585 $ref = join ("\t", $taglist_line[0], $taglist_line[6]);
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
586 push (@read_ids_lower, $ref);
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
587 }elsif (($taglist_line[1] >= $upper_limit) || ($taglist_line[2] >= $upper_limit)){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
588 $ref = join ("\t", $taglist_line[0], $taglist_line[6]);
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
589 push (@read_ids_upper, $ref);
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
590 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
591 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
592 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
593 %ids = map { $_ => 1 } @read_ids_lower;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
594 my @unique_lower = keys %ids;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
595 %ids = map { $_ => 1 } @read_ids_upper;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
596 my @unique_upper = keys %ids;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
597 %ids = map { $_ => 1 } @read_ids_all;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
598 my @unique_all = keys %ids;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
599 for (@unique_all) {
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
600 my @junk = split /\//;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
601 push (@reads, $junk[0]);
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
602 @junk = split /\t/;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
603 push (@total_readlist, $junk[1]);
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
604 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
605
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
606 map { $count{$_}++ } @reads;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
607 map {if ($count{$_} == 2){ @readid_good = split /\t/; push(@readlist, "$readid_good[1]");} elsif ($count{$_} == 1) { push(@readid_bad, "$_");}} keys (%count);
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
608 @reads = {};
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
609 undef %count;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
610
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
611 for (@readlist){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
612 chomp;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
613 $current_id = $_;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
614 my @pairs_lower = grep { $_ =~ /$current_id/} @unique_lower;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
615 my @pairs_upper = grep{ $_ =~ /$current_id/} @unique_upper;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
616 # print "good id: $current_id\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
617 my $count_lower = scalar @pairs_lower;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
618 my $count_upper = scalar @pairs_upper;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
619 # print "count lower: $count_lower\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
620 # print "count upper: $count_upper\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
621 unless ((scalar @pairs_lower == 2) || (scalar @pairs_upper == 2)){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
622 push (@readlist_good, "$current_id");
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
623 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
624 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
625 for (@readid_bad){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
626 chomp;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
627 @unsorted = ();
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
628 ($junk, $current_id) = split (/\t/);
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
629 print "current ID: $current_id\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
630 @singleton = grep { $_ =~ /$current_id/} @total_readlist;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
631 for (@singleton){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
632 chomp;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
633 $tag = $_;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
634 @taglist = grep { $_ =~ /$tag/} @readtaglist;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
635 # print "taglist: @taglist\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
636 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
637 for (@taglist) {
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
638 @taglist_line = split /\t/;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
639 push(@unsorted, $taglist_line[1], $taglist_line[2]);
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
640 $max = max @unsorted;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
641 $min = min @unsorted;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
642 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
643 # print "unsorted: @unsorted\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
644 print "read mapping from $min to $max\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
645 # print "min: $min\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
646 # print "max: $max\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
647
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
648 if ($min <= $lower_limit){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
649 print "orphan discarded! min<lowerlimit\n----------------------------------------------\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
650 }elsif ($max >= $upper_limit){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
651 print "orphan discarded! max>upperlimit\n----------------------------------------------\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
652 }elsif (($min >= $lower_main_limit) && ($max <= $upper_main_limit)){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
653 print "orphan discarded! lower_main_limit<min-max<upper_main_limit\n----------------------------------------------\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
654 }elsif (($min >= $elevated_cov_lower_start) && ($min <= $elevated_cov_lower_end - ($lower_limit / 2))){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
655 print "orphan discarded! increased_coverage_lower_start<min<increased_coverage_lower_end\n----------------------------------------------\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
656 }elsif (($max >= ($elevated_cov_upper_start + ($lower_limit / 2))) && ($max <= $elevated_cov_upper_end)){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
657 print "orphan discarded! increased_coverage_upper_start<max<increased_coverage_upper_end\n----------------------------------------------\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
658 }else {
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
659 push(@singletons, "@singleton\n");
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
660 print "orphan resurrected! \n----------------------------------------------\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
661 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
662 # print "contiglength: $contiglength\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
663 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
664
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
665 for (@singletons){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
666 my @resurrection = split /\//;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
667 push (@readlist_good, $resurrection[0]);
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
668 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
669 for (@readlist_good){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
670 $_ =~ s/$/\/1\n/g;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
671 push(@readlist_proofread, $_);
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
672 $_ =~ s/1$/2/g;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
673 push(@readlist_proofread, $_);
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
674 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
675 return @readlist_proofread;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
676 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
677
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
678 sub standard_deviation {
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
679 my(@numbers) = @_;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
680 #Prevent division by 0 error in case you get junk data
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
681 return undef unless(scalar(@numbers));
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
682
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
683 # Step 1, find the mean of the numbers
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
684 my $total1 = 0;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
685 foreach my $num (@numbers) {
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
686 if (!$num){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
687 $num = 0;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
688 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
689 $total1 += $num;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
690 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
691 my $mean1 = $total1 / (scalar @numbers);
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
692 push (my @stdev, "$mean1");
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
693 # Step 2, find the mean of the squares of the differences between each number and the mean
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
694 my $total2 = 0;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
695 foreach my $num (@numbers) {
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
696 if (!$num){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
697 $num = 0;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
698 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
699 $total2 += ($mean1-$num)**2;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
700 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
701 my $mean2 = $total2 / (scalar @numbers);
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
702
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
703 # Step 3, standard deviation is the square root of the above mean
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
704 my $std_dev = sqrt($mean2);
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
705 push (@stdev, "$std_dev");
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
706 return @stdev;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
707 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
708
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
709 sub assess_coverage{
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
710 my $readtaglist_FH = $_[0];
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
711 my @readtaglist;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
712 my $from =$_[1];
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
713 my $to = $_[2];
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
714 my $where = $_[3];
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
715 my @taglist_line;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
716 my @coverage_array_lower;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
717 my @coverage_array_upper;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
718 my @read_ids_lower;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
719 my @read_ids_upper;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
720 my %ids;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
721 my @taglist;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
722 my @unsorted;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
723 my $min;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
724 my $max;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
725 my %coverage;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
726 my @allnums;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
727 my @coverage_change_position;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
728 my @coverage_limits;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
729
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
730 # print "assessing coverage from position $from to position $to\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
731 open (TAGLIST,"<$readtaglist_FH") or die $!;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
732 while (<TAGLIST>){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
733 push (@readtaglist, "$_");
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
734 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
735 close (TAGLIST);
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
736
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
737 for (@readtaglist){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
738 @taglist_line = split /\t/;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
739 unless ($taglist_line[0] =~ /#/){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
740 if ((($taglist_line[1] >= $from) && ($taglist_line[1] <= $to)) || (($taglist_line[2] >= $from) && ($taglist_line[2] <= $to))){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
741 push (@coverage_array_lower, "$_");
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
742 push (@read_ids_lower, $taglist_line[6]);
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
743 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
744 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
745 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
746 %ids = map { $_ => 1 } @read_ids_lower;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
747 my @unique_lower = keys %ids;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
748 for (@unique_lower){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
749 my @current_id = $_;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
750 chomp;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
751 @unsorted = ();
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
752 for (@current_id){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
753 my $current_id = $_;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
754 @taglist = grep { $_ =~ /$current_id/} @coverage_array_lower;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
755 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
756 for (@taglist) {
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
757 @taglist_line = split /\t/;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
758 push(@unsorted, $taglist_line[1], $taglist_line[2]);
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
759 $max = max @unsorted;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
760 $min = min @unsorted;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
761 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
762 my @nums = ($min .. $max);
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
763 for (@nums){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
764 push (@allnums, "$_");
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
765 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
766 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
767 %coverage = map { $_ => 0 } @allnums;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
768 map { $coverage{$_}++ } @allnums;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
769 open (OUT,">out-$where.csv");
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
770
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
771 ########## detecting coverage peak
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
772 my $max_cov = 0;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
773 my $max_cov_position;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
774 my @cumulative_coverage;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
775 map { unless (!$coverage{$_}){print OUT "$_,$coverage{$_}\n"; push (@cumulative_coverage, "$coverage{$_}"); if ($coverage{$_} > $max_cov){$max_cov = $coverage{$_}; $max_cov_position = $_; }}} ($from..$to);
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
776 my @average_coverage = &standard_deviation(@cumulative_coverage);
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
777 my $coverage_factor = $max_cov / $average_coverage[0];
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
778 open (OUT,">>out-$where.csv");
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
779 print OUT "\nmaximum coverage is $max_cov at position $max_cov_position\naverge coverage is: $average_coverage[0], sd: $average_coverage[1]\nfactor $coverage_factor\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
780 close (OUT);
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
781
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
782 ######### detecting rapid changes in coverage
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
783 for ($from..($to - 10)){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
784 my $position = $_;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
785 my $cov = $coverage{$position};
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
786 unless (!$cov){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
787 my @positions = ();
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
788 push (@positions, "$cov");
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
789 for (1 .. 10){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
790 my $pos_plus = $position + $_;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
791 if ($coverage{$pos_plus}){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
792 push (@positions, "$coverage{$pos_plus}");
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
793 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
794 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
795 my @stdev = &standard_deviation(@positions);
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
796 if ($stdev[1] > 6.0){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
797 print "positions ($position): @positions -> stdev: $stdev[1]\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
798 push (@coverage_change_position, $position);
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
799 }elsif ($stdev[1] >= 4.5){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
800 print "positions ($position): @positions -> stdev: $stdev[1]\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
801 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
802 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
803 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
804 if (@coverage_change_position){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
805 print "positions with rapidly changing coverage detected: @coverage_change_position\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
806 my $start = min @coverage_change_position;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
807 my $end = max @coverage_change_position;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
808 push (@coverage_limits, "$start", "$end");
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
809 print "set limits from $coverage_limits[0] to $coverage_limits[1]\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
810 }else{
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
811 print "no irregularities in coverage detected\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
812 push (@coverage_limits, "0", "0");
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
813 return @coverage_limits;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
814 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
815
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
816
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
817 ###### assessing whether coverage peak lies within putative conserved region, if yes accept prediction; if no, reject conserved region
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
818 if (($coverage_factor >= 1.6) && (($coverage_limits[0] < $max_cov_position) && ( $max_cov_position < $coverage_limits[1]))){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
819
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
820 print "suspicious coverage peak detected within the predicted limits\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
821 }else {
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
822 print "no coverage peak detected within predicted limits - rejecting limits\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
823 @coverage_limits = ();
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
824 push (@coverage_limits, "0", "0");
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
825 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
826 return @coverage_limits;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
827
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
828 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
829
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
830 sub check_ref_length{
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
831 my $ref=$_[0];
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
832 my $output_filename=$_[1];
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
833 my $critical=$_[2];
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
834 my $kmer = $_[3];
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
835 my @header;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
836 my $header_count=0;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
837 my (@sequence,@temp_output,@final_output);
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
838 my $full_sequence;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
839 open(REF,"<$ref") or die $!;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
840 while(<REF>){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
841 chomp;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
842 if ($_ =~ /^>/){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
843 push(@header,$_);
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
844 # print "found header:\n$header[$header_count]\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
845 $header_count++;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
846 # print "header count: $header_count\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
847 if (@sequence){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
848 @temp_output=&finalize_sequence($critical,$header[-2],$kmer,@sequence);
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
849 for (@temp_output){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
850 push(@final_output,$_);
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
851 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
852 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
853 undef @sequence;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
854 }elsif ($_ =~ /[a-zA-Z]/){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
855 # print "found sequence:\n$_\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
856 push(@sequence,$_);
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
857 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
858 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
859 @temp_output=&finalize_sequence($critical,$header[-1],$kmer,@sequence);
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
860 for (@temp_output){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
861 push(@final_output,$_);
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
862 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
863 # print "result:\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
864 open (OUT,">$output_filename") or die $!;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
865 for(@final_output){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
866 # print "$_\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
867 print OUT "$_\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
868 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
869 close REF;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
870 close OUT;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
871
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
872 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
873
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
874 sub finalize_sequence{
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
875 my $critical = shift;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
876 my $header = shift;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
877 my $kmer = shift;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
878 my $full_sequence=join("",@_);
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
879 my $factor;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
880 my @output;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
881 if (!$critical){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
882 $factor=0;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
883 }else{
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
884 $factor=ceil(length($full_sequence)/$critical);
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
885 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
886 if ($factor == 1){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
887 push(@output,$header);
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
888 push(@output,$full_sequence);
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
889 }else{ #too long
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
890 print "\nreference is too long for mirabait to be handled in one go -> will be split into sub-sequences\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
891 $header=substr $header, 1;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
892 for (my $i=0; $i<$factor; $i++){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
893 unless ((length(substr $full_sequence, $i*$critical, $critical+$kmer)-$kmer)<0){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
894 push(@output,">sub$i\_" .$header);
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
895 push(@output,substr $full_sequence, $i*$critical, $critical+$kmer);
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
896 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
897 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
898 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
899 return @output;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
900 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
901
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
902 sub create_manifest {
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
903 my ($iter, $sampleID, $refID, $mmode, $trim, $platform, $solexa_missmatches, $pair, $overhang, $reads, $ref, $redirect, $NFS_warn);
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
904 $iter = $_[0];
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
905 $sampleID = $_[1];
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
906 $refID = $_[2];
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
907 $mmode = $_[3];
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
908 $trim = $_[4];
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
909 $platform = $_[5];
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
910 $solexa_missmatches = $_[6];
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
911 $pair = $_[7];
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
912 $overhang = $_[8];
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
913 $reads = $_[9];
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
914 $ref = $_[10];
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
915 $redirect = $_[11];
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
916 $NFS_warn = $_[12];
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
917
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
918 if ($NFS_warn){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
919 $NFS_warn = ":cnfs=warn"
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
920 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
921
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
922 open (MANIFEST,">manifest.conf") or die $!;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
923 print MANIFEST "#manifest file for iteration $iter created by MITObim\n\nproject = $sampleID-$refID
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
924 \njob = genome,$mmode,accurate
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
925 \nparameters = -NW:mrnl=0$NFS_warn -AS:nop=1 $redirect $overhang $platform $trim -CO:msr=no $solexa_missmatches\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
926 my @technology = split("_",$platform);
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
927 #-notraceinfo -
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
928 if ($mmode eq "mapping"){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
929 print MANIFEST "\nreadgroup\nis_reference\ndata = $ref\nstrain = $refID\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
930 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
931 print MANIFEST "\nreadgroup = reads\ndata = $reads\ntechnology = $technology[0]";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
932 if ($pair){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
933 # print MANIFEST "\nsegmentplacement = ---> <--- infoonly";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
934 print MANIFEST "\nsegmentplacement = ---> <--- exclusion_criterion";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
935 print MANIFEST "\ntemplatesize = -1 600 exclusion_criterion";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
936 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
937 print MANIFEST "\nstrain = $sampleID\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
938 close MANIFEST;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
939 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
940 sub clean {
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
941 my $interval = shift;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
942 my $cur = shift;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
943 my $dir = $cur-$interval;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
944 my $path=abs_path;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
945 if (-d "$path/../iteration$dir"){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
946 print "\nnow removing directory iteration$dir\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
947 rmtree ("$path/../iteration$dir") or die $!;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
948 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
949 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
950
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
951 sub remove_unmapped_contigs{
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
952 my $file = shift;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
953 my $out = shift;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
954 my ($head, $seq,$length);
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
955 my @split;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
956 my ($dropped,$split,$trim)=(0,0,0);
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
957 open (FASTA,"<$out") or die $!."\ncould not open $file for reading\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
958 while (<FASTA>){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
959 chomp;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
960 if ($_ =~ /^>/){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
961 $head=$_;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
962 }else {
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
963 if ($_ =~ /X/){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
964 $dropped++;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
965 print "drop:\n$head\n$_\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
966 }else{
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
967 $length=length($_);
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
968 $_ =~ s/^N+//g; #remove all Ns from the beginning of the sequence
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
969 $_ =~ s/N+$//g; #remove all Ns from the end of the sequence
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
970 if ($length != length($_)){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
971 $trim++;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
972 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
973 @split=split(/N{3,}/); #split at every occurance of 3 or more Ns
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
974 if (@split > 1){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
975 print "contig $head has been split into ".scalar @split." sub-contigs\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
976 $split++;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
977 for (my $i=0; $i<@split; $i++){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
978 # print "$head\_$i\n$split[$i]\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
979 $seq.="$head\_$i\n$split[$i]\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
980 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
981 }else{
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
982 # print "$head\n$_\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
983 $seq.="$head\n$_\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
984 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
985 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
986 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
987 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
988 close FASTA;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
989 if (($dropped) || ($split) || ($trim)){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
990 print "creating backup of $out before changing it\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
991 copy $out,$file;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
992 open (OUT,">$out") or die $!."\ncould not open $out for writing\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
993 print OUT "$seq\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
994 close OUT;
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
995 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
996
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
997 if ($dropped){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
998 print "\nremoved $dropped contigs from baitfile because they did not receive any hits\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
999 }else{
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
1000 print "\nno need to remove any sequences from the baitfile\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
1001 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
1002
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
1003 if (!$split){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
1004 print "\nno need to split any contigs\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
1005 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
1006 if ($trim){
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
1007 print "\n$trim contigs had N's trimmed off their ends\n";
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
1008 }
a03d23c6ab95 MitoBim and interleave
lijing
parents:
diff changeset
1009 }