#!/usr/bin/perl

# assignFastqByITS.pl (script name as given in the usage message below)
#
# For every *.fastq.gz file found directly inside the directory passed as
# the first command-line argument, count the lines that contain each of the
# reference sequences listed in %sequences, and print one line per
# file/genotype pair in the form:
#
#     <file> <genotype> <count>
#
# Usage: perl assignFastqByITS.pl my_fastq_dir
#
# Requires the external `gzip` program to be available in PATH.

use strict;
use warnings;

# Accept any non-empty argument (a plain truth test would reject a directory
# literally named "0").
my $directory = $ARGV[0];
if (!defined $directory || $directory eq '') {
    # A usage error goes to STDERR with a non-zero status so that calling
    # pipelines can detect it.
    print STDERR "You must provide as an argument a directory containing fastq.gz file for testing the presence of ITS sequences\nex: perl assignFastqByITS.pl my_fastq_dir\n";
    exit 1;
}

# Genotype identifier => sequence searched for as a literal substring.
# Some sequences are contained in others, so one read line may be counted
# for several genotypes (this mirrors the behaviour of independent greps).
my %sequences = (
    "1" => "ATGGTAAAAAAGGTGAAAACCTTTTTTCCATTTACAAAAAATAACTCTGTGCAATGGAGCACACGGCTCGTGCATCGA",
    "2" => "ATGGTAAAAAAGGTGAAAACCTTTTTTCCATTTACAAAAAATAACTCTGTGCAATGGAGCACACGGCTCGTGTATCGA",
    "3" => "AAAAGGTGTATGGTAAAAAAGGTGAAAACCTTTTTTCCATTTACAAAAAATAACTCTGTGCAATGGAGCACACGGCTCGTGTATCGA",
    "4" => "ATGGTAAAAAAGGTGAAAACCTTTTTTTATGGTAAAAAAGGTGTATGGTAAAAAAGGTGAAAACCTTTTTTCCATTTACAAAAAATAACTCTGTGCAATGGAGCACACGGCTCGTGCATCGA",
    "5" => "ATGGTAAAAAAGGTGAAAACCTTTTTTTATGGTAAAAAGTGTATGGTAAAAAAGGTGAAAACCTTTTTTCCATTTACAAAAAATAACTCTGTGCAATGGAGCACACGGCTCGTGCATCGA",
    "6" => "ATGGTAAAAAAGGTGAAAACCTTTTTTTATGGTAAAAAAGGTGAAAACCTTTTTTTATGGTAAAAAAGGTGTATGGTAAAAAAGGTGAAAACCTTTTTTCCATTTACAAAAAATAACTCTGTGCAATGGAGCACACGGCTCGTGTATCGA",
    "7" => "ATGGTAAAAAAGGTGAAAACCTTTTTTTATGGTAAAAAAGGTGAAAACCTTTTTTTATGGTAAAAAAGGTGAAAACCTTTTTTTATGGTAAAAAAGGTGTATGGTAAAAAAGGTGAAAACCTTTTTTCCATTTACAAAAAATAACTCTGTGCAATGGAGCACACGGCTCGTGTATCGA",
    "8" => "ATGGTAAAAAAGGTGAAAACCTTTTTTTATGGTAAAAAAGGTGTATGGTAAAAAAGGTGAAAACCTTTTTTTATGGTAAAAAAGGTGTATGGTAAAAAAGGTGAAAACCTTTTTTCCATTTACAAAAAATAACTCTGTGCAATGGAGCACACGGCTCGTGTATCGA",
);

# Genotypes are reported in ascending numeric order for every file.
my @genotypes = sort { $a <=> $b } keys %sequences;

my @fastq_files = list_fastq_files($directory);
warn "No *.fastq.gz file found in '$directory'\n" unless @fastq_files;

for my $file (@fastq_files) {
    my $count_of = count_matching_lines($file, \%sequences);
    for my $genotype (@genotypes) {
        print "$file $genotype $count_of->{$genotype}\n";
    }
}

# Return the sorted list of "$dir/<name>" paths for every entry of $dir whose
# name ends in ".fastq.gz". Hidden entries (leading dot) are skipped, as a
# shell "*.fastq.gz" glob would do. The directory is read directly instead of
# going through `ls`, so no shell ever sees the user-supplied path.
# Dies if the directory cannot be opened.
sub list_fastq_files {
    my ($dir) = @_;

    opendir(my $dh, $dir) or die "Cannot open directory '$dir': $!\n";
    my @names = grep { !/\A\./ && /\.fastq\.gz\z/ } readdir($dh);
    closedir($dh);

    @names = sort @names;
    return map { "$dir/$_" } @names;
}

# Decompress $path once and count, for each genotype of %$seq_of, the number
# of lines containing the corresponding sequence at least once (the same
# figure `grep -c` reports for a literal pattern).
# Returns a hash reference: genotype => line count.
# Dies if gzip cannot be started; warns if gzip exits with an error, in which
# case the counts only cover what could be read.
sub count_matching_lines {
    my ($path, $seq_of) = @_;

    my %count = map { $_ => 0 } keys %{$seq_of};

    # List-form pipe open: arguments are passed to gzip verbatim, without any
    # shell, so file names containing spaces or metacharacters are safe.
    # "--" stops option parsing in case a name starts with a dash.
    open(my $in, '-|', 'gzip', '-dc', '--', $path)
        or die "Cannot run gzip on '$path': $!\n";

    while (my $line = <$in>) {
        for my $genotype (keys %{$seq_of}) {
            # Sequences are plain ACGT strings, so a substring test is
            # equivalent to the regex match and cheaper.
            $count{$genotype}++ if index($line, $seq_of->{$genotype}) >= 0;
        }
    }

    # close() on a pipe returns false when the child exited non-zero.
    close($in)
        or warn "gzip reported a problem while reading '$path' (exit status "
              . ($? >> 8) . ")\n";

    return \%count;
}