#!/usr/bin/perl

# assignFastqByITS.pl
#
# For every *.fastq.gz file found directly inside the directory given as the
# first command-line argument, count (via `zgrep -c`) the lines containing
# each reference sequence of %sequences, and print one tab-separated record
# per (file, sequence) pair on STDOUT:
#
#     <file path> <TAB> <sequence key> <TAB> <number of matching lines>
#
# Usage: perl assignFastqByITS.pl my_fastq_dir

use strict;
use warnings;

my $directory;
# Test for "defined and non-empty" rather than plain truth, so that a
# directory literally named "0" is not mistaken for a missing argument.
if (defined $ARGV[0] && length $ARGV[0]) {
    $directory = $ARGV[0];
}
else {
    # Usage text, output channel and exit status are kept as they were so
    # that existing callers see no difference.
    print "You must provide as an argument a directory containing fastq.gz file for testing the presence of ITS sequences\nex: perl assignFastqByITS.pl my_fastq_dir\n";
    exit;
}

# Reference sequences to look for, keyed by a numeric label (the loop below
# reports this label in the "genotype" column).
my %sequences = (
    "1" => "ATGGTAAAAAAGGTGAAAACCTTTTTTCCATTTACAAAAAATAACTCTGTGCAATGGAGCACACGGCTCGTGCATCGA",
    "2" => "ATGGTAAAAAAGGTGAAAACCTTTTTTCCATTTACAAAAAATAACTCTGTGCAATGGAGCACACGGCTCGTGTATCGA",
    "3" => "AAAAGGTGTATGGTAAAAAAGGTGAAAACCTTTTTTCCATTTACAAAAAATAACTCTGTGCAATGGAGCACACGGCTCGTGTATCGA",
    "4" => "ATGGTAAAAAAGGTGAAAACCTTTTTTTATGGTAAAAAAGGTGTATGGTAAAAAAGGTGAAAACCTTTTTTCCATTTACAAAAAATAACTCTGTGCAATGGAGCACACGGCTCGTGCATCGA",
    "5" => "ATGGTAAAAAAGGTGAAAACCTTTTTTTATGGTAAAAAGTGTATGGTAAAAAAGGTGAAAACCTTTTTTCCATTTACAAAAAATAACTCTGTGCAATGGAGCACACGGCTCGTGCATCGA",
    "6" => "ATGGTAAAAAAGGTGAAAACCTTTTTTTATGGTAAAAAAGGTGAAAACCTTTTTTTATGGTAAAAAAGGTGTATGGTAAAAAAGGTGAAAACCTTTTTTCCATTTACAAAAAATAACTCTGTGCAATGGAGCACACGGCTCGTGTATCGA",
    "7" => "ATGGTAAAAAAGGTGAAAACCTTTTTTTATGGTAAAAAAGGTGAAAACCTTTTTTTATGGTAAAAAAGGTGAAAACCTTTTTTTATGGTAAAAAAGGTGTATGGTAAAAAAGGTGAAAACCTTTTTTCCATTTACAAAAAATAACTCTGTGCAATGGAGCACACGGCTCGTGTATCGA",
    "8" => "ATGGTAAAAAAGGTGAAAACCTTTTTTTATGGTAAAAAAGGTGTATGGTAAAAAAGGTGAAAACCTTTTTTTATGGTAAAAAAGGTGTATGGTAAAAAAGGTGAAAACCTTTTTTCCATTTACAAAAAATAACTCTGTGCAATGGAGCACACGGCTCGTGTATCGA",
);

# List the directory from Perl instead of piping through `ls`: no shell is
# involved, so directory names containing spaces or shell metacharacters are
# handled correctly, and an unreadable directory is reported explicitly.
opendir(my $dir_handle, $directory)
    or die "Cannot open directory '$directory': $!\n";
my @fastq_files =
    map  { "$directory/$_" }
    sort
    # Same selection as the shell pattern "*.fastq.gz": the name must end in
    # ".fastq.gz" and must not start with a dot (hidden files are skipped).
    grep { /\A[^.].*\.fastq\.gz\z/s }
    readdir($dir_handle);
closedir($dir_handle);

warn "No *.fastq.gz file found in '$directory'\n" unless @fastq_files;

foreach my $file (@fastq_files) {
    # Numeric sort so the labels are reported in the order 1, 2, 3, ...
    foreach my $genotype (sort { $a <=> $b } keys %sequences) {
        my $seq = $sequences{$genotype};

        # List-form pipe open: zgrep is executed directly, without a shell,
        # so neither the file path nor the pattern can be re-interpreted.
        open(my $zgrep, '-|', 'zgrep', '-c', $seq, $file)
            or die "Cannot run zgrep on '$file': $!\n";
        my $count = do { local $/; <$zgrep> };
        close($zgrep);

        # grep-style exit status: 0 = match found, 1 = no match (both are
        # normal results here); anything above 1, or death by signal, is a
        # real failure worth reporting.
        if (($? & 127) || ($? >> 8) > 1) {
            warn "zgrep failed on '$file' (wait status $?)\n";
        }

        $count = '' unless defined $count;
        $count =~ s/[\r\n]//g;

        print "$file\t$genotype\t$count\n";
    }
}