annotate Perl/Naegleria/assignFastqByITS.pl @ 14:5a5c9a6b047b draft

Uploaded
author dereeper
date Tue, 10 Dec 2024 16:20:53 +0000
parents e42d30da7a74
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3
e42d30da7a74 Uploaded
dereeper
parents:
diff changeset
#!/usr/bin/perl

# assignFastqByITS.pl
#
# For every *.fastq.gz file found directly inside the directory given as the
# first command-line argument, report how many lines of the file contain each
# of the reference sequences listed in %sequences.
#
# Usage:
#     perl assignFastqByITS.pl my_fastq_dir
#
# Output (STDOUT), one line per (file, genotype) pair:
#     <file> <genotype> <count>
# where <count> is the value printed by `zgrep -c` (number of matching lines).
#
# External commands are run without going through a shell, so directory and
# file names containing spaces or shell metacharacters are handled safely.

use strict;
use warnings;

my $directory = $ARGV[0];

# Test defined/length rather than truthiness so that a directory literally
# named "0" is still accepted.
unless (defined $directory && length $directory) {
    print "You must provide as an argument a directory containing fastq.gz file for testing the presence of ITS sequences\nex: perl assignFastqByITS.pl my_fastq_dir\n";
    exit;
}

# Reference sequences, keyed by genotype number.
# Note: some entries are contained in others (e.g. "1" is a suffix of "4"),
# so a single line may be counted for more than one genotype.
my %sequences = (
    "1" => "ATGGTAAAAAAGGTGAAAACCTTTTTTCCATTTACAAAAAATAACTCTGTGCAATGGAGCACACGGCTCGTGCATCGA",
    "2" => "ATGGTAAAAAAGGTGAAAACCTTTTTTCCATTTACAAAAAATAACTCTGTGCAATGGAGCACACGGCTCGTGTATCGA",
    "3" => "AAAAGGTGTATGGTAAAAAAGGTGAAAACCTTTTTTCCATTTACAAAAAATAACTCTGTGCAATGGAGCACACGGCTCGTGTATCGA",
    "4" => "ATGGTAAAAAAGGTGAAAACCTTTTTTTATGGTAAAAAAGGTGTATGGTAAAAAAGGTGAAAACCTTTTTTCCATTTACAAAAAATAACTCTGTGCAATGGAGCACACGGCTCGTGCATCGA",
    "5" => "ATGGTAAAAAAGGTGAAAACCTTTTTTTATGGTAAAAAGTGTATGGTAAAAAAGGTGAAAACCTTTTTTCCATTTACAAAAAATAACTCTGTGCAATGGAGCACACGGCTCGTGCATCGA",
    "6" => "ATGGTAAAAAAGGTGAAAACCTTTTTTTATGGTAAAAAAGGTGAAAACCTTTTTTTATGGTAAAAAAGGTGTATGGTAAAAAAGGTGAAAACCTTTTTTCCATTTACAAAAAATAACTCTGTGCAATGGAGCACACGGCTCGTGTATCGA",
    "7" => "ATGGTAAAAAAGGTGAAAACCTTTTTTTATGGTAAAAAAGGTGAAAACCTTTTTTTATGGTAAAAAAGGTGAAAACCTTTTTTTATGGTAAAAAAGGTGTATGGTAAAAAAGGTGAAAACCTTTTTTCCATTTACAAAAAATAACTCTGTGCAATGGAGCACACGGCTCGTGTATCGA",
    "8" => "ATGGTAAAAAAGGTGAAAACCTTTTTTTATGGTAAAAAAGGTGTATGGTAAAAAAGGTGAAAACCTTTTTTTATGGTAAAAAAGGTGTATGGTAAAAAAGGTGAAAACCTTTTTTCCATTTACAAAAAATAACTCTGTGCAATGGAGCACACGGCTCGTGTATCGA",
);

# Run `zgrep -c` for one sequence on one file and return its output with line
# terminators removed (an empty string if zgrep printed nothing).
#   $seq  - pattern to search for
#   $file - path of the (gzip-compressed) file to search
# Dies if zgrep cannot be started; warns if zgrep reports an error.
sub count_matching_lines {
    my ($seq, $file) = @_;

    # A path starting with '-' would be parsed by zgrep as an option.
    my $target = ($file =~ /^-/) ? "./$file" : $file;

    # List-form pipe open: arguments go straight to zgrep, no shell involved.
    open(my $zgrep, '-|', 'zgrep', '-c', '-e', $seq, $target)
        or die "Cannot run zgrep on '$file': $!\n";
    my $count = do { local $/; <$zgrep> };
    close($zgrep);

    # grep exits with 1 when nothing matched; that is a valid count of 0,
    # not a failure. Anything else non-zero is a real error.
    if ($? != 0 && ($? >> 8) != 1) {
        warn "zgrep failed on '$file' (wait status $?)\n";
    }

    $count = '' unless defined $count;
    $count =~ s/[\r\n]//g;
    return $count;
}

# List the directory directly instead of parsing the output of `ls`.
# Hidden files are skipped, as the shell pattern *.fastq.gz would skip them.
opendir(my $dh, $directory)
    or die "Cannot open directory '$directory': $!\n";
my @fastq_files =
    map  { "$directory/$_" }
    sort
    grep { !/^\./ && /\.fastq\.gz\z/ }
    readdir($dh);
closedir($dh);

foreach my $file (@fastq_files) {
    foreach my $genotype (sort { $a <=> $b } keys %sequences) {
        my $count = count_matching_lines($sequences{$genotype}, $file);
        print "$file $genotype $count\n";
    }
}