diff Perl/Naegleria/assignFastqByITS.pl @ 3:e42d30da7a74 draft

Uploaded
author dereeper
date Thu, 30 May 2024 11:52:25 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Perl/Naegleria/assignFastqByITS.pl	Thu May 30 11:52:25 2024 +0000
@@ -0,0 +1,37 @@
+#!/usr/bin/perl
+
+use strict;
+
+my $directory;
+if ($ARGV[0]){
+	$directory = $ARGV[0];
+}
+else{
+	print "You must provide as an argument a directory containing fastq.gz file for testing the presence of ITS sequences\nex: perl assignFastqByITS.pl my_fastq_dir\n";
+	exit;
+}
+
+my %sequences =(
+"1"=>"ATGGTAAAAAAGGTGAAAACCTTTTTTCCATTTACAAAAAATAACTCTGTGCAATGGAGCACACGGCTCGTGCATCGA",
+"2"=> "ATGGTAAAAAAGGTGAAAACCTTTTTTCCATTTACAAAAAATAACTCTGTGCAATGGAGCACACGGCTCGTGTATCGA",
+"3"=> "AAAAGGTGTATGGTAAAAAAGGTGAAAACCTTTTTTCCATTTACAAAAAATAACTCTGTGCAATGGAGCACACGGCTCGTGTATCGA",
+"4"=> "ATGGTAAAAAAGGTGAAAACCTTTTTTTATGGTAAAAAAGGTGTATGGTAAAAAAGGTGAAAACCTTTTTTCCATTTACAAAAAATAACTCTGTGCAATGGAGCACACGGCTCGTGCATCGA",
+"5"=>"ATGGTAAAAAAGGTGAAAACCTTTTTTTATGGTAAAAAGTGTATGGTAAAAAAGGTGAAAACCTTTTTTCCATTTACAAAAAATAACTCTGTGCAATGGAGCACACGGCTCGTGCATCGA",
+"6"=>"ATGGTAAAAAAGGTGAAAACCTTTTTTTATGGTAAAAAAGGTGAAAACCTTTTTTTATGGTAAAAAAGGTGTATGGTAAAAAAGGTGAAAACCTTTTTTCCATTTACAAAAAATAACTCTGTGCAATGGAGCACACGGCTCGTGTATCGA",
+"7"=>"ATGGTAAAAAAGGTGAAAACCTTTTTTTATGGTAAAAAAGGTGAAAACCTTTTTTTATGGTAAAAAAGGTGAAAACCTTTTTTTATGGTAAAAAAGGTGTATGGTAAAAAAGGTGAAAACCTTTTTTCCATTTACAAAAAATAACTCTGTGCAATGGAGCACACGGCTCGTGTATCGA",
+"8"=>"ATGGTAAAAAAGGTGAAAACCTTTTTTTATGGTAAAAAAGGTGTATGGTAAAAAAGGTGAAAACCTTTTTTTATGGTAAAAAAGGTGTATGGTAAAAAAGGTGAAAACCTTTTTTCCATTTACAAAAAATAACTCTGTGCAATGGAGCACACGGCTCGTGTATCGA"
+);
+
+
+open(LS,"ls $directory/*.fastq.gz |");
+while(<LS>){
+	my $file = $_;
+	$file =~s/\n//g;$file =~s/\r//g;
+	foreach my $genotype(sort {$a<=>$b} keys(%sequences)){
+		my $seq = $sequences{$genotype};
+		my $cmd = `zgrep -c '$seq' $file`;
+		$cmd =~s/\n//g;$cmd =~s/\r//g;
+		print "$file	$genotype	$cmd\n";
+	}
+}
+close(LS);