Previous changeset 52:945d903dad5a (2017-12-01) |
Commit message:
Deleted selected files |
removed:
align.pm |
b |
diff -r 945d903dad5a -r bd91551eb3e1 align.pm --- a/align.pm Fri Dec 01 09:33:36 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,456 +0,0 @@\n-package align;\n-\n-use strict;\n-use warnings;\n-use File::Basename;\n-use String::Random;\n-\n-use FindBin;\n-use lib $FindBin::Bin;\n-use Rcall qw ( histogram );\n-\n-use Exporter;\n-our @ISA = qw( Exporter );\n-our @EXPORT = qw( &BWA_call &to_build &get_unique &sam_sorted_bam &get_hash_alignment &sam_to_bam_bg &sam_count &sam_count_mis &rpms_rpkm &get_fastq_seq &extract_sam );\n-\n-sub to_build\n-{\n- my ( $toBuildTabP, $log, $newdir ) = @_;\n-\n- foreach my $pairs ( @{ $toBuildTabP } ) \n- {\n- if ( $pairs->[0] == 1 ) \n- { \n- my $sym = $newdir.basename(${$pairs->[1]}).\'_symlink.fa\';\n- symlink( ${$pairs->[1]}, $sym );\n- ${$pairs->[1]} = $sym;\n- build_index ( ${$pairs->[1]}, $log );\n- } \n- }\n-}\n-\n-sub build_index\n-{\n-\tmy $to_index = shift;\n-\tmy $log = shift;\n-\tmy $index_log = $to_index.\'_index.err\';\n-\t`bwa index $to_index 2> $index_log`;\n-\tprint $log "Creating index for $to_index\\n";\n-}\n-\n-sub get_unique\n-{\n-\tmy ( $sam, $s_uni, $out_prefix, $col_prefix, $details, $report ) = @_;\n-\n-\tmy $fout = $col_prefix.\'_all_mappers.fastq\'; \n-\tmy $funi = $col_prefix.\'_unique_mappers.fastq\';\n-\tmy $frej = $col_prefix.\'_unmapped.fastq\';\n-\t\n- my $repartition = $out_prefix.\'distribution.txt\';\n-\tmy $png_rep = $out_prefix.\'distribution.png\';\n-\tmy ( %duplicates, %genome_hits) ;\n-\n-\t#alignement to the first reference\n-\tmy @return = sam_parse( $sam, $fout, $funi, $frej, $s_uni, \\%duplicates, \\%genome_hits, $report );\n-\tmy $ref_fai = $return[4];\n-\tmy $mappers = $return[5];\n-\tmy $mappers_uni = $return[6];\n-\tmy $size_mappedHashR = $return[7];\n-\n-\tif ( $details == 1 )\n-\t{\n-\t\t#print number of duplicates and hits number\n-\t\tmy ($pourcentage, $total) =(0,0);\n-\n-\t\t$total += $_ foreach values %{$size_mappedHashR};\n-\t\topen (my $rep, \'>\'.$repartition) || die "cannot create $repartition $!\\n";\n-\t\tprint $rep "size\\tnumber\\tpercentage\\n";\n-\t\tforeach my $k (sort{$a cmp $b} keys (%{$size_mappedHashR}))\n-\t\t{\n-\t\t\t$pourcentage = 0;\n-\t\t\t$pourcentage = $size_mappedHashR->{$k} / $total * 100 unless $total ==0;\n-\n-\t\t\tprint $rep "$k\\t$size_mappedHashR->{$k}\\t";\n-\t\t\tprintf $rep "%.2f\\n",$pourcentage;\n-\t\t}\n-\n-\t\thistogram($size_mappedHashR, $png_rep, $total);\n-\n-\n-\t\tmy $dup = $out_prefix.\'dup_mapnum.txt\';\n-\t\tmy $dup_u = $out_prefix .\'dup_unique.txt\';\n-\t\tmy $dup_r = $out_prefix .\'dup_nonmapp.txt\';\n-\t\topen(my $tab,">".$dup) || die "cannot open output txt file\\n";\n-\t\topen(my $tab_r,">".$dup_r) || die "cannot open output txt file\\n";\n-\t\topen(my $tab_u,">".$dup_u) || die "cannot open output txt file\\n";\n-\t\tprint $tab "sequence\\tcount\\tmapnum\\n";\n-\t\tprint $tab_u "sequence\\tcount\\n";\n-\t\tprint $tab_r "sequence\\tcount\\n";\n-\t\tforeach my $k (sort {$duplicates{$b} <=> $duplicates{$a}}keys %duplicates)\n-\t\t{\n-\t\t\t$duplicates{$k} = 0 unless exists($duplicates{$k});\n-\t\t\t$genome_hits{$k} = 0 unless exists($genome_hits{$k});\n-\t\t\tif ($genome_hits{$k} != 0) { print $tab $k."\\t".$duplicates{$k}."\\t".$genome_hits{$k}."\\n"; }\n-\t\t\telse {print $tab_r $k."\\t".$duplicates{$k}."\\n";}\n-\t\t\tif ($genome_hits{$k} == 1) { print $tab_u $k."\\t".$duplicates{$k}."\\n"; }\n-\t\t}\n-\tclose $dup; close $dup_r; close $dup_u;\n-\t}\n-\treturn ( $ref_fai, $mappers, $mappers_uni );\n-}\n-\n-sub sam_parse\n-{\n-\tmy ( $sam, $fastq_accepted, $fastq_accepted_unique, $fastq_rejected, $sam_unique, $duplicate_hashR, $best_hit_number_hashR, $report ) = @_ ;\n-\tmy ($reads, $mappers, $mappersUnique, @garbage, %size_num, %size_num_spe, %number, %numberSens, %numberReverse, %unique_number, %numberNM, %numberM, %size);\n-\t$mappers = $mappersUnique = $reads = 0;\n-\n-\topen my $fic, \'<\', $sam || die "cannot open $sam $!\\n";\n-\topen my $accepted, \'>\', $fastq_accepted || die "cannot create $fastq_accepted $! \\n";\n-\topen my $unique, \'>\', $fastq_accepted_unique || die "cannot create $fastq_accepted_unique $! \\n";\n-\topen my $rejected, \'>\', $fastq_rejected || die "cannot create $fastq_rejected $! \\n";\n-\topen my $sam_uni, \'>\', $sam_unique || die "cannot create $sam_unique $! \\n";\n-\tmy $sequence = \'\';\n-\twhile(<$fic>)'..b'rt -O BAM --threads $number_of_cpus /dev/stdin 2> $sort_err > $bam_sorted`;\n-}\n-\n-sub BWA_call\n-{\n-\tmy ( $index, $fastq, $sam, $mismatches, $number_of_cpus, $report ) = @_;\n-\tmy ( $aln_err, $samse_err, $seq_num ) = ( $sam.\'_aln.err\', $sam.\'_samse.err\', 0 );\n-\tprint $report "-----------------------------\\n";\n-\tprint $report "bwa aln -t $number_of_cpus -n $mismatches $index $fastq 2> $aln_err | bwa samse $index /dev/stdin $fastq 2> $samse_err > $sam\\n";\n-\t`bwa aln -t $number_of_cpus -n $mismatches $index $fastq 2> $aln_err | bwa samse $index /dev/stdin $fastq 2> $samse_err > $sam `;\n-}\n-\n-sub rpms_rpkm\n-{\n- my ( $counthashR, $sizehashR, $mapped, $out_file, $piRNA_number, $miRNA_number, $bonafide_number ) =@_;\n- open(my $out, ">".$out_file) || die "cannot open normalized file $! \\n";\n- print $out "ID\\treads counts\\tRPKM";\n- print $out "\\tper million of piRNAs" if ($piRNA_number != 0);\n- print $out "\\tper million of miRNAs" if ($miRNA_number != 0);\n- print $out "\\tper million of bonafide reads" if ($bonafide_number != 0);\n- print $out "\\n";\n- foreach my $k ( sort keys %{$counthashR} )\n- {\n- my ($rpkm, $pirna, $mirna, $bonafide) = (0,0,0,0);\n- \n- $rpkm = ( $counthashR->{$k} * 1000000000) / ( $sizehashR->{$k} * $mapped) if ( $sizehashR->{$k} * $mapped) != 0 ;\n- print $out $k."\\t".$counthashR->{$k}."\\t"; printf $out "%.2f",$rpkm;\n- \n- if ($piRNA_number != 0 )\n- {\n- $pirna = ( $counthashR->{$k} * 1000000) / $piRNA_number;\n- printf $out "\\t%.2f",$pirna;\n- }\n- if ($miRNA_number != 0 )\n- {\n- $mirna = ( $counthashR->{$k} * 1000000) / $miRNA_number;\n- printf $out "\\t%.2f",$mirna;\n- }\n- if ($bonafide_number != 0 )\n- {\n- $bonafide = ( $counthashR->{$k} * 1000000) / $bonafide_number;\n- printf $out "\\t%.2f",$bonafide;\n- }\n- print $out "\\n";\n- \t}\n- close $out;\n-}\n-\n-sub extract_sam\n-{\n- my ( $hashRef, $sam_in, $sam_out, $sam_uni_out, $fastq_out, $fastq_uni_out ) = @_;\n- \n-\topen my $s_in, \'<\', $sam_in || die "cannot open $sam_in file $!\\n";\n-\n- open my $f_out, \'>\', $fastq_out || die "cannot create $fastq_out $!\\n"; \n- open my $f_uni_out, \'>\', $fastq_uni_out || die "cannot create $fastq_uni_out $!\\n"; \n- \n- open my $s_out, \'>\', $sam_out || die "cannot create $sam_out file $!\\n" if defined ($hashRef);\n- open my $s_uni_out, \'>\', $sam_uni_out || die "cannot create $sam_uni_out file $!\\n";\n-\n- my $sequence = \'\';\n- while(<$s_in>)\n- {\n- if ($_ =~ /^\\@[A-Za-z][A-Za-z](\\t[A-Za-z][A-Za-z0-9]:[ -~]+)+$/ || $_ =~ /^\\@CO\\t.*/ )\n- {\n- print $s_out $_ if defined ($hashRef);\n- print $s_uni_out $_;\n- next;\n- }\n- my @line = split (/\\t/,$_);\n- $sequence = $line[0];\n- if ( (! defined ($hashRef) )|| ( exists $hashRef->{$sequence} && $hashRef->{$sequence} == 1 ) )\n- {\n- my $arn = $line[9];\n- if ($line[1] & 16)\n- {\n- $arn =reverse($arn);\n- $arn =~ tr/atgcuATGCU/tacgaTACGA/;\n- }\n-\n- if ( ( $line[1] == 16 || $line[1] == 0 ) )\n-\t\t\t{\n- \tprint $f_out "\\@".$line[0]."\\n".$arn."\\n+\\n".$line[10]."\\n" ;\n- \tprint $s_out $_ if defined ($hashRef);\n- \tif ( $line[11] eq "XT:A:U" )\n- \t{\n- \t\tprint $f_uni_out "\\@".$line[0]."\\n".$arn."\\n+\\n".$line[10]."\\n" ;\n- \t\tprint $s_uni_out $_ ;\n- \t}\n- }\n- } \n- }\n- close $s_in; close $s_out if defined ($hashRef); \n- close $s_uni_out; close $f_out; close $f_uni_out;\n-}\n-\n-sub get_fastq_seq{\n- my $fastq = shift;\n- my %hash; my $cmp = 0; \n-\n- open my $fic, \'<\', $fastq || die "cannot open input file $! \\n";\n- while(<$fic>)\n- {\n- chomp $_;\n- $cmp++;\n- if ($cmp % 4 == 1)\n- {\n- die "file do not contain a @ at line $cmp\\n" unless ($_ =~ /^\\@/ );\n- if ($_ =~ /^\\@(.*)\\s.*/) { $hash{$1} = 1;}\n- elsif ($_ =~ /^\\@(.*)/) { $hash{$1} = 1;}\n- }\n- elsif ($cmp % 4 == 3 )\n- {\n- die "file do not contain a + at line $cmp\\n" unless $_ =~ /^\\+/;\n- }\n- }\n- close $fic;\n- return \\%hash;\n-}\n-\n-1;\n' |