# HG changeset patch # User romaingred # Date 1508140009 14400 # Node ID 16c3c1b0362ab952a333eb656aad0e9926c5c896 # Parent 8b7000eac6a0030c8d1e43daa34935fbed9ad813 Uploaded diff -r 8b7000eac6a0 -r 16c3c1b0362a bin/html.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bin/html.pm Mon Oct 16 03:46:49 2017 -0400 @@ -0,0 +1,907 @@
package html;

use strict;
use warnings;
use File::Basename;

use Exporter;
our @ISA = qw( Exporter );
our @EXPORT_OK = qw( &main_page &details_pages &menu_page &ppp_page );

# NOTE(review): the print literals in this copy appear to have lost their
# markup; they are reproduced below exactly as they stand here. Confirm
# against the upstream module before relying on the generated pages.

# main_page( $dir, $file, $list_mainTabP, $current, $ma, $ma_uni, $dir_root )
#
# Writes the main report page to $file: header(), navbar() for
# $list_mainTabP with $current passed as the active entry, the futurette()
# panel fed by get_genome( $dir, $dir_root ), the two counts $ma and
# $ma_uni, the carousel2() galleries and footer().
# Dies when $file cannot be opened for writing.
sub main_page
{
    my ( $dir, $file, $list_mainTabP, $current, $ma, $ma_uni, $dir_root ) = @_;
    my ( $futHashP, $uniqueTabP, $randTabP, $pngTabP ) = get_genome ( $dir, $dir_root );

    # 'or', not '||': '||' binds to the (always true) file name, so the die
    # could never run and a failed open went unnoticed.
    open my $h, '>', $file or die "cannot create $file $!\n";
    header ( $h );
    navbar ( $h, $list_mainTabP, $current );
    print $h "

View details »

\n";
    futurette( $h, $current, $pngTabP, $futHashP );
    print $h "

mappers #: $ma

unique mappers #: $ma_uni

\n";
    carousel2( $h, $uniqueTabP, $randTabP, $dir_root );
    footer($h);
    close $h;
}

# menu_page( $dir, $file, $list_mainTabP, $current, $min, $max,
#            $simin, $simax, $pimin, $pimax, $dir_root )
#
# Writes the menu page to $file: header(), navbar(), the span() panel that
# receives the three size ranges, a "text file" line and footer().
# Dies when $file cannot be opened for writing.
sub menu_page
{
    my ( $dir, $file, $list_mainTabP, $current, $min, $max, $simin, $simax, $pimin, $pimax, $dir_root ) = @_;

    # Part of $dir that follows $dir_root. Declared separately because
    # "my ... if" has undefined behaviour; \Q..\E keeps regex metacharacters
    # in the directory name literal.
    # NOTE(review): not interpolated in any literal visible here -
    # presumably it was used inside the missing markup; confirm.
    my $html_ref;
    $html_ref = $1 if $dir =~ /\Q$dir_root\E(.*)/;

    open my $h, '>', $file or die "cannot create $file $!\n";
    header($h);
    navbar ( $h, $list_mainTabP, $current );
    span( $h, $current, $min, $max, $simin, $simax, $pimin, $pimax );
    print $h "

\n";
    print $h " text file
\n
";
    footer($h);
    close $h;
}

# details_pages( $dir_details, $prefix, $list_mainTabP, $current, $misTE, $dir_root )
#
# Writes three pages from the data returned by get_subgroups():
#   $prefix-TEs.html    - fut() 'Transposable elements' + carousel()
#   $prefix-genome.html - fut() 'Genome' + carousel2()
#   $prefix-exons.html  - fut() 'Exons'
# The TE page gets an extra "Ping Pong Partners" section when $prefix ends
# in "bonafide_reads". Dies when any of the three files cannot be created.
sub details_pages
{
    my ( $dir_details, $prefix, $list_mainTabP, $current, $misTE, $dir_root ) = @_;
    my ($Hex, $HTE, $HG, $NonUniTE, $NonUniG, $UniG ) = get_subgroups( $dir_details, $current, $misTE, $dir_root );

    # NOTE(review): as in menu_page, the use of $html_ref is not visible in
    # this copy - presumably the target of the "Ping Pong Partners" link.
    my $html_ref;
    $html_ref = $1.'-PPP.html' if $prefix =~ /\Q$dir_root\E(.*)/;

    open my $h, '>', $prefix.'-TEs.html' or die "cannot create $prefix-TEs.html $!\n";
    header($h);
    navbar ( $h, $list_mainTabP, $current );
    if ( $prefix =~ /bonafide_reads$/ )
    {
        print $h "
";
        print $h "

Ping Pong Partners

\n";
        print $h "
";
    }
    fut($h,'Transposable elements',$HTE);
    carousel($h,$NonUniTE,$dir_root);
    footer($h);
    close $h;

    open $h, '>', $prefix.'-genome.html' or die "cannot create $prefix-genome.html $!\n";
    header($h);
    navbar ( $h, $list_mainTabP, $current );
    fut($h,'Genome',$HG);
    carousel2($h,$UniG, $NonUniG,$dir_root);
    footer($h);
    close $h;

    open $h, '>', $prefix.'-exons.html' or die "cannot create $prefix-exons.html $!\n";
    header($h);
    navbar ( $h, $list_mainTabP, $current );
    fut($h,'Exons',$Hex);
    footer($h);
    close $h;
}

# NOTE(review): the head of ppp_page below is carried over untouched because
# the rest of that sub cannot be reproduced from this copy. Its open calls
# still use "|| die", which never fires; they need the same "or die" change.
+sub ppp_page +{ + my ( $dir, $file, $list_mainTabP, $current, $ppp, $dir_root ) = @_; + + my $ppp_file = $ppp.'ppp.txt'; + open my $h, '>', $file || die "cannot create $file $!\n"; + header($h); + navbar ( $h, $list_mainTabP, $current ); + print $h '
'."\n"; + print $h ' + + + + + + + + + + + '; + + open my $f, '<', $ppp_file || die "cannot open $ppp_file $!\n"; + while ( <$f> ) + { + chomp; + print $h ''; + my ( $id, $sum, $ten, $mean, $sd, $zscore, $prob) = split /\t/, $_; + if( -d "$ppp/$id" ) + { + my $sub_html = $ppp.$id.'.html'; + my $sub_html_ref = $1.$id if $ppp =~ /$dir_root(.*)/; + print $h ""; + + open my $sub, '>', $sub_html || die "cannot create $sub_html\n"; + { + header($sub); + print $sub " +
+

$id

+

+

ping pong signature

+

sens reads with PPP

+

reverse reads with PPP

+

sens reads without PPP

+

reverse reads without PPP

+
"; + footer($sub); + } + close $sub; + + } + else { print $h "\n"; } + print $h "\n"; + + print $h ''; + } + close $f; + print $h "
IDoverlap sumten overlap summeanstandard deviationz-scorep-value
$id $id $sum $ten $mean $sd $zscore $prob
"; + footer($h); + close $h; +}

# NOTE(review): the two lines above are the untouched tail of ppp_page.

# get_genome( $dir, $dir_root )
#
# Classifies every entry directly under $dir for the main page.
#
# Returns four references, in this order:
#   \%hash      - report label => path with the part up to and including
#                 $dir_root removed
#   \@Unique    - entries of <Gviz dir>/unique/
#   \@NonUnique - entries of <Gviz dir>/rand/
#   \@png       - root-relative paths ending in "distribution.png"
#
# Side effects: the three recognised *.fastq files are compressed with gzip
# (and recorded under their ".gz" name); any entry that matches no rule is
# removed with unlink.
# NOTE(review): a "*.fastq.gz" left by an earlier run matches no rule and is
# therefore unlinked on the next scan - confirm this sub runs once per dir.
sub get_genome
{
    my ( $dir, $dir_root ) = @_;
    my ( %hash, @Unique, @NonUnique, @png );

    # First matching rule wins, so the more specific names stay ahead of the
    # general ones ("_unique_sorted.bam" before "_sorted.bam", ...).
    # Patterns are unchanged from the original if/elsif chain.
    my @rules = (
        [ qr/.*distribution\.txt$/,    'keep', 'mappers size distribution (txt)' ],
        [ qr/.*distribution\.png$/,    'png' ],
        [ qr/.*unique\.fastq$/,        'gzip', 'unique mappers (fastq.gz)' ],
        [ qr/.*rejected\.fastq$/,      'gzip', 'unmapped (fastq.gz)' ],
        [ qr/.*all\.fastq$/,           'gzip', 'mappers (fastq.gz)' ],
        [ qr/.*dup_unique\.txt$/,      'keep', 'unique mappers (txt)' ],
        [ qr/.*dup_mapnum\.txt$/,      'keep', 'mappers (txt)' ],
        [ qr/.*dup_nonmapp\.txt$/,     'keep', 'unmapped (txt)' ],
        [ qr/.*_unique_sorted\.bam$/,  'keep', 'unique alignment (bam)' ],
        [ qr/.*_sorted\.bam$/,         'keep', 'alignment (bam)' ],
        [ qr/.*unique_plus.bedgraph/,  'keep', 'bedgraph unique plus strand' ],
        [ qr/.*unique_minus.bedgraph/, 'keep', 'bedgraph unique minus strand' ],
        [ qr/.*plus.bedgraph/,         'keep', 'bedgraph plus strand' ],
        [ qr/.*minus.bedgraph/,        'keep', 'bedgraph minus strand' ],
    );

    foreach my $fr ( glob( $dir.'/*' ) )
    {
        # Gviz directories are recognised on the full path, before any rule.
        if ( $fr =~ /.*Gviz/ )
        {
            @NonUnique = glob( $fr.'/rand/*' );
            @Unique    = glob( $fr.'/unique/*' );
            next;
        }

        # Root-relative path. List assignment replaces "my $f = $1 if ...",
        # whose behaviour is undefined; \Q..\E keeps metacharacters in the
        # directory name literal. An entry outside $dir_root gets '' and so
        # falls through to the unlink below, as it did before.
        my ($f) = $fr =~ /\Q$dir_root\E(.*)/;
        $f = '' unless defined $f;

        my $rule;
        foreach my $candidate ( @rules )
        {
            next unless $f =~ $candidate->[0];
            $rule = $candidate;
            last;
        }

        if ( !defined $rule )
        {
            unlink $fr;
            next;
        }

        my ( undef, $action, $label ) = @{$rule};
        if ( $action eq 'png' )
        {
            push @png, $f;
        }
        elsif ( $action eq 'gzip' )
        {
            $hash{$label} = $f.'.gz';
            # List form: no shell, so spaces or metacharacters in the path
            # cannot split or alter the command.
            system( 'gzip', $fr ) == 0 or warn "gzip failed for $fr\n";
        }
        else
        {
            $hash{$label} = $f;
        }
    }
    return (\%hash, \@Unique, \@NonUnique, \@png);
}

# NOTE(review): head of span, carried over untouched (its literal continues
# on the following lines).
+sub span +{ + my ( $file, $name, $min, $max, $simin, $simax, $pimin, $pimax ) = @_; + + print $file " +
+
+
+

bonafide reads

+ reads of size between $min and $max
with no mi, sn, t and r RNAs +

Genome

+

TE

+

Exons

+
+
+

siRNAs

+ bonafide reads of size between $simin and $simax +

Genome

+

TE

+

Exons

+
+
+

piRNAs

+ bonafide reads of size between $pimin and $pimax +

Genome

+

TE

+

Exons

+
+
+
+
+

miRNAs

+

Genome

+

TE

+

Exons

+
+
+
+"; +}

# NOTE(review): the line above is the untouched tail of span.

# get_subgroups( $dir, $name, $misTE, $dir_root )
#
# Classifies every entry directly under $dir for the detail pages.
#
# Returns six references, in this order:
#   \%Hex, \%HTE, \%HG - report label => root-relative path for the exon,
#                        TE and genome files respectively
#   \@NonUniTE         - entries of the Gviz_TEs directory
#   \@NonUniG          - entries of Gviz_genome/rand/
#   \@UniG             - entries of Gviz_genome/unique/
#
# $misTE is only interpolated into two of the TE labels; $name is accepted
# but not used. Side effect: any entry that matches no rule and is not a
# Gviz directory is removed with unlink.
sub get_subgroups
{
    my ( $dir, $name, $misTE, $dir_root ) = @_;
    my ( %Hex, %HTE, %HG, @NonUniTE, @UniG, @NonUniG );

    # Same patterns, labels and order as the original if/elsif chain;
    # the first matching rule wins.
    my @rules = (
        [ qr/genome_unique_sorted\.bam$/,           \%HG,  'genome unique mappers (sorted bam)' ],
        [ qr/genome_sorted\.bam$/,                  \%HG,  'genome mappers (sorted bam)' ],
        [ qr/miRNAs_reads_counts\.txt$/,            \%HG,  'miRNAs per type (txt)' ],
        [ qr/genome_unique_plus\.bedgraph$/,        \%HG,  'bedgraph unique plus strand' ],
        [ qr/genome_unique_minus\.bedgraph$/,       \%HG,  'bedgraph unique minus strand' ],
        [ qr/genome_plus\.bedgraph$/,               \%HG,  'bedgraph plus strand' ],
        [ qr/genome_minus\.bedgraph$/,              \%HG,  'bedgraph minus strand' ],
        [ qr/TEs_plus\.bedgraph$/,                  \%HTE, 'bedgraph plus strand' ],
        [ qr/TEs_minus\.bedgraph$/,                 \%HTE, 'bedgraph minus strand' ],
        [ qr/exons_sorted\.bam$/,                   \%Hex, 'exons mappers (sorted bam)' ],
        [ qr/exons_unique_sorted\.bam$/,            \%Hex, 'exons unique mappers (sorted bam)' ],
        [ qr/exons_reads_counts\.txt$/,             \%Hex, 'read number per exon (txt)' ],
        [ qr/TEs_reads_counts\.txt$/,               \%HTE, "read number per TE 0 to $misTE mismatches (txt)" ],
        [ qr/TEs_reads_counts_mismatches\.txt$/,    \%HTE, "read number per TE with 1 to $misTE mismatches (txt)" ],
        [ qr/TEs_reads_counts_nomismatches\.txt$/,  \%HTE, 'read number per TE with no mismatch (txt)' ],
        [ qr/TEs_unique_sorted\.bam$/,              \%HTE, 'TEs unique mappers (sorted bam)' ],
        [ qr/TEs_sorted\.bam$/,                     \%HTE, 'TEs mappers (sorted bam)' ],
    );

    foreach my $fr ( glob( $dir.'/*' ) )
    {
        # Scoped per entry: the original kept $f across iterations, so an
        # entry outside $dir_root was classified with the previous entry's
        # path. \Q..\E keeps metacharacters in the directory name literal.
        my ($f) = $fr =~ /\Q$dir_root\E(.*)/;
        $f = '' unless defined $f;

        my $registered = 0;
        foreach my $rule ( @rules )
        {
            my ( $pattern, $target, $label ) = @{$rule};
            next unless $f =~ $pattern;
            $target->{$label} = $f;
            $registered = 1;
            last;
        }
        next if $registered;

        # Gviz directories are recognised on the full path.
        if ( $fr =~ /.*Gviz_TEs/ )
        {
            @NonUniTE = glob( $fr.'/*' );
        }
        elsif ( $fr =~ /.*Gviz_genome/ )
        {
            @NonUniG = glob( $fr.'/rand/*' );
            @UniG    = glob( $fr.'/unique/*' );
        }
        else
        {
            unlink $fr;
        }
    }
    return (\%Hex, \%HTE, \%HG, \@NonUniTE, \@NonUniG, \@UniG);
}

# NOTE(review): header and the head of navbar follow, carried over untouched
# (their literals cannot be reproduced from this copy).
+sub header +{ + my $file = shift; + print $file " + + + + + pipeline + + + + + + + + + + + "; +} + +sub navbar +{ + my ( $file, $fastq, $actif ) = @_; + + print $file " +
+
+
+ + Report +
+
    + "; + for (my $i = 0 ; $i <= $#{$fastq}; $i++) + { + # my $fa = basename($fastq->[$i],'.dat'); + my $fa = $fastq->[$i]; + if ($actif eq $fa){ print $file "
  • [$i].html\">$fa
  • ";} + else {print $file "
  • [$i].html\">$fa
  • " ;} + } + print $file " +
+
+
+
+
"; +} + +sub footer +{ + my $file = shift; + print $file " + +
+ +
+ + + + + + + + + + + + "; +} + +sub carousel +{ + my ($file, $non_unique, $dir_root) = @_; + my $ac = 0; + print $file " +
+
+
+
+
+
+
+
+
+
Reads randomly assigned
+
+
+
    + "; + foreach my $u (@{$non_unique}) + { + my $name = basename($u,'.png'); + $u = $1 if $u =~ /$dir_root(.*)/; + print $file " +
  • + $name +
  • + "; + } + print $file " +
+
+
+
+
+ "; +} + +sub carousel2 +{ + my ($file, $unique, $non_unique, $dir_root) = @_; + print $file " +
+
+
+
+
+
+
+
+
+
Uniquely mapped reads
+
+
+
    + "; + + foreach my $u (@{$unique}) + { + my $name = basename($u,'.png'); + $u = $1 if $u =~ /$dir_root(.*)/; + print $file " +
  • + $name +
  • + "; + } + print $file " +
+
+
+
+
+
+
+
+
+
+
+
+
Reads randomly assigned
+
+
+
    + "; + + foreach my $nu (@{$non_unique}) + { + my $name = basename($nu,'.png'); + $nu = $1 if $nu =~ /$dir_root(.*)/; + print $file " +
  • + $name +
  • + "; + } + print $file " +
+
+
+
+
+ "; +} + +sub futurette +{ + my ($file, $name, $png, $hash) = @_; + print $file " +
+
+

$name

+

+ "; + foreach my $k (sort keys %{$hash}) + { + print $file "$k
\n" ; + } + + print $file " +

"; + + foreach my $pn (@{$png}){print $file "
";} + + print $file " +
+
+ "; +} + +sub fut +{ + my ($file, $name, $hash) = @_; + print $file " +
+
+

$name

+

+ "; + + foreach my $k (sort { ${$hash}{$a} cmp ${$hash}{$b} } keys %{$hash}) + { + print $file "$k
\n" ; + } + + print $file " +

+
+
+ "; +}

# NOTE(review): the line above is the untouched tail of fut.

# _bonafide_distribution_pngs( $dir, $name, $feature )
#
# Shared worker for get_distri_exon / get_distri_TE. Globs
# "$dir/$name-subgroups-bonafide_reads-$feature-*distribution-*.png" and
# returns a reference to the list of matches with everything before
# "$name-subgroups-..." cut off.
sub _bonafide_distribution_pngs
{
    my ( $dir, $name, $feature ) = @_;
    my $stem = $name.'-subgroups-bonafide_reads-'.$feature.'-';
    my @out;

    foreach my $path ( glob( $dir.'/'.$stem.'*distribution-*.png' ) )
    {
        # \Q..\E: $name is data, not a pattern - a sample name containing
        # '+', '(' or similar used to break or distort this match.
        push @out, $1 if $path =~ /.*(\Q$stem\E.*distribution-.*\.png)/;
    }
    return \@out;
}

# get_distri_exon( $dir, $name )
#
# Returns a reference to the list of exon distribution PNG names found in
# $dir for sample $name (see _bonafide_distribution_pngs).
sub get_distri_exon
{
    my ( $dir, $name ) = @_;
    return _bonafide_distribution_pngs( $dir, $name, 'exons' );
}

# get_distri_TE( $dir, $name )
#
# Returns a reference to the list of TE distribution PNG names found in
# $dir for sample $name (see _bonafide_distribution_pngs).
sub get_distri_TE
{
    my ( $dir, $name ) = @_;
    return _bonafide_distribution_pngs( $dir, $name, 'TE' );
}

# get_PPP( $dir, $name )
#
# Globs "$dir/$name-subgroups-bonafide_reads-TE-PPPartners-*" and groups the
# files by the identifier found between "PPPartners-" and the file suffix.
#
# Returns a hash reference: identifier => six-element array reference, each
# slot holding the matching file name (path prefix removed) or '':
#   [0] -sens.txt         [1] -antisens.txt     [2] -sensPPP.txt
#   [3] -antisensPPP.txt  [4] -overlap_size.txt [5] -histogram.png
sub get_PPP
{
    my ( $dir, $name ) = @_;
    my %distri;
    my $stem = $name.'-subgroups-bonafide_reads-TE-PPPartners-';

    # Same patterns and order as the original elsif chain.
    my @slot_rules = (
        [ qr/PPPartners-(.*?)-sens\.txt$/,         0 ],
        [ qr/PPPartners-(.*?)-antisens\.txt$/,     1 ],
        [ qr/PPPartners-(.*?)-sensPPP\.txt$/,      2 ],
        [ qr/PPPartners-(.*?)-antisensPPP\.txt$/,  3 ],
        [ qr/PPPartners-(.*?)-overlap_size\.txt$/, 4 ],
        [ qr/PPPartners-(.*?)-histogram\.png$/,    5 ],
    );

    foreach my $path ( glob( $dir.'/'.$stem.'*' ) )
    {
        # \Q..\E: treat the sample name literally inside the pattern.
        next unless $path =~ /.*(\Q$stem\E.*)/;
        my $file = $1;

        foreach my $rule ( @slot_rules )
        {
            my ( $pattern, $slot ) = @{$rule};
            next unless $file =~ $pattern;
            my $id = $1;
            $distri{$id} ||= [ ('') x 6 ];
            $distri{$id}->[$slot] = $file;
            last;
        }
    }
    return \%distri;
}

+sub PPPrint +{ + my ($h, $hash) = @_; + my $cmp = 0; + + print $h "
\n"; + print $h "
"; + while ( my ($k,$v) = each %{$hash} ) + { + print $h "
" if $cmp != 0 && $cmp % 2 == 0; + print $h " + + + "; + $cmp++; + } + + print $h "
"; +} + +sub printDistri +{ + my ($h, $tab) = @_; + my ($txt, $name); + my $cmp = 0; + print $h "
\n"; + print $h "
"; + foreach my $k (@{$tab}) + { + if ($k =~ /(.*)-(.*)\.png$/) + { + $txt = $1.'-'.$2.'.txt'; + $name = $2; + } + print $h "
" if $cmp != 0 && $cmp % 2 == 0; + print $h " + +
+

$name

+

+

text file

+
+ "; + $cmp++; + } + + print $h "
"; +}

# NOTE(review): the two lines above are the untouched tail of printDistri.

# mapnum( $dupmapnum, $dupnum_genome )
#
# Rewrites the tab-separated file $dupmapnum in place.
#
# Reading: the first line is skipped; for every following line the first
# field is the key and the third field the value that is kept.
# Writing: the file is overwritten with the header
# "sequence / duplicate / genome map num / map num" and one line per key:
# the key, elements [0] and [1] of $dupnum_genome->{key}, and the kept value.
# Keys are written in sorted order so the output is reproducible.
#
# $dupnum_genome is a hash reference whose values are array references.
# A key missing from it yields two empty fields.
# Dies when the file cannot be opened for reading or writing, or when the
# final close of the rewritten file fails.
sub mapnum
{
    my $dupmapnum     = shift;
    my $dupnum_genome = shift;

    # Three-argument open: the file name is never interpreted as a mode or
    # a pipe, and surrounding whitespace in it is preserved.
    open( my $dupTE, '<', $dupmapnum ) || die "cannot open ".$dupmapnum."\n";
    my %dupnum_TE = ();
    my $header = <$dupTE>;    # header line: read and discarded
    while ( my $line = <$dupTE> )
    {
        chomp $line;
        my @dupline = split /\t/, $line;
        next unless @dupline;    # blank line: nothing to key on
        $dupnum_TE{ $dupline[0] } = $dupline[2];
    }
    close $dupTE;

    open( my $du_TE, '>', $dupmapnum ) || die "cannot open to write ".$dupmapnum."\n";
    print $du_TE "sequence\tduplicate\tgenome map num\tmap num\n";
    foreach my $seq ( sort keys %dupnum_TE )
    {
        my $genome = ${$dupnum_genome}{$seq};
        my @fields = (
            $seq,
            ( $genome ? @{$genome}[ 0, 1 ] : ( undef, undef ) ),
            $dupnum_TE{$seq},
        );
        # Missing values are written as empty fields, without the
        # "uninitialized value" warnings the interpolated form produced.
        print $du_TE join( "\t", map { defined $_ ? $_ : '' } @fields ), "\n";
    }
    # Buffered write errors only surface at close.
    close $du_TE or die "cannot write ".$dupmapnum."\n";
}

1;