view graphprot_predict_profile_wrapper.pl @ 0:215925e588c4 draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/graphprot commit 9bb5f3c8ed8e87ec5652b5bc8bf9c774d5534a1a
author rnateam
date Fri, 25 May 2018 12:17:44 -0400
parents
children
line wrap: on
line source

#!/usr/bin/perl

use strict;
use warnings;
use Getopt::Long;
use Pod::Usage;
use Cwd qw(getcwd abs_path);
use List::Util qw(sum);

=head1 NAME

=head1 SYNOPSIS

Galaxy wrapper script for GraphProt (-action predict_profile) to compute 
the binding profile for a given model on a given set of sequences provided 
in FASTA format. After profile prediction, average profiles get computed, 
scores signified and binding peak regions extracted. The score signification 
is done using the provided fitted GEV parameters, either from .params file 
or manually set. If score threshold is set (-thr-sc), p-value assignment 
will be skipped and set score threshold will be used to extract peak 
regions. NOTE: Additional lines .params file are used to store and get 
GEV parameters, as well as type of model (model_type: sequence|structure).
Also, this wrapper currently works for classification mode only.

PARAMETERS:

    -help|h         display help page
    -fasta          Input FASTA file (option -fasta)
    -model          Input .model file (option -model)
    -params         Input .params file
                    NOTE: uses .params file with additional
                    parameters
                    Manually set parameters (below) will override 
                    found settings in .params file
    -data-id        Data ID (option -prefix)

GraphProt model parameters (by default get from .params file):

    -onlyseq        Set if model is a sequence model
    -R              GraphProt model R parameter
    -D              GraphProt model D parameter
    -epochs         GraphProt model epochs parameter
    -lambda         GraphProt model lambda parameter
    -bitsize        GraphProt model bitsize parameter
    -abstraction    GraphProt model RNAshapes abstraction level 
                    parameter (set for structure models)

Peak region extraction parameters:

    -thr-sc         Score threshold for extracting peak regions
                    By default p-value of 0.05 is used. If no p-value 
                    calculation possible, -thr-sc is used with default: 0
    -thr-p          p-value threshold for extracting peak regions
                    By default, peak regions with p = 0.05 are extracted,
                    as well as p50 score peak regions (if info given)
                    Default: 0.05
    -merge-dist     Maximum merge distance for nearby peak regions
                    Default: report all non-overlapping regions
    -p50-out        Output p50 score filtered peak regions BED file
                    default: false

GEV distribution parameters:

    -distr-my       GEV distribution my parameter for calculating p-values
                    from scores
    -distr-sigma    GEV distrubution sigma parameter for calculating 
                    p-values from scores
    -distr-xi       GEV distribution xi parameter for calculating p-values
                    from scores
    -ap-extlr       Used average profile left right extension for 
                    averaging scores, which were used for distribution 
                    fitting. NOTE: usually a value of 5 was used for 
                    for getting GEV distribution and parameters. If you 
                    choose a different value here, calculated p-values 
                    will be wrong!
                    default : 5


=head1 DISCRIPTION

5) Write manual
6) add output p50 file with NOTE

6) put GP into rna_tools

NOTE:
Additional lines .params file used to store and get gev parameters, as well 
as type of model (model_type: sequence|structure).

Example .params content:
epochs: 20
lambda: 0.001
R: 1
D: 4
bitsize: 14
model_type: sequence
#ADDITIONAL MODEL PARAMETERS
ap_extlr: 5
gev_my: -2.5408
gev_sigma: 1.6444
gev_xi: -0.1383
p50_score: 6.51534 
p50_p_val: 0.0009059744 

=cut

############################
# COMMAND LINE CHECKING.
############################

# Command line argument variables.
my ($i_help, $i_fasta, $i_model, $i_params, $i_data_id, $i_thr_sc, $i_thr_p, $i_max_merge_dist, $i_p50_out, $i_gp_r, $i_gp_d, $i_gp_epochs, $i_gp_lambda, $i_gp_bitsize, $i_gp_abstr, $i_gp_onlyseq, $i_distr_my, $i_distr_sigma, $i_distr_xi, $i_ap_extlr);

# Parse the command line.
GetOptions ( "help|h" => \$i_help,
             "fasta:s" => \$i_fasta,
             "model:s" => \$i_model,
             "params:s" => \$i_params,
             "data-id:s" => \$i_data_id,
             "thr-sc:f" => \$i_thr_sc,
             "thr-p:f" => \$i_thr_p,
             "p50-out" => \$i_p50_out,
             "merge-dist:i" => \$i_max_merge_dist,
             "R:i" => \$i_gp_r,
             "D:i" => \$i_gp_d,
             "epochs:i" => \$i_gp_epochs,
             "lambda:f" => \$i_gp_lambda,
             "bitsize:i" => \$i_gp_bitsize,
             "abstr:i" => \$i_gp_abstr,
             "onlyseq" => \$i_gp_onlyseq,
             "distr-my:f" => \$i_distr_my,
             "distr-sigma:f" => \$i_distr_sigma,
             "distr-xi:f" => \$i_distr_xi,
             "ap-extlr:i" => \$i_ap_extlr ) or pod2usage(1);

# Check.
pod2usage(1) if $i_help;
($i_fasta and $i_model) or pod2usage "ERROR: -fasta, -model are mandatory";
if ($i_distr_my or $i_distr_sigma or $i_distr_xi) {
    ($i_distr_my and $i_distr_sigma and $i_distr_xi) or pod2usage "ERROR: expects all three distribution parameters to be set";
}

#######################
# SET PARAMETERS.
#######################

# Prefix.
my $data_id = "GraphProt";
if ($i_data_id) {
    $data_id = $i_data_id;
}
my $thr_sc = 0;
if ($i_thr_sc) {
    $thr_sc = $i_thr_sc;
}
my $thr_p = 0.05;
if ($i_thr_p) {
    $thr_p = $i_thr_p;
}
my $ap_extlr = 5;
if ($i_ap_extlr) {
    $ap_extlr = $i_ap_extlr;
}
my $max_merge_dist = 0;
if ($i_max_merge_dist) {
    $max_merge_dist = $i_max_merge_dist;
}

# Get parameters from .params file.
my %params;
if ($i_params) {
    open(IN, $i_params) or die "Cannot open $i_params: $!";
    while(<IN>) {
        next if ($_ =~ /^#/);
        if ($_ =~ /(.+):\s(.+)/) {
            $params{$1} = $2;
        }
    }
    close IN;
}

# Create GP parameter string.
my $params_string = "";
# -onlyseq
my $model_type = "structure";
if (exists $params{"model_type"}) {
    if ($params{"model_type"} eq "sequence") {
        $params_string .= " -onlyseq";
        $model_type = "sequence";
    }
} elsif ($i_gp_onlyseq) {
    $params_string .= " -onlyseq";
    $model_type = "sequence";
}
# -R
if ($i_gp_r) {
    $params_string .= " -R $i_gp_r";
} elsif (exists $params{"R"}) {
    my $v = $params{"R"};
    $params_string .= " -R $v";
} else {
    die "ERROR: -R needs to be set";
}
# -D
if ($i_gp_d) {
    $params_string .= " -D $i_gp_d";
} elsif (exists $params{"D"}) {
    my $v = $params{"D"};
    $params_string .= " -D $v";
} else {
    die "ERROR: -D needs to be set";
}
# -epochs
if ($i_gp_epochs) {
    $params_string .= " -epochs $i_gp_epochs";
} elsif (exists $params{"epochs"}) {
    my $v = $params{"epochs"};
    $params_string .= " -epochs $v";
} else {
    die "ERROR: -epochs needs to be set";
}
# -lambda
if ($i_gp_lambda) {
    $params_string .= " -lambda $i_gp_lambda";
} elsif (exists $params{"lambda"}) {
    my $v = $params{"lambda"};
    $params_string .= " -lambda $v";
} else {
    die "ERROR: -lambda needs to be set";
}
# -bitsize
if ($i_gp_bitsize) {
    $params_string .= " -bitsize $i_gp_bitsize";
} elsif (exists $params{"bitsize"}) {
    my $v = $params{"bitsize"};
    $params_string .= " -bitsize $v";
} else {
    die "ERROR: -bitsize needs to be set";
}
# -abstraction
if ($i_gp_abstr) {
    $params_string .= " -bitsize $i_gp_abstr";
    die "ERROR: -abstraction set with -onlyseq" unless ($model_type eq "structure");
} elsif (exists $params{"abstraction"}) {
    my $v = $params{"abstraction"};
    $params_string .= " -abstraction $v";
    die "ERROR: -abstraction set with -onlyseq" unless ($model_type eq "structure");
}

# Distribution parameters.
my ($distr_my, $distr_sigma, $distr_xi);
if ($i_distr_my) {
    $distr_my = $i_distr_my;
} elsif (exists $params{"gev_my"}) {
    $distr_my = $params{"gev_my"};
}
if ($i_distr_sigma) {
    $distr_sigma = $i_distr_sigma;
} elsif (exists $params{"gev_sigma"}) {
    $distr_sigma = $params{"gev_sigma"};
}
if ($i_distr_xi) {
    $distr_xi = $i_distr_xi;
} elsif (exists $params{"gev_xi"}) {
    $distr_xi = $params{"gev_xi"};
}
# Average profile extension parameter.
if (exists $params{"ap_extlr"}) {
    $ap_extlr = $params{"ap_extlr"};
}

print STDOUT "model_type:     $model_type\n";
print STDOUT "ap_extlr:       $ap_extlr\n";

if ($distr_my and $distr_sigma and $distr_xi) {
    print STDOUT "distr_my:       $distr_my\n";
    print STDOUT "distr_sigma:    $distr_sigma\n";
    print STDOUT "distr_xi:       $distr_xi\n";
}
print STDOUT "max_merge_dist: $max_merge_dist\n";

# p50 filter score.
my $p50_sc;
if (exists $params{"p50_score"}) {
    $p50_sc = $params{"p50_score"};
    print STDOUT "p50_score:      $p50_sc\n";
}
# p50 p-value.
my $p50_p;
if (exists $params{"p50_p_val"}) {
    $p50_p = $params{"p50_p_val"};
    print STDOUT "p50_p_val:      $p50_p\n";
}



##################################
# RUN GP PROFILE PREDICTION.
##################################

# Read in FASTA file headers.
my @fasta_ids;
my $headers = qx/grep ">" $i_fasta/;
while ($headers =~ />(.+?)\n/g) {
    push(@fasta_ids,$1);
}

# Run GP profile prediction.
my $gp_call = "GraphProt.pl -action predict_profile -model $i_model -fasta $i_fasta -prefix $data_id $params_string";
print STDOUT "GraphProt call: $gp_call\n";
# &> profile_prediction.log
qx/$gp_call/;


####################################
# CALCULATE AVERAGE PROFILE.
####################################

# Calculate .average_profile from GraphProt .profile file.
# Also add p-value column if distr parameters set.
my $add_p = 0;
if ($distr_my and $distr_sigma and $distr_xi) {
    $add_p = 1;
}

# Average_profile: 1-based, FASTA headers as IDs, using ap_extlr for averaging scores.
my $profile = "$data_id.profile";
my $average_profile = "$data_id.average_profile";
# Calculate window size.
my $win = $ap_extlr * 2 + 1;

open (IN, $profile) or die "Cannot open $profile: $!\n";
open (OUT, '>', $average_profile) or die "Cannot open $average_profile: $!";

# Old ID.
my $old_id = "-";
# Current ID.
my $cur_id = "-";
# Start position of the window.
my $pos_inc = 0;
# Score array.
my @scores;
# Input row counter.
my $c_in = 0;
# Output row counter.
my $c_out = 0;

while (<IN>) {

    if ($_ =~ /(.+?)\t\d+?\t(.+?)\n/) {

        $cur_id = $1;
        my $score = $2;
        $c_in++;

        # Case: New refseq ID / new paragraph.
        if ($cur_id ne $old_id) {

            # Print remaining entries at the end of former paragraph.
            while (scalar(@scores) >= ($ap_extlr + 1)) {
                # Calculate avg array score.
                my $mean = array_mean(@scores);
                $c_out++;
                $pos_inc++; 
                if ($add_p) {
                    my $y = exp(-1*(1 + ( ($mean-$distr_my)/$distr_sigma )*$distr_xi)**(-1/$distr_xi));
                    my $p = sprintf("%.8f", (1-$y));
                    print OUT "$fasta_ids[$old_id]\t$pos_inc\t$mean\t$p\n";
                } else {
                    print OUT "$fasta_ids[$old_id]\t$pos_inc\t$mean\n";
                }
                # Remove old score from beginning.
                shift(@scores);
            }

            # Reset for new paragraph.
            $pos_inc = 0;
            @scores = ();
            $old_id = $cur_id;
            # Save first score.
            push(@scores, $score);
            next;

        }

        # Push score in array for average calculation.
        push(@scores, $score);

        # Case: array smaller $win.
        if (scalar(@scores) < $win) {
            # Subcase: array more than half size, start with calculation.
            if (scalar(@scores) >= ($ap_extlr + 1)) {
                # Calculate avg array score.
                my $mean = array_mean(@scores);
                $c_out++;
                $pos_inc++;
                if ($add_p) {
                    my $y = exp(-1*(1 + ( ($mean-$distr_my)/$distr_sigma )*$distr_xi)**(-1/$distr_xi));
                    my $p = sprintf("%.8f", (1-$y));
                    print OUT "$fasta_ids[$cur_id]\t$pos_inc\t$mean\t$p\n";
                } else {
                    print OUT "$fasta_ids[$cur_id]\t$pos_inc\t$mean\n";
                }
            }
            next;
        }

        # Case: array has $win size.
        if (scalar(@scores) == $win) {
            # Calculate avg array score.
            my $mean = array_mean(@scores);
            $c_out++;
            $pos_inc++;
            if ($add_p) {
                my $y = exp(-1*(1 + ( ($mean-$distr_my)/$distr_sigma )*$distr_xi)**(-1/$distr_xi));
                my $p = sprintf("%.8f", (1-$y));
                print OUT "$fasta_ids[$cur_id]\t$pos_inc\t$mean\t$p\n";
            } else {
                print OUT "$fasta_ids[$cur_id]\t$pos_inc\t$mean\n";
            }
            # Remove old score from beginning.
            shift(@scores);
            next;
        }
    }
}

# Print remaining entries at the end of last section.
while (scalar(@scores) >= ($ap_extlr + 1)) {
    # Calculate avg array score.
    my $mean = array_mean(@scores);
    $c_out++;
    $pos_inc++;
    if ($add_p) {
        my $y = exp(-1*(1 + ( ($mean-$distr_my)/$distr_sigma )*$distr_xi)**(-1/$distr_xi));
        my $p = sprintf("%.8f", (1-$y));
        print OUT "$fasta_ids[$cur_id]\t$pos_inc\t$mean\t$p\n";
    } else {
        print OUT "$fasta_ids[$cur_id]\t$pos_inc\t$mean\n";
    }
    # Remove old score from beginning.
    shift(@scores);
}
close IN;
close OUT;

qx/rm -f $profile/;


#########################
# GET PEAK REGIONS.
#########################


my $p50_peaks_bed_out = $data_id . ".peak_regions_p50.bed";
my $peaks_bed_out = $data_id . ".peak_regions.bed";

# If p-values were calculated, use set p-value to get peak regions.
if ($add_p) {
    extract_peak_regions_from_p_values($average_profile, $peaks_bed_out, $max_merge_dist, $thr_p);
    # If p50-out set.
    if ($i_p50_out) {
        # If p50 p-value present, also get peak regions file for this threshold.
        if ($p50_p) {
            extract_peak_regions_from_p_values($average_profile, $p50_peaks_bed_out, $max_merge_dist, $p50_p);
        } else {
            qx/touch $p50_peaks_bed_out/;
        }
    }
} else {
    # If no p-values available, use score threshold for defining peak regions.
    extract_peak_regions_from_scores($average_profile, $peaks_bed_out, $max_merge_dist, $thr_sc);
    # If p50-out set.
    if ($i_p50_out) {
        # If p50 score present, also get peak regions file for this threshold.
        if ($p50_sc) {
            extract_peak_regions_from_scores($average_profile, $p50_peaks_bed_out, $max_merge_dist, $p50_sc);
        } else {
            qx/touch $p50_peaks_bed_out/;
        }
    }
}

exit;



################################################################################
################################################################################
# SUBROUTINES.
################################################################################


sub array_mean {

  my $mean = sum(@_)/@_;
  return sprintf("%.5f", $mean);

}


################################################################################

sub extract_peak_regions_from_p_values {

    my ($average_profile, $peak_regions_bed_file, $max_merge_dist, $max_p) = @_;

    my $old_ref = "no";
    my $in = "N";
    my $ref_id;
    my $start;
    my $end;
    my $best_p = 1;
    my $best_sc = -1000;
    my $region_s;
    my $region_e;
    my $zero_pos;

    my $temp_bed_file = "peak_regions.tmp.bed";

    open (IN, $average_profile) or die "Cannot open $average_profile: $!";
    open (OUT, '>', $temp_bed_file) or die "Cannot open $temp_bed_file: $!";

    while (<IN>) {
        chomp;
        my ($ref, $s, $sc, $p) = (split /\t/)[0,1,2,3];
        # If file has zero-based positions.
        if ($s == 0) {
            $zero_pos = 1;
        }
        # If positions are one-based, make them zero-based.
        unless ($zero_pos) {
            $s -= 1;
        }
        # At transcript ends, if in positive region, write and reset.
        if ($old_ref ne $ref) {
            if ($in eq "Y") {
                print OUT "$ref_id\t$region_s\t$region_e\t$end;$best_sc;$best_p\t0\t+\n";
                $in = "N";
            }
        }
        $old_ref = $ref;
        # Deal with good p-value regions.
        if ($p <= $max_p) {
            # Start of a positive cluster.
            if ($in eq "N") {
                $start = $s;
                $region_s = $s;
                $region_e = $s + 1;
                $end = $s + 1;
                $best_p = $p;
                $best_sc = $sc;
                $ref_id = $ref;
                $in = "Y";
                next;
            # Inside a positive cluster.
            } elsif ($in eq "Y") {
                if ($p < $best_p) {
                    $start = $s;
                    $end = $s + 1;
                    $best_p = $p;
                    $best_sc = $sc;
                    $ref_id = $ref;
                }
                $region_e++;
                next;
            }
        } else {
            # If we were in positive cluster before.
            if ($in eq "Y") {
                print OUT "$ref_id\t$region_s\t$region_e\t$end;$best_sc;$best_p\t0\t+\n";
                $in = "N";
            }
        }
    }
    # After last line processed.
    if ($in eq "Y") {
      print OUT "$ref_id\t$region_s\t$region_e\t$end;$best_sc;$best_p\t0\t+\n";
      $in = "N";
    }
    close IN;
    close OUT;
    # If merge distance zero (i.e. end of one block is -1 from start of next block).
    if ($max_merge_dist == 0) {
        qx/cat $temp_bed_file > $peak_regions_bed_file/;
    } else {
        # Merge nearby regions.
        open(IN, $temp_bed_file) or die "Cannot open $temp_bed_file: $!";
        open(OUT, '>', $peak_regions_bed_file) or die "Cannot open $peak_regions_bed_file: $!";
        # For storing current block stats.
        my $block_chr = 0;
        my ($block_s, $block_e, $block_best_pos, $block_best_p, $block_best_sc);
        while (<IN>) {
            chomp;
            my ($chr, $s, $e, $id) = (split /\t/)[0,1,2,3];
            my ($best_pos, $best_sc, $best_p) = (split /;/, $id);
            if ($chr eq $block_chr) {
                # If $block_e, $s within merge merge.
                if ( ($s - $block_e) <= $max_merge_dist ) {
                    # Update block stats.
                    $block_e = $e;
                    if ($block_best_p > $best_p) {
                        $block_best_p = $best_p;
                        $block_best_sc = $best_sc;
                        $block_best_pos = $best_pos;
                    }
                } else {
                    # If $e outside merge range, print block.
                    print OUT "$block_chr\t$block_s\t$block_e\t$block_best_pos;$block_best_sc;$block_best_p\t0\t+\n";
                    # Store new block.
                    ($block_chr, $block_s, $block_e, $block_best_pos, $block_best_sc, $block_best_p) = ($chr, $s, $e, $best_pos, $best_sc, $best_p);
                }

            } else {
                # If new chromosome, print last block, otherwise it is the first block.
                if ($block_chr) {
                    print OUT "$block_chr\t$block_s\t$block_e\t$block_best_pos;$block_best_sc;$block_best_p\t0\t+\n";
                }
                ($block_chr, $block_s, $block_e, $block_best_pos, $block_best_sc, $block_best_p) = ($chr, $s, $e, $best_pos, $best_sc, $best_p);
            }
        }
        # Print last block.
        if ($block_chr) {
            print OUT "$block_chr\t$block_s\t$block_e\t$block_best_pos;$block_best_sc;$block_best_p\t0\t+\n";
        }
        close OUT;
        close IN;
    }
    qx/rm -f $temp_bed_file/;
}


################################################################################

sub extract_peak_regions_from_scores {

    my ($average_profile, $peak_regions_bed_file, $max_merge_dist, $min_sc) = @_;

    my $old_ref = "no";
    my $in = "N";
    my $ref_id;
    my $start;
    my $end;
    my $best_sc = -1000;
    my $region_s;
    my $region_e;
    my $zero_pos;

    my $temp_bed_file = "peak_regions.tmp.bed";

    open (IN, $average_profile) or die "Cannot open $average_profile: $!";
    open (OUT, '>', $temp_bed_file) or die "Cannot open $temp_bed_file: $!";

    while (<IN>) {
        chomp;
        my ($ref, $s, $sc) = (split /\t/)[0,1,2];
        # If file has zero-based positions.
        if ($s == 0) {
            $zero_pos = 1;
        }
        # If positions are one-based, make them zero-based.
        unless ($zero_pos) {
            $s -= 1;
        }
        # At transcript ends, if in positive region, write and reset.
        if ($old_ref ne $ref) {
            if ($in eq "Y") {
                print OUT "$ref_id\t$region_s\t$region_e\t$end;$best_sc\t0\t+\n";
                $in = "N";
            }
        }
        $old_ref = $ref;
        # Deal with positive regions.
        if ($sc > $min_sc) {
            # Start of a positive cluster.
            if ($in eq "N") {
                $start = $s;
                $region_s = $s;
                $region_e = $s + 1;
                $end = $s + 1;
                $best_sc = $sc;
                $ref_id = $ref;
                $in = "Y";
                next;
            # Inside a positive cluster.
            } elsif ($in eq "Y") {
                if ($sc > $best_sc) {
                    $start = $s;
                    $end = $s + 1;
                    $best_sc = $sc;
                    $ref_id = $ref;
                }
                $region_e++;
                next;
            }
        } else {
            # If we were in positive cluster before.
            if ($in eq "Y") {
                print OUT "$ref_id\t$region_s\t$region_e\t$end;$best_sc\t0\t+\n";
                $in = "N";
            }
        }
    }
    # After last line processed.
    if ($in eq "Y") {
      print OUT "$ref_id\t$region_s\t$region_e\t$end;$best_sc\t0\t+\n";
      $in = "N";
    }
    close IN;
    close OUT;
    # If merge distance zero (i.e. end of one block is -1 from start of next block).
    if ($max_merge_dist == 0) {
        qx/cat $temp_bed_file > $peak_regions_bed_file/;
    } else {
        # Merge nearby regions.
        open(IN, $temp_bed_file) or die "Cannot open $temp_bed_file: $!";
        open(OUT, '>', $peak_regions_bed_file) or die "Cannot open $peak_regions_bed_file: $!";
        # For storing current block stats.
        my $block_chr = 0;
        my ($block_s, $block_e, $block_best_pos, $block_best_sc);
        while (<IN>) {
            chomp;
            my ($chr, $s, $e, $id) = (split /\t/)[0,1,2,3];
            my ($best_pos, $best_sc) = (split /;/, $id);
            if ($chr eq $block_chr) {
                # If $block_e, $s within merge merge.
                if ( ($s - $block_e) <= $max_merge_dist ) {
                    # Update block stats.
                    $block_e = $e;
                    if ($block_best_sc < $best_sc) {
                        $block_best_sc = $best_sc;
                        $block_best_pos = $best_pos;
                    }
                } else {
                    # If $e outside merge range, print block.
                    print OUT "$block_chr\t$block_s\t$block_e\t$block_best_pos;$block_best_sc\t0\t+\n";
                    # Store new block.
                    ($block_chr, $block_s, $block_e, $block_best_pos, $block_best_sc) = ($chr, $s, $e, $best_pos, $best_sc);
                }

            } else {
                # If new chromosome, print last block, otherwise it is the first block.
                if ($block_chr) {
                    print OUT "$block_chr\t$block_s\t$block_e\t$block_best_pos;$block_best_sc\t0\t+\n";
                }
                ($block_chr, $block_s, $block_e, $block_best_pos, $block_best_sc) = ($chr, $s, $e, $best_pos, $best_sc);
            }
        
        }
        # Print last block.
        if ($block_chr) {
            print OUT "$block_chr\t$block_s\t$block_e\t$block_best_pos;$block_best_sc\t0\t+\n";
        }
        close OUT;
        close IN;
    }
    qx/rm -f $temp_bed_file/;
}


################################################################################