view annovar_yaml/annovar_yaml.pl @ 17:ee5053b5e72b draft

Uploaded new version including --paryml
author niels
date Mon, 10 Jun 2019 07:25:08 -0400
parents 7f4fb56714e8
children ce16079341bf
line wrap: on
line source

#/usr/bin/perl
#This code is using v4 of the Run_Annovar_vx.pl script!

##################
#!!TEST COMMAND!!#
##################
#Following input required, input fixed currently to run limited Annovar settings
#perl annovar_yaml.pl --inyml YAML_annovar.yml --invcf invcf.vcf --outvcf outvcf.vcf > log.txt

#Future prospects: ability to learn to read YAML files to pick up parameters. Manual definition of command line still required!

#perl modules use
use YAML;
use YAML::Tiny;
use YAML::XS 'LoadFile';
use Data::Dumper;
#use Data::YAML::Writer;
use Getopt::Long;
use feature qw(postderef postderef_qq);
use feature qw(refaliasing);
#no warnings qw(experimental::postderef);
use strict;

#input required
#two modes run and edit
#Basis Yaml file [default required]
#Yaml out file [required in edit mode]
#species [default required]
#build [default required]
#protocol [default required]
#protocolversion [default required]

#basic paramters
#Switch to enable disable Aliasses; enable=1 disable=0
my $use_alias = "1";
my $edit = "0";
my $run = "0";
my $application = "0";
#arguments yaml file is fixed!
my $parameter_yml = "YAML_arguments_annovar.yml";
my $inyml;
my $outyml;
my $invcf;
my $outvcf;
my $yml_hash_arguments;
my %yml_hash_arguments;
my $yml_hash;
my %yml_hash;
my $yml_hash_edit;
my %yml_hash_edit;
my $in;
my $in2;
my %in;
my %in2;
my $writer;
my $lookup;
my $data_type;

#parameter applications
my %annovarparameters_array;
my %annovarparameters_single;
my $annovarparameters_array;
my $annovarparameters_single;

#Strings used as input parameters
#my $input_general_location_database;
#my $input_general_dir_scripts;
#my $input_general_location_scripts_coding_annovar;
#my $input_general_location_scripts_table_annovar;
my $input_annovar_build;
my $input_annovar_species;
my $inprotocol;
my $inprotocolversion;
my @inprotocol;
my @inprotocolversion;

my $input_general_settings_dot2underline;
my $input_general_settings_nastring;
my $input_general_settings_otherinfo;
my $input_general_settings_polish;
my $input_general_settings_remove;
my $input_general_settings_thread;
my $input_general_settings_inputformat;

my $input_databases_available;
my $input_databases_required;

my $input_databases_colswanted;
my $input_databases_exonicsplic;
my $input_databases_hgvs;
my $input_databases_operation;
my $input_databases_splicing;
my @input_databases_colswanted;
my @input_databases_exonicsplic;
my @input_databases_hgvs;
my @input_databases_operation;
my @input_databases_splicing;

#other variables
my $script;
my $type;
my $count;
my $column_build;
my $column_species;
my @column_build;
my @column_species;
my $pattern;
my $pattern2;
my $match;
my $match_value;
my @match_value;
my @readonly;
my %editinyml;
my $folder_symlinks = '0';
my $path_symlinks;
my @softlink_building_blocks;

#Fixed values for searching hashes
my $level5 = "DATABASES";

#parameters to build command
my $protocol;
my @protocol;
my $operation;
my $argument;

GetOptions (
	#Required input
	'protocol=s' => \$inprotocol,
	'protocolversion=s' => \$inprotocolversion,
	'script=s' => \$script,
	'invcf=s' => \$invcf,
	'outvcf=s' => \$outvcf,
	'application=s' => \$application,
        'inyml=s' => \$inyml,
        'outyml=s' => \$outyml,
        'build=s'       => \$input_annovar_build,
        'species=s'   => \$input_annovar_species,
	#Optional input
	'edit' => \$edit,
	'run' => \$run,
	#variables in GENERAL
	'dot2underline=s'   => \$input_general_settings_dot2underline,
	'nastring=s'   => \$input_general_settings_nastring,
	'otherinfo=s'   => \$input_general_settings_otherinfo,
	'polish=s'   => \$input_general_settings_polish,
	'remove=s'   => \$input_general_settings_remove,
	'thread=s'   => \$input_general_settings_thread,
	'inputformat=s'  => \$input_general_settings_inputformat,
	#variables in DATABASES
	'colswanted=s'   => \$input_databases_colswanted,
	'exonicsplic=s'   => \$input_databases_exonicsplic,
	'hgvs=s'   => \$input_databases_hgvs,
	'operation=s'   => \$input_databases_operation,
	'splicing=s'   => \$input_databases_splicing,
);

chomp $invcf;
chomp $outvcf;

#inactive options
#       'drequired=s'   => \$input_databases_required,
#       'davail=s'   => \$input_databases_available,

#       'gpathdatabase=s'   => \$input_general_location_database,
#	'gpathscripts=s'   => \$input_general_dir_scripts,
#       'gpathcodannovar=s'   => \$input_general_location_scripts_coding_annovar,
#       'gpathtabannovar=s'   => \$input_general_location_scripts_table_annovar,

#Check input parameters
#Do some checks whether all required arguments are given

#some code

#arrays with all application specific parameters
#make sure keys are unique to allow for parsing of yml! 

%annovarparameters_array = (
	"COLSWANTED" => "$input_databases_colswanted",
	"EXONICSPLIC" => "$input_databases_exonicsplic",
	"HGVS" => "$input_databases_hgvs",
	"OPERATION" => "$input_databases_operation",
	"SPLICING" => "$input_databases_splicing",
);

%annovarparameters_single = (
	"DOT2UNDERLINE" => "$input_general_settings_dot2underline",
	"NASTRING" => "$input_general_settings_nastring",
	"OTHERINFO" => "$input_general_settings_otherinfo",
	"POLISH" => "$input_general_settings_polish",
	"REMOVE" => "$input_general_settings_remove",
	"THREAD" => "$input_general_settings_thread",
	"INPUTFORMAT" => "$input_general_settings_inputformat",
);

#parameter not allowed to be edited in yml
@readonly = ("hg19","human","mouse","mm10","annovar","VERSION","TABLE_ANNOVAR","refgene","cosmic");


#converting input strings into input arrays
@inprotocol = split (",", $inprotocol);
@inprotocolversion = split (",", $inprotocolversion);
@input_databases_colswanted = split (",", $input_databases_colswanted);
@input_databases_exonicsplic = split (",", $input_databases_exonicsplic);
@input_databases_hgvs = split (",", $input_databases_hgvs );
@input_databases_operation = split (",", $input_databases_operation);
@input_databases_splicing = split (",", $input_databases_splicing);

#$in = $inyml;
#openyml_read($in);
#$in = $inyml;
#load($in);

#check if combination of application,species, build and protocol exists
#$count = "0";
#%in = $yml_hash;
#foreach (@inprotocol) {
#print "protocol $inprotocol[$count]\n";  
#$pattern = "$application:ANALYSIS:$input_annovar_species:$input_annovar_build:DATABASES:$inprotocol[$count]$inprotocolversion[$count]:NAME $inprotocol[$count]$inprotocolversion[$count]";
#@pattern = ("GENERAL:species", "GENERAL:build", "GENERAL:protocol");
#foreach @pattern {
#  parse($in, $_);
#  if ($match =~ /^1$/) { print "Present!\n"; };
#  $count++;
#  }
#}

#$in = $inyml;
#openyml_read($in);
#$in = $inyml;
#load_edit($in);

#Editing section first single values, next arrays
#print "Starting editing of strings...\n";
#foreach my $key (keys %annovarparameters_single) {
#    my $innermatch = "0";
    #print "Key: $key\n";
#    foreach my $innerkey (@readonly) {
#        if ($key =~ /^$innerkey$/) {
	    #print "Value not allowed to be changed!\n";  
#            $innermatch++; };
#        }
#    if ($innermatch >= 1) {
#	print "Skipping: $key\n";
	#Do nothing
#    } else { 
#    my $value = $annovarparameters_single{$key};
#    if ($value !~ /^$/) {
#        print "key:$key value:$value\n";
#        $pattern = "^($application:.{1,}:$key)";
#	print "pattern: $pattern\n";
#        %in = $yml_hash_edit; 
#        parse($in, $pattern);
#        print "editing value...: $match_value\n";
#        @match_value = split (':', $match_value);
# 	 my $max = scalar @match_value;
# 	 $count = "0";
#        foreach (@match_value) {
#	   if ($count == $max-1) {
#               print "$_\n";
#	       $_ =~ s/^.{1,}$/$value/g;
#	       print "after: $_ \n";
#	   }
#	   $count++;
#	}
#	print "max $max\n";
#        if ( $max == 4 ) {
#            $yml_hash_edit->{$match_value[0]}{$match_value[1]}{$match_value[2]} = $match_value[3]
#        }
#    }
#    } 
#}

#print "\n";
#my $checkkey;

#allow processing of arrays as well
#print "Starting editing of array values...\n";
#foreach my $key ( keys %annovarparameters_array) {
#    $count = "0";
    #print "COUNT: $count\n";
    #print "KEY: $key\n";
#    my @key = split (',', $key);
#    foreach my $innerkey (@key) {
#        my $innermatch = "0";
#        foreach $checkkey (@readonly) {
#            if ($innerkey =~ /^$checkkey$/) {
                #Do nothing
#                $innermatch++; };
#            }
#        if ($innermatch >= 1) {
#            print "innerkey: $innerkey skipped\n";

#       } else {
#        my $value = $annovarparameters_array{$key};
#	if ($value !~ /^$/ ) {
#	    print "Value: $value\n";
#	    my @value = split (',', $value);
#	    my $innercount = "0";
#            foreach my $innervalue (@value) {
#		if ( $innervalue !~ /^$/ ) {
#	            print "\nkey: $key innerkey: $innerkey check: $checkkey value: $innervalue\n";
#	            print "innercount: $innercount\n";
#	            $pattern = "^($application:.{1,}:$input_annovar_species:$input_annovar_build:.{1,}:@inprotocol[$innercount]$inprotocolversion[$innercount]:$key)";
#                    print "pattern: $pattern\n";
#	            %in = $yml_hash_edit;
#	            parse($in, $pattern);
#		    undef %in;
#	            print "editing value...: $match_value\n";
#	                if ($match =~ /^1$/) {
#                            @match_value = split (':', $match_value);
#                            my $max = scalar @match_value;
#                            #$count = "0";
#                            foreach (@match_value) { 
#				print "count: $count\n";
#		                if ($count == $max-1) {
#		                    print "empty_check: $_ \n";
#			            $_ =~ s/^.{1,}$/$value[$innercount]/g;
#			            print "empty_check_after: $_ \n";
#		                }                                   
#		                print "Max: $max\n";
#                                $count++;
#		            }
#			    print "change: $value[$innercount]\n";
#			    print "Max: $max\n";
#                            if ( $max == 9 ) {
#                                $yml_hash_edit->{$match_value[0]}{$match_value[1]}{$match_value[2]}{$match_value[3]}{$match_value[4]}{$match_value[5]}{$match_value[6]}{$match_value[7]} = $match_value[8]; 
#                           }
#	                } else {
#
#	                     if ($match =~ /^0$/) {   
#	                         $pattern = "^($application:.{1,}:$input_annovar_species:$input_annovar_build:.{1,}:@inprotocol[$innercount]:$inprotocolversion[$innercount])";
#                                 %in = $yml_hash_edit;
#				 parse($in, $pattern);          
#				 undef %in;
#	                         print "editing value...: $match_value\n";
#	                         if ($match > 1) {
#                                     @match_value = split (':', $match_value); 
#	                             my $max = scalar @match_value;
#				     $count = "0";
#                                     print "Max: $max\n";
#                                     if ( $max == 9 ) {
#                                         $yml_hash_edit->{$match_value[0]}{$match_value[1]}{$match_value[2]}{$match_value[3]}{$match_value[4]}{$match_value[5]}{$match_value[6]}{$innerkey} = "$innervalue";
#                                     }
# 	                         }    
#	                     }
#	                }          
#		} else {
#                #Do nothing no arugment supplied  
#		}
#	    $innercount++;
#	    }
#        } else {
#	#Do nothing no argument supplied
#        }
#	}
#    }
#$count++;
#print "\n";
#}

#print "\nFinished editing...\n";

#Create second yml file with change values originating from cli
#$in = $outyml;
#openyml_write($in);
#%in = $yml_hash_edit; 
#writeyml_edit($in);
#$in = $outyml;
#load($outyml);
#print "Finished loading second yml...\n";

#Load ymlhash with arguments
#$in = $parameter_yml;
#openyml_read($in);
#$in = $parameter_yml;
#load_arguments($in);
#print "Finished loading arguments...\n";


#Fill hashes with input yaml files as defined in input and fixed arguments yaml
openyml_read ($parameter_yml);
openyml_read ($inyml);
load ($inyml, %yml_hash, $yml_hash);
load_arguments ($parameter_yml, %yml_hash_arguments, $yml_hash_arguments);

##Check input yaml files format
##Pickup arguments from YAML to run command

#########################
#!!General parser test!!#
#########################

#Parsing both input YML files to check format, deviations results in killing the process
print "\nParsing Input YAML File\n";
parse ($yml_hash, $data_type); 
print "\nParsing Argument YAML File\n";
parse ($yml_hash_arguments, $data_type); 

#Manually defined values to fill Annovar command
#Possible to extract from YAML using parser
my $application = 'annovar';

#Possible to extract from YAML using parser
my $ncbiRefSeq = 'ncbiRefSeq';
my $cosmic = 'cosmic';
my $dbsnp = 'avsnp';
my $clinvar = 'clinvar';
my $class = 'class';

#Possible to extract from YAML using parser
my $ncbiRefSeq_version = '_UMCU';
my $cosmic_version = '84';
my $dbsnp_version = '150';
my $clinvar_version = '_20180603';
my $class_version = '100519';

#possible to extract from YAML using parser
#species input fixed
my $input_annovar_species = "human";

#manual definition required! --> parameter in plugin galaxy / script hardcoded??
my $language = 'perl ';

#Possible to extract from YAML using parser
my $parse1 = 'parse1';
my $parse2 = 'parse2';

#Possible to extract from YAML
my $application_path = "$application,APPLICATION,GENERAL";

my $ncbiRefSeq_path = "$ncbiRefSeq$ncbiRefSeq_version,NAME,ANALYSIS,DATABASES";
my $cosmic_path = "$cosmic$cosmic_version,NAME,ANALYSIS,DATABASES";
my $dbsnp_path = "$dbsnp$dbsnp_version,NAME,ANALYSIS,DATABASES";
my $clinvar_path = "$clinvar$clinvar_version,NAME,ANALYSIS,DATABASES";
my $class_path = "$class$class_version,NAME,ANALYSIS,DATABASES";

##Code to create folder for database softlinks

my $path_folder_symlinks;
my @symlinkfolder_building_blocks;
my $symlinks_folder_created;

if ( $use_alias =~ /^1$/) {

@symlinkfolder_building_blocks = ("$parse1,LOCATION_DATABASE,$application_path","$parse1,SPECIES,$application_path");
}

#Setup variables for processing parse_look function
my $lookup;
my %input;
my $input;
my ($pattern1, $pattern2, $pattern3, $pattern4, $pattern5);

$symlinks_folder_created = '0';


#Code reading species en database location from YAML
if ( $use_alias =~ /^1$/) {

  foreach (@symlinkfolder_building_blocks) {
    $in = $inyml;
    $in2 = $parameter_yml;
    #print "loop1:$_\n";
    my @values = split /\,/, $_;
    foreach (@values) {
      if ( $_ !~ /^parse[12]$/ ) { 
        print "Error while generating soflink folder [$_]...\n";
        $path_folder_symlinks .= "$_";
        last;
      }
      $lookup = "";
      parse_lookup ($values[0], $values[1], $values[2], $values[3], $values[4], $values[5], %yml_hash, %yml_hash_arguments, $yml_hash, $yml_hash_arguments, $lookup, $data_type);
      chomp $lookup;
      $path_folder_symlinks .= "$lookup";
      $lookup = '';
      %input = '';
      $input = '';
      last;
    }
  }

  #Generate base name folder, numbered 0 up to endless to allow specific folder for parrallel analysis

  $symlinks_folder_created = '0';

  $path_folder_symlinks =~ s/^\s+|\s+$//g;
  $folder_symlinks =~ s/^\s+|\s+$//g;

  print "\n";
  print "Folder symlinks: $path_folder_symlinks/$folder_symlinks\n";

  while ($symlinks_folder_created =~ /^0$/ ) {
    if ( -d "$path_folder_symlinks/$folder_symlinks" and -e "$path_folder_symlinks/$folder_symlinks" ) { 
      print "Folder $path_folder_symlinks/$folder_symlinks is in use!\n";
      $folder_symlinks++;
    } else { 
      print "Creating $path_folder_symlinks/$folder_symlinks...\n"; 
      system ( "mkdir $path_folder_symlinks/$folder_symlinks" );
      $path_symlinks = "$path_folder_symlinks/$folder_symlinks/";    
      print "Created $path_symlinks\n";
      $symlinks_folder_created++;
    }
  }
}

##Building annovar command:
	
#Manual definition required
#blocks to build command
my @command_building_blocks = ( 
"$language",
"$parse1,PATHSCRIPTS,$application_path",
"$parse1,TABLE_ANNOVAR,$application_path",
"$invcf",
"$parse1,LOCATION_DATABASE,$application_path",
"$parse1,SPECIES,$application_path",
"$parse1,BUILD,$application_path",
"$parse1,REMOVE,$application_path",
#Manually define arguments using ALIASES or regular NAME argument based on turning ALIAS on or OFF
#Define this in sections scanning the YAML file
"--protocol \'",
"$parse2,ALIAS,$ncbiRefSeq_path",
"\,",
"$parse2,NAME,$cosmic_path",
"\,",
"$parse2,ALIAS,$dbsnp_path",
"\,",
"$parse2,NAME,$clinvar_path",
"\,",
"$parse2,ALIAS,$class_path",
"\' ",
"--operation \'",
"$parse2,OPERATION,$ncbiRefSeq_path",
"\'",
"\,",
"\'",
"$parse2,OPERATION,$cosmic_path",
#"\'\,\'",
"\'",
"\,",
"\'",
"$parse2,OPERATION,$dbsnp_path",
#"\'\,\'",
"\'",
"\,",
"\'",
"$parse2,OPERATION,$clinvar_path",
#"\'\,\'",
"\'",
"\,",
"\'",
"$parse2,OPERATION,$class_path",
"\' ",
"$parse1,DOT2UNDERLINE,$application_path",
"$parse1,OTHERINFO,$application_path",
"$parse1,NASTRING,$application_path",
"$parse1,INPUTFORMAT,$application_path",
"--arg \'",
"$parse2,HGVS,$ncbiRefSeq_path",
"$parse2,SPLICING,$ncbiRefSeq_path",
"$parse2,EXONSPLIC,$ncbiRefSeq_path",
#"\'\,\'",
"\'",
"\,",
"\'",
"$parse2,COLSWANTED,$cosmic_path",
#"\'\,\'",
"\'",
"\,",
"\'",
"$parse2,COLSWANTED,$dbsnp_path",
#"\'\,\'",
"\'",
"\,",
"\'",
"$parse2,COLSWANTED,$clinvar_path",
#"\'\,\'",
"\'",
"\,",
"\'",
"$parse2,COLSWANTED,$class_path",
"\' ",
"$parse1,THREAD,$application_path",
"$parse1,POLISH,$application_path",
"-outfile $outvcf "); 

print "\n";


#Create Empty command
my $test_command = "";

#Start building command to run
foreach (@command_building_blocks) {
  $in = $inyml;
  $in2 = $parameter_yml;
  #print "loop1:$_\n";
  if ( $_ =~ /^,$/ ) {
    print "[NOT YAML] $_ is separator and does not require matching!\n";
    $test_command .= "$_";
    next;
  }
  my @values = split /\,/, $_;
  foreach (@values) {
    if ( $_ !~ /^parse[12]$/ ) { 
      print "[NOT YAML] $_ does not require matching!\n";
      $test_command .= "$_";
      last;
    }
    $lookup = "";
    parse_lookup ($values[0], $values[1], $values[2], $values[3], $values[4], $values[5], %yml_hash, %yml_hash_arguments, $yml_hash, $yml_hash_arguments, $lookup, $data_type);

    #If alias is enable determine which number is used for this analysis to pickup the correct database files
    if ( $use_alias =~ /^1$/ ) {
      #add code to add folder number for analysis
      if ($values[1]=~/^SPECIES/ )   {
        $lookup =~ s/^\s+|\s+$//g;
        $folder_symlinks;
        $lookup .= "/$folder_symlinks ";
      }
    }
    print "[YAML]: $lookup\n";
    $test_command .= "$lookup";
    $lookup = '';
    $input = '';
    %input = '';
    last;
  }
}

print "\nalias:$use_alias\n";

#Created softlinks voor database files

my @database_info;

if ( $use_alias =~ /^1$/ ) {
  print "Entering Alias stuff..\n";
  #Define databases in use
  @softlink_building_blocks = (
  "$parse2,NAME,$ncbiRefSeq_path",
  "$parse2,NAME,$cosmic_path",
  "$parse2,NAME,$dbsnp_path",
  "$parse2,NAME,$clinvar_path",
  "$parse2,NAME,$class_path" );

  print "\nCreating softlinks to Annovar database files:\n";

  #Use parse_lookup to find argument and create symlinks in analysis database folder
  foreach (@softlink_building_blocks) {
    $in = $inyml;
    $in2 = $parameter_yml;
    my @values = split /\,/, $_;
    foreach (@values) {
      if ( $_ !~ /^parse[12]$/ ) { 
        print "[NOT YAML] $_ does not require matching!\n";
        last;
      }
      $lookup = "";
      parse_lookup ($values[0], $values[1], $values[2], $values[3], $values[4], $values[5], %yml_hash, %yml_hash_arguments, $yml_hash, $yml_hash_arguments, $lookup, $data_type);
      print "database=$lookup\n"; 
      #add code to add folder number for analysis
      #print "Path_destination=$path_symlinks\n";
      #print "Path_origin=$path_folder_symlinks\n";
      my $database_detail = $lookup;

      $lookup = "";
      parse_lookup ("parse1", "BUILD", "$application","APPLICATION","GENERAL", %yml_hash, %yml_hash_arguments, $yml_hash, $yml_hash_arguments, $lookup, $data_type);
      print "Species: $lookup\n";
      my @build = split /\s/, $lookup;
      #print "Species=$build[1]\n";
      my $species = $build[1];

      $lookup = ''; 
      print "$values[0], 'ALIAS', $values[2], $values[3], $values[4], $values[5]\n";
      parse_lookup ($values[0], 'ALIAS', $values[2], $values[3], $values[4], $values[5], %yml_hash, %yml_hash_arguments, $yml_hash, $yml_hash_arguments, $lookup, $data_type);
      #print "Alias=$lookup\n";
      my $alias = $lookup;

      #Based on database and presence of either index file or not run softlink commands
      if ( $alias =~ /^N\/A$/ ) {
        my $command = "ln -s $path_folder_symlinks/$species\_$database_detail.txt $path_symlinks$species\_$database_detail.txt";
        print "command: $command\n";
        system ( $command );
      } elsif ( $alias !~ /^N\/A$/ ) {
        my $command = "ln -s $path_folder_symlinks/$species\_$database_detail.txt $path_symlinks$species\_$alias.txt";
        print "command2: $command\n";
        system ( $command );
      } 
      if ( $alias =~ /^dbSNP$/ ) {
        my $command = "ln -s $path_folder_symlinks/$species\_$database_detail.txt.idx $path_symlinks$species\_$alias.txt.idx";
        print "command3: $command\n";
        system ( $command );
      }

      #Push all softlinks used in array
      if ( $alias !~ /^N\/A$/ ) {
        push @database_info, "$alias=$species\_$database_detail";
      }


      $lookup = '';
      $input = '';
      %input = '';
      last;
    }
  }
  print "Done preparing databases!\n";
}

#Run Generated annovar command

print "\nResulting in following command: $test_command\n";
#################################################
#!!IMPORTANT# DECOMMENT FOR ACTIVE USE!!#
system ($test_command);
#################################################

my $addition_vcf;

if ( $use_alias =~ /^1$/ ) {

  #Add translation for common databases to specific version 
  $addition_vcf;
  my $database_details = $path_symlinks.'database_details.txt';
  $addition_vcf .= "##";
  foreach (@database_info) {
    $addition_vcf .= ":$_";
  }  	

  #Add information to vcf output file
  open(OUTVCF_EDIT, '>', "$outvcf\_edit") or die "Could not open file '$outvcf' $!";
  print "\nDone open edit output vcf file...\n";
  open(OUTVCF, '<', "$outvcf") or die "Could not open file '$outvcf' $!";
  print "Done reading vcf output vcf file...\n\n";


  while ( my $line = <OUTVCF> ) {
    my @row = split ( "\t", $line );
    if ( $row[0] =~ /^#CHROM$/ ) {
      print OUTVCF_EDIT "$addition_vcf\n";
      print OUTVCF_EDIT "$line";
    } else {
      print OUTVCF_EDIT "$line";
    }
  }

  close OUTVCF_EDIT;
  close OUTVCF;

  #Replace orginal vcf file with edited vcf file with additional information
  my $mv_command = "mv $outvcf\_edit $outvcf";
  system ($mv_command);

  print "\nAdding line to vcf file:\n";
  print "$addition_vcf\n\n";

  #Remove folder containing softlinks
  print "Removing temporary directory:\n";
  print "rm -r $path_symlinks\n\n";
  system ( "rm -r $path_symlinks" ); 
}

print "Job done program stopping.\n";

exit;

#####################
#!!!END OF SCRIPT!!!#
####################

#############
#SUBROUTINES#
#############

#############
#Subroutine to obtain datatype variable
#############
sub test_type {

my $input = $_[0];
$data_type = ref($input);
return $data_type;

}


#############
#Subroutine to parse and check yaml for correct format
#############
sub parse {

#@inyml = @{$yml_hash->{$pattern4}{$pattern5}};
#@inarguments = @{$yml_hash_arguments->{$pattern4}{$pattern5}};

my $input_hash = $_[0];

my ($key_a, $key_a2, $key_b, $key_b2, $key_c, $key_c2);
my ($value_a, $value_a2, $value_b, $value_b2, $value_c, $value_c2);
my (@hash, @hash2, $hash, $hash2);

my $count_a = '0';
my $count_a2 = '0';
my $count_a3 = '0';
my $count_a4 = '0';
my $count_b = '0';
my $count_b2 = '0';
my $count_b3 = '0';
my $count_b4 = '0';
my $count_c = '0';
my $count_c2 = '0';
my $count_c3 = '0';
my $count_c4 = '0';

for $key_a (keys %$input_hash) { 
  $count_a++;
  for $value_a (values %$input_hash) {
    $count_a2++;
    if ( $count_a =~ /^$count_a2$/ ) {
      test_type($value_a);
      if ($data_type =~ /^HASH$/) {
        for $key_a2 ( keys %{$input_hash->{$key_a}} ) {
          $count_a3++;
	  for $value_a2 ( values  %{$input_hash->{$key_a}} ) {
            $count_a4++;
            if ($count_a3 =~ /^$count_a4$/) {
	      test_type($value_a2);
              if ($data_type =~ /^HASH$/) {
		print "[ERROR1] Found unexpected yml file format [ key: $key_a2 ] , please check whether your YAML file meets requirements!\n";
		exit;
	      } elsif ($data_type =~ /^ARRAY$/) {
                @hash = @{$input_hash->{$key_a}{$key_a2}};
		foreach $hash ( @hash ) {
		  for $key_b ( keys %$hash ) { 
		    $count_b++;
		    for $value_b (values %$hash ) {
                      $count_b2++;
                      if ($count_b =~ /^$count_b2$/) {
                        test_type($value_b);
                        if ( $data_type =~ /^HASH$/ ) {
			  for $key_b2 ( keys %{$hash->{$key_b}} ) { 
			    $count_b3++;
			    for $value_b2 (values %{$hash->{$key_b}} ) {  
		              $count_b4++;
			      if ($count_b3 =~ /^$count_b4/) {
                                test_type($value_b2);
      			        if ( $data_type =~ /^HASH$/ ) {
			        print "[ERROR2] Found unexpected yml file format [ key: $key_b2 ] , please check whether your YAML file meets requirements!\n";
				exit;
		                } elsif ( $data_type =~ /^ARRAY$/ ) {  
			        print "[ERROR3] Found unexpected yml file format [ key: $key_b2 ] , please check whether your YAML file meets requirements!\n";
				exit;
			        } else {
				#Assume value level 4
				print "value_b2: $key_a:$key_a2:$key_b:$key_b2:$value_b2\n";
			        }
		              }    
			    }
                            $count_b4 = "0";
                          }
			  $count_b3 = "0";
		        } 
			elsif ( $data_type =~ /^ARRAY$/ ) {
			  print "[ERROR4] Found unexpected yml file format [ key: $key_b ] , please check whether your YAML file meets requirements!\n";
			  exit;
                        } else {     
			  #assume value level 3
			  print "value_b: $key_a:$key_a2:$key_b:$value_b\n";
		        }	
                      }
            	    }
                  $count_b2 = '0';
                  }
	        $count_b = '0';
	        }    
              } else {	
	        #Assume value level 2
		print "Value_a2: $key_a:$value_a2\n";
              }   
            }
          }
        $count_a4 = '0';
        }
      $count_a3 = '0';
      } 
      elsif ($data_type =~ /^ARRAY/) {
        @hash2 = @{$input_hash->{$key_a}}; 
	foreach $hash2 ( @hash2 ) {
	  for my $key_c ( keys %$hash2 ) {
	    $count_c++;
	    for $value_c ( values %$hash2 ) {
              $count_c2++;
	      if ($count_c =~ /^$count_c2$/) {
                test_type($value_c);
                if ( $data_type =~ /^HASH$/ ) {
		  for $key_c2 ( keys %{$hash2->{$key_c}} ) {
		    $count_c3++;
		    for $value_c2 ( values %{$hash2->{$key_c}} ) {
		      $count_c4++;
		      if ( $count_c3 =~ /^$count_c4$/ ) {
                        test_type($value_c2);
                        if ( $data_type =~ /^HASH$/ ) {
                          print "[ERROR5] Found unexpected yml file format [ key: $key_c2 ] , please check whether your YAML file meets requirements!\n";
		          exit;
		        } elsif ( $data_type =~ /^ARRAY$/ ) {
                          print "[ERROR6] Found unexpected yml file format [ key: $key_c2 ] , please check whether your YAML file meets requirements!\n";
		          exit;
		        } else { 
			  #Assume level 4
			  print "value_c2: $key_a:$key_a2:$key_c:$key_c2:$value_c2\n";
			}
		      }
		    }
                  $count_c4 = '0';
	          }
                $count_c3 = '0';	  
		} 
		elsif ( $data_type =~ /^ARRAY$/ ) {
		  print "[ERROR7] Found unexpected yml file format [ key: $key_c ] , please check whether your YAML file meets requirements!\n";
		  exit;
		} else {
		  #assume value level 3
                  print "value_c: $key_a:$key_c:$value_c\n";
         	}
              }
	    } 
          $count_c2 = '0';
          }
        $count_c = '0'; 
        }
      } else { 
        #Assume value level 1
	print "value_a: $key_a:$value_a\n";
      }
    }   
  }
$count_a2 = '0';
}
$count_a = '0';

$data_type = "";

}

#########################################
#Subroutine to fetch arguments from YAML#
#########################################

sub parse_lookup {

my $hash;
my %hash;
my $hash_a;
my %hash_a;
my %hash_a2;
my $hash_a2;
my @inyml;
my @inarguments;
my $count;
my $match;
my $match2;
my $argument;
my ($pattern1, $pattern2, $pattern3, $pattern4, $pattern5);

if ( $_[0] =~ /^parse1$/ ) {
$pattern1 = $_[1];
$pattern2 = $_[2];
$pattern3 = $_[3];
$pattern4 = $_[4];
@inyml = @{$yml_hash->{$pattern4}};
@inarguments = @{$yml_hash_arguments->{$pattern4}};
} elsif ( $_[0] =~ /^parse2$/) {
$pattern1 = $_[1];
$pattern2 = $_[2];
$pattern3 = $_[3];
$pattern4 = $_[4];
$pattern5 = $_[5];
@inyml = @{$yml_hash->{$pattern4}{$pattern5}};
@inarguments = @{$yml_hash_arguments->{$pattern4}{$pattern5}};
}

$match = "0";

foreach my $hash ( @inyml ) {
  $hash = \%$hash;
  if ($hash->{$pattern3} =~ /^$pattern2$/ ) { 
    my $value = $hash->{$pattern1};
    if ($value eq "" ) {    
      print "Error unknown argument \"$pattern1\" for annotation database \"$pattern2\" in input YAML!\n";
      exit;
    } 
    foreach my $hash_a (@inarguments) {
      my $hash_a2 = $hash_a->{$pattern1};
      if ( $hash_a->{$pattern3} =~ /^$pattern2$/ ) {
        $match2++;
        if ($hash_a2 =~ /^HASH/ ) {
          $argument = $hash_a2->{$value};
        } else {    
        $argument = $hash_a2;
        }      
      if ($argument eq "" ) {
        print "Unknown given option \"$value\" for argument \"$pattern1\" in YAML arguments file or input YAML for database \"$pattern2\"!\n";
	print "Please check your input and start the plugin again.\n";
	exit;
        }	
      }
      if ($match2 =~ /^1$/) { 
        #Do nothing 
      } elsif ( $match2 > 1 ) {
        print "Error Multiple matches for database \"$pattern2\" database in YAML argumentsfile.\n";
	print "Please check your input and start the plugin again.\n";
	exit;
      } elsif ( $match2 =~ /^0$/ ) {
        print "Error no matches for  database \"$pattern2\" in YAML argumentsfile.\n";
	print "Please check your input and start the plugin again.\n";
	exit;
      } 
    }
    $match++;  
    $lookup = $argument;    
  }
}
if ($match =~ /^1$/) {
  #Do nothing 
} else {
  print "match: $match \n";
  print "Error in yaml file multiple instances matching input yaml, please check input!\n";
  exit;
}

return $lookup;

}

#############
#Subroutine for opening out / edited yaml file
#############
sub openyml_write {
my $outyml = $in; 
#Create second yml file with change values originating from cli
open(OUT, '>', "$outyml") or die "Could not open file '$outyml' $!";
print "Done creating output file...\n";
}

#############
#Subroutine to check whether yaml file exists
#############
sub openyml_read { 
$in = $_[0];
chomp $in;
my $inyml = $in;
open(FILE, '<', "$inyml") or die "Could not open file '$inyml' $!";
close FILE;
print "Done reading\n";
}

#############
#Subroutine for reading argument yaml file in hash
##############
sub load_arguments {
$in = $_[0];
$yml_hash_arguments = LoadFile($in);
return($yml_hash_arguments, %yml_hash_arguments);
}

#############
#Subroutine for reading input yaml file in hash
##############
sub load {
$in = $_[0];
$yml_hash = LoadFile($in);
return($yml_hash, %yml_hash);
}

#############
#Subroutine for reading edited input yml file in hash
##############
sub load_edit {
$in = $_[0];
$yml_hash_edit = LoadFile($in);
}

#############
#Subroutine for editing content yaml hash
##############
sub writeyml {
my $yml_hash = $in;
$writer = sub {
    my $line = shift;
    print FILE "$line\n";
};
my $file = Data::YAML::Writer->new;
$file->write( $yml_hash, $writer );
}

#############
#Subroutine for editing content edited yaml hash
##############
sub writeyml_edit {
my $yml_hash = $in;
$writer = sub {
    my $line = shift;
    print OUT "$line\n";
};
my $file = Data::YAML::Writer->new;
$file->write( $yml_hash_edit, $writer );
close OUT;
}