Mercurial > repos > niels > annovar_yaml_wrapper
view annovar_yaml/annovar_yaml.pl @ 17:ee5053b5e72b draft
Uploaded new version including --paryml
author | niels |
---|---|
date | Mon, 10 Jun 2019 07:25:08 -0400 |
parents | 7f4fb56714e8 |
children | ce16079341bf |
line wrap: on
line source
#/usr/bin/perl #This code is using v4 of the Run_Annovar_vx.pl script! ################## #!!TEST COMMAND!!# ################## #Following input required, input fixed currently to run limited Annovar settings #perl annovar_yaml.pl --inyml YAML_annovar.yml --invcf invcf.vcf --outvcf outvcf.vcf > log.txt #Future prospects: ability to learn to read YAML files to pick up parameters. Manual definition of command line still required! #perl modules use use YAML; use YAML::Tiny; use YAML::XS 'LoadFile'; use Data::Dumper; #use Data::YAML::Writer; use Getopt::Long; use feature qw(postderef postderef_qq); use feature qw(refaliasing); #no warnings qw(experimental::postderef); use strict; #input required #two modes run and edit #Basis Yaml file [default required] #Yaml out file [required in edit mode] #species [default required] #build [default required] #protocol [default required] #protocolversion [default required] #basic paramters #Switch to enable disable Aliasses; enable=1 disable=0 my $use_alias = "1"; my $edit = "0"; my $run = "0"; my $application = "0"; #arguments yaml file is fixed! my $parameter_yml = "YAML_arguments_annovar.yml"; my $inyml; my $outyml; my $invcf; my $outvcf; my $yml_hash_arguments; my %yml_hash_arguments; my $yml_hash; my %yml_hash; my $yml_hash_edit; my %yml_hash_edit; my $in; my $in2; my %in; my %in2; my $writer; my $lookup; my $data_type; #parameter applications my %annovarparameters_array; my %annovarparameters_single; my $annovarparameters_array; my $annovarparameters_single; #Strings used as input parameters #my $input_general_location_database; #my $input_general_dir_scripts; #my $input_general_location_scripts_coding_annovar; #my $input_general_location_scripts_table_annovar; my $input_annovar_build; my $input_annovar_species; my $inprotocol; my $inprotocolversion; my @inprotocol; my @inprotocolversion; my $input_general_settings_dot2underline; my $input_general_settings_nastring; my $input_general_settings_otherinfo; my $input_general_settings_polish; my $input_general_settings_remove; my $input_general_settings_thread; my $input_general_settings_inputformat; my $input_databases_available; my $input_databases_required; my $input_databases_colswanted; my $input_databases_exonicsplic; my $input_databases_hgvs; my $input_databases_operation; my $input_databases_splicing; my @input_databases_colswanted; my @input_databases_exonicsplic; my @input_databases_hgvs; my @input_databases_operation; my @input_databases_splicing; #other variables my $script; my $type; my $count; my $column_build; my $column_species; my @column_build; my @column_species; my $pattern; my $pattern2; my $match; my $match_value; my @match_value; my @readonly; my %editinyml; my $folder_symlinks = '0'; my $path_symlinks; my @softlink_building_blocks; #Fixed values for searching hashes my $level5 = "DATABASES"; #parameters to build command my $protocol; my @protocol; my $operation; my $argument; GetOptions ( #Required input 'protocol=s' => \$inprotocol, 'protocolversion=s' => \$inprotocolversion, 'script=s' => \$script, 'invcf=s' => \$invcf, 'outvcf=s' => \$outvcf, 'application=s' => \$application, 'inyml=s' => \$inyml, 'outyml=s' => \$outyml, 'build=s' => \$input_annovar_build, 'species=s' => \$input_annovar_species, #Optional input 'edit' => \$edit, 'run' => \$run, #variables in GENERAL 'dot2underline=s' => \$input_general_settings_dot2underline, 'nastring=s' => \$input_general_settings_nastring, 'otherinfo=s' => \$input_general_settings_otherinfo, 'polish=s' => \$input_general_settings_polish, 'remove=s' => \$input_general_settings_remove, 'thread=s' => \$input_general_settings_thread, 'inputformat=s' => \$input_general_settings_inputformat, #variables in DATABASES 'colswanted=s' => \$input_databases_colswanted, 'exonicsplic=s' => \$input_databases_exonicsplic, 'hgvs=s' => \$input_databases_hgvs, 'operation=s' => \$input_databases_operation, 'splicing=s' => \$input_databases_splicing, ); chomp $invcf; chomp $outvcf; #inactive options # 'drequired=s' => \$input_databases_required, # 'davail=s' => \$input_databases_available, # 'gpathdatabase=s' => \$input_general_location_database, # 'gpathscripts=s' => \$input_general_dir_scripts, # 'gpathcodannovar=s' => \$input_general_location_scripts_coding_annovar, # 'gpathtabannovar=s' => \$input_general_location_scripts_table_annovar, #Check input parameters #Do some checks whether all required arguments are given #some code #arrays with all application specific parameters #make sure keys are unique to allow for parsing of yml! %annovarparameters_array = ( "COLSWANTED" => "$input_databases_colswanted", "EXONICSPLIC" => "$input_databases_exonicsplic", "HGVS" => "$input_databases_hgvs", "OPERATION" => "$input_databases_operation", "SPLICING" => "$input_databases_splicing", ); %annovarparameters_single = ( "DOT2UNDERLINE" => "$input_general_settings_dot2underline", "NASTRING" => "$input_general_settings_nastring", "OTHERINFO" => "$input_general_settings_otherinfo", "POLISH" => "$input_general_settings_polish", "REMOVE" => "$input_general_settings_remove", "THREAD" => "$input_general_settings_thread", "INPUTFORMAT" => "$input_general_settings_inputformat", ); #parameter not allowed to be edited in yml @readonly = ("hg19","human","mouse","mm10","annovar","VERSION","TABLE_ANNOVAR","refgene","cosmic"); #converting input strings into input arrays @inprotocol = split (",", $inprotocol); @inprotocolversion = split (",", $inprotocolversion); @input_databases_colswanted = split (",", $input_databases_colswanted); @input_databases_exonicsplic = split (",", $input_databases_exonicsplic); @input_databases_hgvs = split (",", $input_databases_hgvs ); @input_databases_operation = split (",", $input_databases_operation); @input_databases_splicing = split (",", $input_databases_splicing); #$in = $inyml; #openyml_read($in); #$in = $inyml; #load($in); #check if combination of application,species, build and protocol exists #$count = "0"; #%in = $yml_hash; #foreach (@inprotocol) { #print "protocol $inprotocol[$count]\n"; #$pattern = "$application:ANALYSIS:$input_annovar_species:$input_annovar_build:DATABASES:$inprotocol[$count]$inprotocolversion[$count]:NAME $inprotocol[$count]$inprotocolversion[$count]"; #@pattern = ("GENERAL:species", "GENERAL:build", "GENERAL:protocol"); #foreach @pattern { # parse($in, $_); # if ($match =~ /^1$/) { print "Present!\n"; }; # $count++; # } #} #$in = $inyml; #openyml_read($in); #$in = $inyml; #load_edit($in); #Editing section first single values, next arrays #print "Starting editing of strings...\n"; #foreach my $key (keys %annovarparameters_single) { # my $innermatch = "0"; #print "Key: $key\n"; # foreach my $innerkey (@readonly) { # if ($key =~ /^$innerkey$/) { #print "Value not allowed to be changed!\n"; # $innermatch++; }; # } # if ($innermatch >= 1) { # print "Skipping: $key\n"; #Do nothing # } else { # my $value = $annovarparameters_single{$key}; # if ($value !~ /^$/) { # print "key:$key value:$value\n"; # $pattern = "^($application:.{1,}:$key)"; # print "pattern: $pattern\n"; # %in = $yml_hash_edit; # parse($in, $pattern); # print "editing value...: $match_value\n"; # @match_value = split (':', $match_value); # my $max = scalar @match_value; # $count = "0"; # foreach (@match_value) { # if ($count == $max-1) { # print "$_\n"; # $_ =~ s/^.{1,}$/$value/g; # print "after: $_ \n"; # } # $count++; # } # print "max $max\n"; # if ( $max == 4 ) { # $yml_hash_edit->{$match_value[0]}{$match_value[1]}{$match_value[2]} = $match_value[3] # } # } # } #} #print "\n"; #my $checkkey; #allow processing of arrays as well #print "Starting editing of array values...\n"; #foreach my $key ( keys %annovarparameters_array) { # $count = "0"; #print "COUNT: $count\n"; #print "KEY: $key\n"; # my @key = split (',', $key); # foreach my $innerkey (@key) { # my $innermatch = "0"; # foreach $checkkey (@readonly) { # if ($innerkey =~ /^$checkkey$/) { #Do nothing # $innermatch++; }; # } # if ($innermatch >= 1) { # print "innerkey: $innerkey skipped\n"; # } else { # my $value = $annovarparameters_array{$key}; # if ($value !~ /^$/ ) { # print "Value: $value\n"; # my @value = split (',', $value); # my $innercount = "0"; # foreach my $innervalue (@value) { # if ( $innervalue !~ /^$/ ) { # print "\nkey: $key innerkey: $innerkey check: $checkkey value: $innervalue\n"; # print "innercount: $innercount\n"; # $pattern = "^($application:.{1,}:$input_annovar_species:$input_annovar_build:.{1,}:@inprotocol[$innercount]$inprotocolversion[$innercount]:$key)"; # print "pattern: $pattern\n"; # %in = $yml_hash_edit; # parse($in, $pattern); # undef %in; # print "editing value...: $match_value\n"; # if ($match =~ /^1$/) { # @match_value = split (':', $match_value); # my $max = scalar @match_value; # #$count = "0"; # foreach (@match_value) { # print "count: $count\n"; # if ($count == $max-1) { # print "empty_check: $_ \n"; # $_ =~ s/^.{1,}$/$value[$innercount]/g; # print "empty_check_after: $_ \n"; # } # print "Max: $max\n"; # $count++; # } # print "change: $value[$innercount]\n"; # print "Max: $max\n"; # if ( $max == 9 ) { # $yml_hash_edit->{$match_value[0]}{$match_value[1]}{$match_value[2]}{$match_value[3]}{$match_value[4]}{$match_value[5]}{$match_value[6]}{$match_value[7]} = $match_value[8]; # } # } else { # # if ($match =~ /^0$/) { # $pattern = "^($application:.{1,}:$input_annovar_species:$input_annovar_build:.{1,}:@inprotocol[$innercount]:$inprotocolversion[$innercount])"; # %in = $yml_hash_edit; # parse($in, $pattern); # undef %in; # print "editing value...: $match_value\n"; # if ($match > 1) { # @match_value = split (':', $match_value); # my $max = scalar @match_value; # $count = "0"; # print "Max: $max\n"; # if ( $max == 9 ) { # $yml_hash_edit->{$match_value[0]}{$match_value[1]}{$match_value[2]}{$match_value[3]}{$match_value[4]}{$match_value[5]}{$match_value[6]}{$innerkey} = "$innervalue"; # } # } # } # } # } else { # #Do nothing no arugment supplied # } # $innercount++; # } # } else { # #Do nothing no argument supplied # } # } # } #$count++; #print "\n"; #} #print "\nFinished editing...\n"; #Create second yml file with change values originating from cli #$in = $outyml; #openyml_write($in); #%in = $yml_hash_edit; #writeyml_edit($in); #$in = $outyml; #load($outyml); #print "Finished loading second yml...\n"; #Load ymlhash with arguments #$in = $parameter_yml; #openyml_read($in); #$in = $parameter_yml; #load_arguments($in); #print "Finished loading arguments...\n"; #Fill hashes with input yaml files as defined in input and fixed arguments yaml openyml_read ($parameter_yml); openyml_read ($inyml); load ($inyml, %yml_hash, $yml_hash); load_arguments ($parameter_yml, %yml_hash_arguments, $yml_hash_arguments); ##Check input yaml files format ##Pickup arguments from YAML to run command ######################### #!!General parser test!!# ######################### #Parsing both input YML files to check format, deviations results in killing the process print "\nParsing Input YAML File\n"; parse ($yml_hash, $data_type); print "\nParsing Argument YAML File\n"; parse ($yml_hash_arguments, $data_type); #Manually defined values to fill Annovar command #Possible to extract from YAML using parser my $application = 'annovar'; #Possible to extract from YAML using parser my $ncbiRefSeq = 'ncbiRefSeq'; my $cosmic = 'cosmic'; my $dbsnp = 'avsnp'; my $clinvar = 'clinvar'; my $class = 'class'; #Possible to extract from YAML using parser my $ncbiRefSeq_version = '_UMCU'; my $cosmic_version = '84'; my $dbsnp_version = '150'; my $clinvar_version = '_20180603'; my $class_version = '100519'; #possible to extract from YAML using parser #species input fixed my $input_annovar_species = "human"; #manual definition required! --> parameter in plugin galaxy / script hardcoded?? my $language = 'perl '; #Possible to extract from YAML using parser my $parse1 = 'parse1'; my $parse2 = 'parse2'; #Possible to extract from YAML my $application_path = "$application,APPLICATION,GENERAL"; my $ncbiRefSeq_path = "$ncbiRefSeq$ncbiRefSeq_version,NAME,ANALYSIS,DATABASES"; my $cosmic_path = "$cosmic$cosmic_version,NAME,ANALYSIS,DATABASES"; my $dbsnp_path = "$dbsnp$dbsnp_version,NAME,ANALYSIS,DATABASES"; my $clinvar_path = "$clinvar$clinvar_version,NAME,ANALYSIS,DATABASES"; my $class_path = "$class$class_version,NAME,ANALYSIS,DATABASES"; ##Code to create folder for database softlinks my $path_folder_symlinks; my @symlinkfolder_building_blocks; my $symlinks_folder_created; if ( $use_alias =~ /^1$/) { @symlinkfolder_building_blocks = ("$parse1,LOCATION_DATABASE,$application_path","$parse1,SPECIES,$application_path"); } #Setup variables for processing parse_look function my $lookup; my %input; my $input; my ($pattern1, $pattern2, $pattern3, $pattern4, $pattern5); $symlinks_folder_created = '0'; #Code reading species en database location from YAML if ( $use_alias =~ /^1$/) { foreach (@symlinkfolder_building_blocks) { $in = $inyml; $in2 = $parameter_yml; #print "loop1:$_\n"; my @values = split /\,/, $_; foreach (@values) { if ( $_ !~ /^parse[12]$/ ) { print "Error while generating soflink folder [$_]...\n"; $path_folder_symlinks .= "$_"; last; } $lookup = ""; parse_lookup ($values[0], $values[1], $values[2], $values[3], $values[4], $values[5], %yml_hash, %yml_hash_arguments, $yml_hash, $yml_hash_arguments, $lookup, $data_type); chomp $lookup; $path_folder_symlinks .= "$lookup"; $lookup = ''; %input = ''; $input = ''; last; } } #Generate base name folder, numbered 0 up to endless to allow specific folder for parrallel analysis $symlinks_folder_created = '0'; $path_folder_symlinks =~ s/^\s+|\s+$//g; $folder_symlinks =~ s/^\s+|\s+$//g; print "\n"; print "Folder symlinks: $path_folder_symlinks/$folder_symlinks\n"; while ($symlinks_folder_created =~ /^0$/ ) { if ( -d "$path_folder_symlinks/$folder_symlinks" and -e "$path_folder_symlinks/$folder_symlinks" ) { print "Folder $path_folder_symlinks/$folder_symlinks is in use!\n"; $folder_symlinks++; } else { print "Creating $path_folder_symlinks/$folder_symlinks...\n"; system ( "mkdir $path_folder_symlinks/$folder_symlinks" ); $path_symlinks = "$path_folder_symlinks/$folder_symlinks/"; print "Created $path_symlinks\n"; $symlinks_folder_created++; } } } ##Building annovar command: #Manual definition required #blocks to build command my @command_building_blocks = ( "$language", "$parse1,PATHSCRIPTS,$application_path", "$parse1,TABLE_ANNOVAR,$application_path", "$invcf", "$parse1,LOCATION_DATABASE,$application_path", "$parse1,SPECIES,$application_path", "$parse1,BUILD,$application_path", "$parse1,REMOVE,$application_path", #Manually define arguments using ALIASES or regular NAME argument based on turning ALIAS on or OFF #Define this in sections scanning the YAML file "--protocol \'", "$parse2,ALIAS,$ncbiRefSeq_path", "\,", "$parse2,NAME,$cosmic_path", "\,", "$parse2,ALIAS,$dbsnp_path", "\,", "$parse2,NAME,$clinvar_path", "\,", "$parse2,ALIAS,$class_path", "\' ", "--operation \'", "$parse2,OPERATION,$ncbiRefSeq_path", "\'", "\,", "\'", "$parse2,OPERATION,$cosmic_path", #"\'\,\'", "\'", "\,", "\'", "$parse2,OPERATION,$dbsnp_path", #"\'\,\'", "\'", "\,", "\'", "$parse2,OPERATION,$clinvar_path", #"\'\,\'", "\'", "\,", "\'", "$parse2,OPERATION,$class_path", "\' ", "$parse1,DOT2UNDERLINE,$application_path", "$parse1,OTHERINFO,$application_path", "$parse1,NASTRING,$application_path", "$parse1,INPUTFORMAT,$application_path", "--arg \'", "$parse2,HGVS,$ncbiRefSeq_path", "$parse2,SPLICING,$ncbiRefSeq_path", "$parse2,EXONSPLIC,$ncbiRefSeq_path", #"\'\,\'", "\'", "\,", "\'", "$parse2,COLSWANTED,$cosmic_path", #"\'\,\'", "\'", "\,", "\'", "$parse2,COLSWANTED,$dbsnp_path", #"\'\,\'", "\'", "\,", "\'", "$parse2,COLSWANTED,$clinvar_path", #"\'\,\'", "\'", "\,", "\'", "$parse2,COLSWANTED,$class_path", "\' ", "$parse1,THREAD,$application_path", "$parse1,POLISH,$application_path", "-outfile $outvcf "); print "\n"; #Create Empty command my $test_command = ""; #Start building command to run foreach (@command_building_blocks) { $in = $inyml; $in2 = $parameter_yml; #print "loop1:$_\n"; if ( $_ =~ /^,$/ ) { print "[NOT YAML] $_ is separator and does not require matching!\n"; $test_command .= "$_"; next; } my @values = split /\,/, $_; foreach (@values) { if ( $_ !~ /^parse[12]$/ ) { print "[NOT YAML] $_ does not require matching!\n"; $test_command .= "$_"; last; } $lookup = ""; parse_lookup ($values[0], $values[1], $values[2], $values[3], $values[4], $values[5], %yml_hash, %yml_hash_arguments, $yml_hash, $yml_hash_arguments, $lookup, $data_type); #If alias is enable determine which number is used for this analysis to pickup the correct database files if ( $use_alias =~ /^1$/ ) { #add code to add folder number for analysis if ($values[1]=~/^SPECIES/ ) { $lookup =~ s/^\s+|\s+$//g; $folder_symlinks; $lookup .= "/$folder_symlinks "; } } print "[YAML]: $lookup\n"; $test_command .= "$lookup"; $lookup = ''; $input = ''; %input = ''; last; } } print "\nalias:$use_alias\n"; #Created softlinks voor database files my @database_info; if ( $use_alias =~ /^1$/ ) { print "Entering Alias stuff..\n"; #Define databases in use @softlink_building_blocks = ( "$parse2,NAME,$ncbiRefSeq_path", "$parse2,NAME,$cosmic_path", "$parse2,NAME,$dbsnp_path", "$parse2,NAME,$clinvar_path", "$parse2,NAME,$class_path" ); print "\nCreating softlinks to Annovar database files:\n"; #Use parse_lookup to find argument and create symlinks in analysis database folder foreach (@softlink_building_blocks) { $in = $inyml; $in2 = $parameter_yml; my @values = split /\,/, $_; foreach (@values) { if ( $_ !~ /^parse[12]$/ ) { print "[NOT YAML] $_ does not require matching!\n"; last; } $lookup = ""; parse_lookup ($values[0], $values[1], $values[2], $values[3], $values[4], $values[5], %yml_hash, %yml_hash_arguments, $yml_hash, $yml_hash_arguments, $lookup, $data_type); print "database=$lookup\n"; #add code to add folder number for analysis #print "Path_destination=$path_symlinks\n"; #print "Path_origin=$path_folder_symlinks\n"; my $database_detail = $lookup; $lookup = ""; parse_lookup ("parse1", "BUILD", "$application","APPLICATION","GENERAL", %yml_hash, %yml_hash_arguments, $yml_hash, $yml_hash_arguments, $lookup, $data_type); print "Species: $lookup\n"; my @build = split /\s/, $lookup; #print "Species=$build[1]\n"; my $species = $build[1]; $lookup = ''; print "$values[0], 'ALIAS', $values[2], $values[3], $values[4], $values[5]\n"; parse_lookup ($values[0], 'ALIAS', $values[2], $values[3], $values[4], $values[5], %yml_hash, %yml_hash_arguments, $yml_hash, $yml_hash_arguments, $lookup, $data_type); #print "Alias=$lookup\n"; my $alias = $lookup; #Based on database and presence of either index file or not run softlink commands if ( $alias =~ /^N\/A$/ ) { my $command = "ln -s $path_folder_symlinks/$species\_$database_detail.txt $path_symlinks$species\_$database_detail.txt"; print "command: $command\n"; system ( $command ); } elsif ( $alias !~ /^N\/A$/ ) { my $command = "ln -s $path_folder_symlinks/$species\_$database_detail.txt $path_symlinks$species\_$alias.txt"; print "command2: $command\n"; system ( $command ); } if ( $alias =~ /^dbSNP$/ ) { my $command = "ln -s $path_folder_symlinks/$species\_$database_detail.txt.idx $path_symlinks$species\_$alias.txt.idx"; print "command3: $command\n"; system ( $command ); } #Push all softlinks used in array if ( $alias !~ /^N\/A$/ ) { push @database_info, "$alias=$species\_$database_detail"; } $lookup = ''; $input = ''; %input = ''; last; } } print "Done preparing databases!\n"; } #Run Generated annovar command print "\nResulting in following command: $test_command\n"; ################################################# #!!IMPORTANT# DECOMMENT FOR ACTIVE USE!!# system ($test_command); ################################################# my $addition_vcf; if ( $use_alias =~ /^1$/ ) { #Add translation for common databases to specific version $addition_vcf; my $database_details = $path_symlinks.'database_details.txt'; $addition_vcf .= "##"; foreach (@database_info) { $addition_vcf .= ":$_"; } #Add information to vcf output file open(OUTVCF_EDIT, '>', "$outvcf\_edit") or die "Could not open file '$outvcf' $!"; print "\nDone open edit output vcf file...\n"; open(OUTVCF, '<', "$outvcf") or die "Could not open file '$outvcf' $!"; print "Done reading vcf output vcf file...\n\n"; while ( my $line = <OUTVCF> ) { my @row = split ( "\t", $line ); if ( $row[0] =~ /^#CHROM$/ ) { print OUTVCF_EDIT "$addition_vcf\n"; print OUTVCF_EDIT "$line"; } else { print OUTVCF_EDIT "$line"; } } close OUTVCF_EDIT; close OUTVCF; #Replace orginal vcf file with edited vcf file with additional information my $mv_command = "mv $outvcf\_edit $outvcf"; system ($mv_command); print "\nAdding line to vcf file:\n"; print "$addition_vcf\n\n"; #Remove folder containing softlinks print "Removing temporary directory:\n"; print "rm -r $path_symlinks\n\n"; system ( "rm -r $path_symlinks" ); } print "Job done program stopping.\n"; exit; ##################### #!!!END OF SCRIPT!!!# #################### ############# #SUBROUTINES# ############# ############# #Subroutine to obtain datatype variable ############# sub test_type { my $input = $_[0]; $data_type = ref($input); return $data_type; } ############# #Subroutine to parse and check yaml for correct format ############# sub parse { #@inyml = @{$yml_hash->{$pattern4}{$pattern5}}; #@inarguments = @{$yml_hash_arguments->{$pattern4}{$pattern5}}; my $input_hash = $_[0]; my ($key_a, $key_a2, $key_b, $key_b2, $key_c, $key_c2); my ($value_a, $value_a2, $value_b, $value_b2, $value_c, $value_c2); my (@hash, @hash2, $hash, $hash2); my $count_a = '0'; my $count_a2 = '0'; my $count_a3 = '0'; my $count_a4 = '0'; my $count_b = '0'; my $count_b2 = '0'; my $count_b3 = '0'; my $count_b4 = '0'; my $count_c = '0'; my $count_c2 = '0'; my $count_c3 = '0'; my $count_c4 = '0'; for $key_a (keys %$input_hash) { $count_a++; for $value_a (values %$input_hash) { $count_a2++; if ( $count_a =~ /^$count_a2$/ ) { test_type($value_a); if ($data_type =~ /^HASH$/) { for $key_a2 ( keys %{$input_hash->{$key_a}} ) { $count_a3++; for $value_a2 ( values %{$input_hash->{$key_a}} ) { $count_a4++; if ($count_a3 =~ /^$count_a4$/) { test_type($value_a2); if ($data_type =~ /^HASH$/) { print "[ERROR1] Found unexpected yml file format [ key: $key_a2 ] , please check whether your YAML file meets requirements!\n"; exit; } elsif ($data_type =~ /^ARRAY$/) { @hash = @{$input_hash->{$key_a}{$key_a2}}; foreach $hash ( @hash ) { for $key_b ( keys %$hash ) { $count_b++; for $value_b (values %$hash ) { $count_b2++; if ($count_b =~ /^$count_b2$/) { test_type($value_b); if ( $data_type =~ /^HASH$/ ) { for $key_b2 ( keys %{$hash->{$key_b}} ) { $count_b3++; for $value_b2 (values %{$hash->{$key_b}} ) { $count_b4++; if ($count_b3 =~ /^$count_b4/) { test_type($value_b2); if ( $data_type =~ /^HASH$/ ) { print "[ERROR2] Found unexpected yml file format [ key: $key_b2 ] , please check whether your YAML file meets requirements!\n"; exit; } elsif ( $data_type =~ /^ARRAY$/ ) { print "[ERROR3] Found unexpected yml file format [ key: $key_b2 ] , please check whether your YAML file meets requirements!\n"; exit; } else { #Assume value level 4 print "value_b2: $key_a:$key_a2:$key_b:$key_b2:$value_b2\n"; } } } $count_b4 = "0"; } $count_b3 = "0"; } elsif ( $data_type =~ /^ARRAY$/ ) { print "[ERROR4] Found unexpected yml file format [ key: $key_b ] , please check whether your YAML file meets requirements!\n"; exit; } else { #assume value level 3 print "value_b: $key_a:$key_a2:$key_b:$value_b\n"; } } } $count_b2 = '0'; } $count_b = '0'; } } else { #Assume value level 2 print "Value_a2: $key_a:$value_a2\n"; } } } $count_a4 = '0'; } $count_a3 = '0'; } elsif ($data_type =~ /^ARRAY/) { @hash2 = @{$input_hash->{$key_a}}; foreach $hash2 ( @hash2 ) { for my $key_c ( keys %$hash2 ) { $count_c++; for $value_c ( values %$hash2 ) { $count_c2++; if ($count_c =~ /^$count_c2$/) { test_type($value_c); if ( $data_type =~ /^HASH$/ ) { for $key_c2 ( keys %{$hash2->{$key_c}} ) { $count_c3++; for $value_c2 ( values %{$hash2->{$key_c}} ) { $count_c4++; if ( $count_c3 =~ /^$count_c4$/ ) { test_type($value_c2); if ( $data_type =~ /^HASH$/ ) { print "[ERROR5] Found unexpected yml file format [ key: $key_c2 ] , please check whether your YAML file meets requirements!\n"; exit; } elsif ( $data_type =~ /^ARRAY$/ ) { print "[ERROR6] Found unexpected yml file format [ key: $key_c2 ] , please check whether your YAML file meets requirements!\n"; exit; } else { #Assume level 4 print "value_c2: $key_a:$key_a2:$key_c:$key_c2:$value_c2\n"; } } } $count_c4 = '0'; } $count_c3 = '0'; } elsif ( $data_type =~ /^ARRAY$/ ) { print "[ERROR7] Found unexpected yml file format [ key: $key_c ] , please check whether your YAML file meets requirements!\n"; exit; } else { #assume value level 3 print "value_c: $key_a:$key_c:$value_c\n"; } } } $count_c2 = '0'; } $count_c = '0'; } } else { #Assume value level 1 print "value_a: $key_a:$value_a\n"; } } } $count_a2 = '0'; } $count_a = '0'; $data_type = ""; } ######################################### #Subroutine to fetch arguments from YAML# ######################################### sub parse_lookup { my $hash; my %hash; my $hash_a; my %hash_a; my %hash_a2; my $hash_a2; my @inyml; my @inarguments; my $count; my $match; my $match2; my $argument; my ($pattern1, $pattern2, $pattern3, $pattern4, $pattern5); if ( $_[0] =~ /^parse1$/ ) { $pattern1 = $_[1]; $pattern2 = $_[2]; $pattern3 = $_[3]; $pattern4 = $_[4]; @inyml = @{$yml_hash->{$pattern4}}; @inarguments = @{$yml_hash_arguments->{$pattern4}}; } elsif ( $_[0] =~ /^parse2$/) { $pattern1 = $_[1]; $pattern2 = $_[2]; $pattern3 = $_[3]; $pattern4 = $_[4]; $pattern5 = $_[5]; @inyml = @{$yml_hash->{$pattern4}{$pattern5}}; @inarguments = @{$yml_hash_arguments->{$pattern4}{$pattern5}}; } $match = "0"; foreach my $hash ( @inyml ) { $hash = \%$hash; if ($hash->{$pattern3} =~ /^$pattern2$/ ) { my $value = $hash->{$pattern1}; if ($value eq "" ) { print "Error unknown argument \"$pattern1\" for annotation database \"$pattern2\" in input YAML!\n"; exit; } foreach my $hash_a (@inarguments) { my $hash_a2 = $hash_a->{$pattern1}; if ( $hash_a->{$pattern3} =~ /^$pattern2$/ ) { $match2++; if ($hash_a2 =~ /^HASH/ ) { $argument = $hash_a2->{$value}; } else { $argument = $hash_a2; } if ($argument eq "" ) { print "Unknown given option \"$value\" for argument \"$pattern1\" in YAML arguments file or input YAML for database \"$pattern2\"!\n"; print "Please check your input and start the plugin again.\n"; exit; } } if ($match2 =~ /^1$/) { #Do nothing } elsif ( $match2 > 1 ) { print "Error Multiple matches for database \"$pattern2\" database in YAML argumentsfile.\n"; print "Please check your input and start the plugin again.\n"; exit; } elsif ( $match2 =~ /^0$/ ) { print "Error no matches for database \"$pattern2\" in YAML argumentsfile.\n"; print "Please check your input and start the plugin again.\n"; exit; } } $match++; $lookup = $argument; } } if ($match =~ /^1$/) { #Do nothing } else { print "match: $match \n"; print "Error in yaml file multiple instances matching input yaml, please check input!\n"; exit; } return $lookup; } ############# #Subroutine for opening out / edited yaml file ############# sub openyml_write { my $outyml = $in; #Create second yml file with change values originating from cli open(OUT, '>', "$outyml") or die "Could not open file '$outyml' $!"; print "Done creating output file...\n"; } ############# #Subroutine to check whether yaml file exists ############# sub openyml_read { $in = $_[0]; chomp $in; my $inyml = $in; open(FILE, '<', "$inyml") or die "Could not open file '$inyml' $!"; close FILE; print "Done reading\n"; } ############# #Subroutine for reading argument yaml file in hash ############## sub load_arguments { $in = $_[0]; $yml_hash_arguments = LoadFile($in); return($yml_hash_arguments, %yml_hash_arguments); } ############# #Subroutine for reading input yaml file in hash ############## sub load { $in = $_[0]; $yml_hash = LoadFile($in); return($yml_hash, %yml_hash); } ############# #Subroutine for reading edited input yml file in hash ############## sub load_edit { $in = $_[0]; $yml_hash_edit = LoadFile($in); } ############# #Subroutine for editing content yaml hash ############## sub writeyml { my $yml_hash = $in; $writer = sub { my $line = shift; print FILE "$line\n"; }; my $file = Data::YAML::Writer->new; $file->write( $yml_hash, $writer ); } ############# #Subroutine for editing content edited yaml hash ############## sub writeyml_edit { my $yml_hash = $in; $writer = sub { my $line = shift; print OUT "$line\n"; }; my $file = Data::YAML::Writer->new; $file->write( $yml_hash_edit, $writer ); close OUT; }