Mercurial > repos > niels > annovar_yaml_wrapper
changeset 8:9d6e7d2ddbb7 draft
Uploaded new version of xml file containing dependency for perl 5.22
author | niels |
---|---|
date | Tue, 14 May 2019 05:08:45 -0400 |
parents | 1119dc7a2f67 |
children | 4939ab9e935b |
files | annovar_yaml/YAML_annovar.yml annovar_yaml/YAML_arguments_annovar.yml annovar_yaml/annovar_yaml.pl annovar_yaml/annovar_yaml.xml |
diffstat | 4 files changed, 1250 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/annovar_yaml/YAML_annovar.yml Tue May 14 05:08:45 2019 -0400 @@ -0,0 +1,66 @@ +GENERAL: + - APPLICATION: 'annovar' + PATHSCRIPTS: '/hpc/cog_bioinf/pathologie/users/snouwens/Annovar_Moldia/Annovar/' + CODING_ANNOVAR: 'coding_annovar.pl' + TABLE_ANNOVAR: 'table_annovar.pl' + LOCATION_DATABASE: '/hpc/cog_bioinf/pathologie/users/snouwens/Annovar_Moldia/Annovar/' + DOT2UNDERLINE: 'yes' + NASTRING: '.' + OTHERINFO: 'yes' + POLISH: 'yes' + REMOVE: 'yes' + THREAD: '8' + INPUTFORMAT: 'vcfinput' + SPECIES: 'human' + BUILD: 'hg19' +ANALYSIS: + DATABASES: + - NAME: 'cosmic84' + PROTOCOL: 'cosmic' + VERSION: '84' + COMMENT: '20190221' + AVAILABLE: 'yes' + REQUIRED: 'yes' + OPERATION: 'f' + COLSWANTED: '4' + - NAME: 'refgene19' + PROTOCOL: 'refgene' + VERSION: '19' + AVAILABLE: 'yes' + REQUIRED: 'yes' + COMMENT: '20190210' + OPERATION: 'g' + HGVS: 'yes' + SPLICING: '6' + EXONSPLIC: 'yes' + - NAME: 'ncbiRefSeq_UMCU' + PROTOCOL: 'ncbiRefSeq' + VERSION: '_UMCU' + AVAILABLE: 'yes' + REQUIRED: 'yes' + COMMENT: "100519" + OPERATION: 'g' + HGVS: 'yes' + SPLICING: '6' + EXONSPLIC: 'yes' + - NAME: 'avsnp150' + PROTOCOL: 'avsnp' + VERSION: '150' + AVAILABLE: 'yes' + REQUIRED: 'yes' + OPERATION: 'f' + COLSWANTED: '1' + - NAME: 'clinvar_20180603' + PROTOCOL: 'clinvar' + VERSION: '_20180603' + AVAILABLE: 'yes' + REQUIRED: 'yes' + OPERATION: 'f' + COLSWANTED: '5' + - NAME: 'class100519' + PROTOCOL: 'class' + VERSION: '100519' + AVAILABLE: 'yes' + REQUIRED: 'yes' + OPERATION: 'f' + COLSWANTED: '1'
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/annovar_yaml/YAML_arguments_annovar.yml Tue May 14 05:08:45 2019 -0400 @@ -0,0 +1,111 @@ +GENERAL: + - APPLICATION: 'annovar' + PATHSCRIPTS: '/hpc/cog_bioinf/pathologie/users/snouwens/Annovar_Moldia/Annovar/' + CODING_ANNOVAR: 'coding_annovar.pl ' + TABLE_ANNOVAR: 'table_annovar.pl ' + LOCATION_DATABASE: '/hpc/cog_bioinf/pathologie/users/snouwens/Annovar_Moldia/Annovar/' + DOT2UNDERLINE: + yes: '--dot2underline ' + no: '' + NASTRING: '--nastring . ' + OTHERINFO: + yes: '--otherinfo ' + no: '' + POLISH: + yes: '--polish ' + no: '' + REMOVE: + yes: '--remove ' + no: '' + THREAD: '--thread 8 ' + INPUTFORMAT: + vcfinput: '--vcfinput ' + SPECIES: + human: 'humandb ' + mouse: 'mousedb ' + BUILD: '--buildver hg19 ' +ANALYSIS: + DATABASES: + - NAME: 'cosmic84' + PROTOCOL: 'cosmic' + VERSION: '84' + COMMENT: '20190221' + AVAILABLE: + yes: '1' + no: '0' + REQUIRED: + yes: '1' + no: '0' + OPERATION: 'f' + COLSWANTED: '--colswanted 4 ' + - NAME: 'refgene19' + PROTOCOL: 'refgene' + VERSION: '19' + COMMENT: '20190210' + AVAILABLE: + yes: '1' + no: '0' + REQUIRED: + yes: '1' + no: '0' + OPERATION: 'g' + HGVS: + yes: '--hgvs ' + no: '' + SPLICING: '--splicing 6 ' + EXONSPLIC: + yes: '--exonicsplicing ' + no: '' + - NAME: 'ncbiRefSeq_UMCU' + PROTOCOL: 'ncbiRefSeq' + VERSION: '_UMCU' + AVAILABLE: + yes: '1' + no: '0' + REQUIRED: + yes: '1' + no: '0' + COMMENT: "100519" + OPERATION: 'g' + HGVS: + yes: '--hgvs ' + no: '' + SPLICING: '--splicing 6 ' + EXONSPLIC: + yes: '--exonicsplicing ' + no: '' + - NAME: 'avsnp150' + PROTOCOL: 'avsnp' + VERSION: '150' + AVAILABLE: + yes: '1' + no: '0' + REQUIRED: + yes: '1' + no: '0' + OPERATION: 'f' + COLSWANTED: '--colswanted 1 ' + - NAME: 'clinvar_20180603' + PROTOCOL: 'clinvar' + VERSION: '_20180603' + COMMENT: 'blah' + AVAILABLE: + yes: '1' + no: '0' + REQUIRED: + yes: '1' + no: '0' + OPERATION: 'f' + COLSWANTED: '--colswanted 5 ' + - NAME: 'class100519' + PROTOCOL: 'class' + VERSION: '100519' 
+ AVAILABLE: + yes: '1' + no: '0' + REQUIRED: + yes: '1' + no: '0' + OPERATION: 'f' + COLSWANTED: '--colswanted 1 ' +
#!/usr/bin/perl
# annovar_yaml/annovar_yaml.pl
#
# Assemble a table_annovar.pl command line from two YAML files and run it.
#
# Options that are used:
#   --inyml   YAML file holding the selected settings: a GENERAL list and an
#             ANALYSIS -> DATABASES list.
#   --invcf   input VCF; placed directly after the script name.
#   --outvcf  value handed to -outfile.
#
# The companion file YAML_arguments_annovar.yml is read from the current
# working directory. It has the same layout as --inyml, but each value is
# either the literal command-line text for that setting, or a hash that maps
# the setting's value (e.g. 'yes'/'no') to the command-line text.
#
# Every other option below is still accepted so existing callers keep
# working, but none of them influences the generated command; --protocolversion
# is only echoed. Edit mode (--edit, --outyml, ...) is not implemented.

use strict;
use warnings;

# YAML, YAML::Tiny and Data::Dumper are not referenced below; the imports are
# kept so the module requirements of the script stay as they were.
use YAML;
use YAML::Tiny;
use YAML::XS 'LoadFile';
use Data::Dumper;
use Getopt::Long;

# Arguments file, resolved relative to the current working directory.
my $PARAMETER_YML = 'YAML_arguments_annovar.yml';

# Entry of the GENERAL list that is used (matched on its APPLICATION key).
my $APPLICATION = 'annovar';

# Databases in the order they appear in --protocol/--operation/--arg, and the
# keys whose texts make up the --arg element of each database.
my @DATABASES = (
    { name => 'ncbiRefSeq_UMCU',  arg_keys => [qw(HGVS SPLICING EXONSPLIC)] },
    { name => 'cosmic84',         arg_keys => [qw(COLSWANTED)] },
    { name => 'avsnp150',         arg_keys => [qw(COLSWANTED)] },
    { name => 'clinvar_20180603', arg_keys => [qw(COLSWANTED)] },
    { name => 'class100519',      arg_keys => [qw(COLSWANTED)] },
);

my %opt = ( edit => 0, run => 0, application => '0' );

# Unknown options are reported by Getopt::Long itself and are not fatal.
GetOptions(
    \%opt,
    # used
    'invcf=s', 'outvcf=s', 'inyml=s',
    # accepted for compatibility, currently ignored
    'protocol=s', 'protocolversion=s', 'script=s', 'application=s',
    'outyml=s', 'build=s', 'species=s', 'edit', 'run',
    'dot2underline=s', 'nastring=s', 'otherinfo=s', 'polish=s', 'remove=s',
    'thread=s', 'inputformat=s',
    'colswanted=s', 'exonicsplic=s', 'hgvs=s', 'operation=s', 'splicing=s',
);

printf "version %s\n", $opt{protocolversion} // q{};

for my $required (qw(inyml invcf outvcf)) {
    if ( !defined $opt{$required} || $opt{$required} eq q{} ) {
        die "Missing required option --$required\n";
    }
}

# Fill hashes with input yaml files.
openyml_read($PARAMETER_YML);
openyml_read( $opt{inyml} );
my $yml_hash           = LoadFile( $opt{inyml} );
my $yml_hash_arguments = LoadFile($PARAMETER_YML);

# The input file is followed directly by the database location, so make sure
# exactly one blank separates the two.
( my $invcf = $opt{invcf} ) =~ s/\s+\z//;
my $outvcf = $opt{outvcf};

# NOTE(review): $invcf and $outvcf are interpolated into a shell command
# without quoting, exactly as before; callers must pass shell-safe paths.

my @protocol_groups  = map { [ database( 'NAME',      $_->{name} ) ] } @DATABASES;
my @operation_groups = map { [ database( 'OPERATION', $_->{name} ) ] } @DATABASES;
my @arg_groups;
for my $db (@DATABASES) {
    push @arg_groups, [ map { database( $_, $db->{name} ) } @{ $db->{arg_keys} } ];
}

# Blocks to build the command. A plain string is copied verbatim; an array
# reference is a lookup that is resolved through parse1/parse2.
my @command_building_blocks = (
    'perl ',
    general('PATHSCRIPTS'),
    general('TABLE_ANNOVAR'),
    "$invcf ",
    general('LOCATION_DATABASE'),
    general('SPECIES'),
    general('BUILD'),
    general('REMOVE'),
    q{--protocol '},
    interleave( [ q{,} ], @protocol_groups ),
    q{' },
    q{--operation '},
    interleave( [ q{'}, q{,}, q{'} ], @operation_groups ),
    q{' },
    general('DOT2UNDERLINE'),
    general('OTHERINFO'),
    general('NASTRING'),
    general('INPUTFORMAT'),
    q{--arg '},
    interleave( [ q{'}, q{,}, q{'} ], @arg_groups ),
    q{' },
    general('THREAD'),
    general('POLISH'),
    "-outfile $outvcf ",
);

my %resolver_for = ( parse1 => \&parse1, parse2 => \&parse2 );

print "\n";

my $test_command = q{};
for my $block (@command_building_blocks) {
    if ( ref($block) eq 'ARRAY' ) {
        my ( $kind, @spec ) = @{$block};
        my $text = $resolver_for{$kind}->(@spec);
        print "${kind}_out:$text...\n\n";
        $test_command .= $text;
    }
    elsif ( $block eq q{,} ) {
        print "$block is separator and does not require matching!\n\n";
        $test_command .= $block;
    }
    else {
        print "$block does not require matching!\n\n";
        $test_command .= $block;
    }
}

print "\n";
print "Resulting in following command: $test_command\n";

# The command contains shell quoting ('a','b',...), so it has to go through
# the shell; report a failing run instead of claiming success.
my $status = system $test_command;
if ( $status != 0 ) {
    my $reason =
          $status == -1  ? "could not be started: $!"
        : $status & 127  ? 'was terminated by signal ' . ( $status & 127 )
        :                  'exited with status ' . ( $status >> 8 );
    warn "Command $reason\n";
    exit( ( $status > 0 && $status >> 8 ) || 1 );
}

print "Job done program stopping.\n";

exit;

#####################
#!!!END OF SCRIPT!!!#
#####################

#############
#SUBROUTINES#
#############

# Lookup block for a key of the GENERAL entry of $APPLICATION.
sub general {
    my ($key) = @_;
    return [ 'parse1', $key, $APPLICATION, 'APPLICATION', 'GENERAL' ];
}

# Lookup block for a key of the ANALYSIS -> DATABASES entry named $name.
sub database {
    my ( $key, $name ) = @_;
    return [ 'parse2', $key, $name, 'NAME', 'ANALYSIS', 'DATABASES' ];
}

# Flatten the groups (array refs of blocks) into one list with the blocks of
# @$separator inserted between consecutive groups.
sub interleave {
    my ( $separator, @groups ) = @_;
    my @blocks;
    for my $index ( 0 .. $#groups ) {
        push @blocks, @{$separator} if $index > 0;
        push @blocks, @{ $groups[$index] };
    }
    return @blocks;
}

# Verify that a YAML file can be opened for reading; dies otherwise.
sub openyml_read {
    my ($path) = @_;
    open my $fh, '<', $path or die "Could not open file '$path' $!";
    close $fh or die "Could not close file '$path' $!";
    print "Done reading\n";
    return;
}

# Resolve a key of a 1-layered list ($section is a list of hashes).
# Returns the command-line text; dies when the key cannot be resolved.
sub parse1 {
    my ( $wanted_key, $id_value, $id_key, $section ) = @_;
    return resolve_argument(
        $wanted_key, $id_value, $id_key,
        entries_at( $yml_hash,           $section ),
        entries_at( $yml_hash_arguments, $section ),
    );
}

# Resolve a key of a 2-layered list ($section -> $subsection is a list of
# hashes). Returns the command-line text; dies when the key cannot be resolved.
sub parse2 {
    my ( $wanted_key, $id_value, $id_key, $section, $subsection ) = @_;
    return resolve_argument(
        $wanted_key, $id_value, $id_key,
        entries_at( $yml_hash,           $section, $subsection ),
        entries_at( $yml_hash_arguments, $section, $subsection ),
    );
}

# Follow @path through nested hashes and return the array reference found
# there, or an empty array reference when the path does not lead to a list.
sub entries_at {
    my ( $node, @path ) = @_;
    for my $key (@path) {
        return [] if ref($node) ne 'HASH' || !exists $node->{$key};
        $node = $node->{$key};
    }
    return ref($node) eq 'ARRAY' ? $node : [];
}

# First hash in @$entries whose $id_key equals $id_value and that has
# $wanted_key; returns nothing when there is none.
sub find_entry {
    my ( $entries, $id_key, $id_value, $wanted_key ) = @_;
    for my $entry ( @{$entries} ) {
        next if ref($entry) ne 'HASH';
        next if !defined $entry->{$id_key} || $entry->{$id_key} ne $id_value;
        return $entry if exists $entry->{$wanted_key};
    }
    return;
}

# Translate one setting into its command-line text.
#
# The setting is read from the matching entry of the input YAML; the text is
# taken from the matching entry of the arguments YAML, either directly or,
# when the arguments value is a hash, by using the setting as hash key.
# Dies when either file has no matching entry carrying $wanted_key.
sub resolve_argument {
    my ( $wanted_key, $id_value, $id_key, $input_entries, $argument_entries ) = @_;

    print "Searching for matching parameter for: $wanted_key...";

    my $selected = find_entry( $input_entries,    $id_key, $id_value, $wanted_key );
    my $mapped   = find_entry( $argument_entries, $id_key, $id_value, $wanted_key );

    if ( !$selected || !$mapped ) {
        print " parameter not found!\n";
        print "Problem with following value: $wanted_key!\n";
        print "Please check your yaml files for errors!\n";
        print "Aborting...\n";
        die "No '$wanted_key' found for $id_key '$id_value' in both yaml files\n";
    }

    my $setting = $selected->{$wanted_key};
    my $mapping = $mapped->{$wanted_key};

    my $text = $mapping;
    if ( ref($mapping) eq 'HASH' ) {
        $text = defined $setting ? $mapping->{$setting} : undef;
    }

    check_lookup( $wanted_key, $setting, $text );

    # A mismatch is reported above but contributes nothing to the command.
    return $text // q{};
}

# Report whether a lookup produced a text. An empty text is a valid result
# (e.g. "no: ''"); only a missing mapping counts as a mismatch.
# Returns 1 when a text was found, 0 otherwise.
sub check_lookup {
    my ( $key, $value, $lookup ) = @_;
    $value //= q{};

    if ( !defined $lookup ) {
        # No argument found: mismatch between arguments and inyml!
        print " mismatch between yml input and arguments yml!\n";
        print "Problem with following values: $key: $value!\n";
        return 0;
    }

    print " found argument!\n";
    print "Following values used: $key:$value:$lookup!\n";
    return 1;
}
<!-- Galaxy tool wrapper annovar_yaml/annovar_yaml.xml (new file, added Tue May 14 05:08:45 2019 -0400). -->
<tool id="annovar_yaml" name="Annovar YAML" version="0.1.0">
    <requirements>
        <requirement type="package" version="5.22">perl</requirement>
        <requirement type="package" version="1.27">perl-yaml</requirement>
        <requirement type="package" version="1.73">perl-yaml-tiny</requirement>
        <requirement type="package" version="0.74">perl-yaml-xs</requirement>
        <requirement type="package" version="2.173">perl-data-dumper</requirement>
        <requirement type="package" version="2.50">perl-getopt-long</requirement>
    </requirements>
    <!-- Paths are single-quoted so that a path containing spaces or shell
         metacharacters is passed to the script as one argument. -->
    <command detect_errors="exit_code"><![CDATA[
        perl '$__tool_directory__/annovar_yaml.pl' --invcf '$vcf' --inyml '$yaml' --outvcf '$output'
    ]]></command>

    <inputs>

        <param type="data" name="yaml" format="txt" />
        <param type="data" name="vcf" format="vcf" />

    </inputs>
    <outputs>
        <data name="output" format="vcf"/>
    </outputs>
    <help><![CDATA[
**What it does**

Runs ``annovar_yaml.pl`` on a VCF dataset, using the settings read from a YAML
configuration file, and writes the result as a new VCF dataset.

**Inputs**

- *yaml*: the YAML configuration file (passed to the script as ``--inyml``).
- *vcf*: the VCF dataset to process (passed as ``--invcf``).

**Output**

- A VCF dataset (written by the script to ``--outvcf``).

**Requirements**

The tool's dependencies are declared against perl 5.22.
    ]]></help>
</tool>