Repository 'annovar'
hg clone https://toolshed.g2.bx.psu.edu/repos/saskia-hiltemann/annovar

Changeset 8:d6af2a78617f (2016-03-04)
Previous changeset 7:69e2067a120d (2015-10-27) Next changeset 9:f7ff063c738e (2016-03-04)
Commit message:
added support for databases up to 4 March 2016
modified:
tools/annovar/annovar.sh
tools/annovar/annovar.xml
b
diff -r 69e2067a120d -r d6af2a78617f tools/annovar/annovar.sh
--- a/tools/annovar/annovar.sh Tue Oct 27 11:01:10 2015 -0400
+++ b/tools/annovar/annovar.sh Fri Mar 04 11:32:50 2016 -0500
[
b'@@ -4,169 +4,168 @@\n dofilter="N"\n \n #########################\n-#\t   DEFINE SOME\n-#\t    FUNCTIONS\n+#       DEFINE SOME\n+#        FUNCTIONS\n #########################\n \n function usage(){\n-\techo "usage: $0 todo"\n+    echo "usage: $0 todo"\n }\n \n function runfilter(){\n-\tifile=$1\t\n-\tcolumnname=$2\n-\tthreshold=$3\n+    ifile=$1\n+    columnname=$2\n+    threshold=$3\n \n-\tif [[ $threshold == "-1" ]]\n-\tthen\n-\t\techo "not filtering"\n-\t\treturn\n-\tfi\n-\t\n-\techo "filtering: $columnname, $threshold"\n-\tcat $ifile\n+    if [[ $threshold == "-1" ]]\n+    then\n+        echo "not filtering"\n+        return\n+    fi\n+    \n+    echo "filtering: $columnname, $threshold"\n+    cat $ifile\n \n-\t#get column number corresponding to column header\n-\tcolumn=`awk \'BEGIN{\n-\t\t\t\t\tFS="\\t";\n-\t\t\t\t\tcol=-1\n-\t\t\t\t}{\n-\t\t\t\t\tif(FNR==1){\n-\t\t\t\t\t\tfor(i=1;i<=NF;i++){\n-\t\t\t\t\t\t\tif($i == "\'"${columnname}"\'") \n-\t\t\t\t\t\t\t\tcol=i \n-\t\t\t\t\t\t} \n-\t\t\t\t\t\tprint col \n-\t\t\t\t\t}\n-\t\t\t\t}\' $ifile `\n+    #get column number corresponding to column header\n+    column=`awk \'BEGIN{\n+                    FS="\\t";\n+                    col=-1\n+                }{\n+                    if(FNR==1){\n+                        for(i=1;i<=NF;i++){\n+                            if($i == "\'"${columnname}"\'") \n+                                col=i \n+                        } \n+                        print col \n+                    }\n+                }\' $ifile `\n \n-\tif [ $column == -1 ]\n-\tthen\n-\t\techo "no such column, exiting"\n-\t\treturn\n-\tfi\t\n+    if [ $column == -1 ]\n+    then\n+        echo "no such column, exiting"\n+        return\n+    fi\n \n-\t#perform filtering using the threshold\n-\tawk \'BEGIN{\n-\t\tFS="\\t";\n-\t\tOFS="\\t";\n-\t}{\n-\t\tif(FNR==1) \n-\t\t\tprint $0; \n-\t\tif(FNR>1){\n-\t\t\tif( $"\'"${column}"\'" == "" )  # empty column, then print\n-\t\t\t\tprint $0\n-\t\t\telse if 
("\'"${threshold}"\'" == "text"){}  #if set to text dont check threshold\n-\t\t\t\t\n-\t\t\telse if ($"\'"${column}"\'" < "\'"${threshold}"\'")  #else do check it\n-\t\t\t\tprint $0\t\n-\t\t}\n-\t}\' $ifile > tmpfile\n+    #perform filtering using the threshold\n+    awk \'BEGIN{\n+        FS="\\t";\n+        OFS="\\t";\n+    }{\n+        if(FNR==1) \n+            print $0; \n+        if(FNR>1){\n+            if( $"\'"${column}"\'" == "" )  # empty column, then print\n+                print $0\n+            else if ("\'"${threshold}"\'" == "text"){}  #if set to text dont check threshold\n \n-\tmv tmpfile $ifile\t\n+            else if ($"\'"${column}"\'" < "\'"${threshold}"\'")  #else do check it\n+                print $0\n+        }\n+    }\' $ifile > tmpfile\n+\n+    mv tmpfile $ifile    \n }\n \n # arguments: originalfile,resultfile,chrcol,startcol,endcol,refcol,obscol,addcols\n function joinresults(){\n-\tofile=$1\n-\trfile=$2\n-\tcolchr=$3\n-\tcolstart=$4\n-\tcolend=$5\n-\tcolref=$6\n-\tcolobs=$7\n-\taddcols=$8 #e.g. "B.col1,B.col2"\n-\t\n-\ttest="N"\n-\t\n-\t# echo "joining result with original file"\n-\tif [ $test == "Y" ]\n-\tthen \t\n-\t\techo "ofile: $ofile"\n-\t\thead $ofile \n-\t\techo "rfile: $rfile"\n-\t\thead $rfile\n-\tfi\n-\tnumlines=`wc $rfile | cut -d" " -f2`\n-\t\n-\t# if empty results file, just add header fields\n-\tif [[ ! 
-s $rfile ]] \n-\tthen\t\t\t\n-\t\tdummycol=${addcols:2}\n-\t\toutputcol=${dummycol//",B."/"\t"}\n-\t\tnumcommas=`echo "$addcols" | grep -o "," | wc -l`\t\t\n-\t\t\n-\t\tawk \'BEGIN{FS="\\t";OFS="\\t"}{\n-\t\t\t\tif(FNR==1)\n-\t\t\t\t\tprint $0,"\'"$outputcol"\'"; \n-\t\t\t\telse{\n-\t\t\t\t\tprintf $0\n-\t\t\t\t\tfor(i=0;i<="\'"$numcommas"\'"+1;i++)\n-\t\t\t\t\t\tprintf "\\t"\n-\t\t\t\t\tprintf "\\n"\n-\t\t\t\t}\n-\t\t\t}END{}\' $ofile > tempofile\n-\t\t\t\n-\t\t\tmv tempofile $ofile\t\t\n-\t\treturn\n-\tfi\n-\t\n+    ofile=$1\n+    rfile=$2\n+    colchr=$3\n+    colstart=$4\n+    colend=$5\n+    colref=$6\n+    colobs=$7\n+    addcols=$8 #e.g. "B.col1,B.col2"\n+\n+    test="N"\n+\n+    # echo "joining result with original file"\n+    if [ $test == "Y" ]\n+    then\n+        echo "ofile: $ofile"\n+        head $ofile \n+        echo "rfile: $rfile"\n+        head $rfile\n+    fi\n+    numlines=`wc $rfile | cut -d" " -f2`\n+\n+    # if empty results file, just add header fields\n+    if [[ ! 
-s $rfile ]] \n+    then\n+        dummycol=${addcols:2}\n+        outputcol=${dumm'..b'r}_cg46_dropped"\n-\t\tsed -i \'1i\\db\\t\'${cg46_colheader}\'\\tchromosome\\tstart\\tend\\treference\\talleleSeq"\'"$vcfheader"\'"\' $annovarout \n-\t\tjoinresults originalfile $annovarout 3 4 5 6 7 B.${cg46_colheader}\n+    fi\n+    \n+    #cg46\n+    if [[ $cg46 == "Y"  ]]\n+    then\n+        echo -e "\\nCG 46 genomes Annotation"\n+        $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype cg46 annovarinput $humandb 2>&1\n+    \n+        annovarout="annovarinput.${buildver}_cg46_dropped"\n+        sed -i \'1i\\db\\t\'${cg46_colheader}\'\\tchromosome\\tstart\\tend\\treference\\talleleSeq"\'"$vcfheader"\'"\' $annovarout \n+        joinresults originalfile $annovarout 3 4 5 6 7 B.${cg46_colheader}\n \n-\tfi\n+    fi\n \n \n-\t#cg69\n-\tif [[ $cg69 == "Y"  ]]\n-\tthen\n-\t\techo -e "\\nCG 69 genomes Annotation"\n-\t\t$scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype cg69 annovarinput $humandb 2>&1\n-\t\n-\t\tannovarout="annovarinput.${buildver}_cg69_dropped"\n-\t\tsed -i \'1i\\db\\t\'${cg69_colheader}\'\\tchromosome\\tstart\\tend\\treference\\talleleSeq"\'"$vcfheader"\'"\' $annovarout \n-\t\tjoinresults originalfile $annovarout 3 4 5 6 7 B.${cg69_colheader}\n+    #cg69\n+    if [[ $cg69 == "Y"  ]]\n+    then\n+        echo -e "\\nCG 69 genomes Annotation"\n+        $scriptsdir/annotate_variation.pl --filter --buildver $buildver -dbtype cg69 annovarinput $humandb 2>&1\n+    \n+        annovarout="annovarinput.${buildver}_cg69_dropped"\n+        sed -i \'1i\\db\\t\'${cg69_colheader}\'\\tchromosome\\tstart\\tend\\treference\\talleleSeq"\'"$vcfheader"\'"\' $annovarout \n+        joinresults originalfile $annovarout 3 4 5 6 7 B.${cg69_colheader}\n \n-\tfi\n+    fi\n \n \n-\t\n-\tif [ $convertcoords == "Y" ]\n-\tthen\n-\t\techo "converting back coordinates"\n-\t\tawk \'BEGIN{\n-\t\t\t\tFS="\\t";\n-\t\t\t\tOFS="\\t";\n-\t\t\t}{\n-\t\t\t\tif 
(FNR==1)\n-\t\t\t\t\tprint $0\n-\t\t\t\tif(FNR>1) { \n-\t\t\t\t\t$"\'"${chrcol}"\'" = "chr"$"\'"${chrcol}"\'"\n-\t\t\t\t\tif( $"\'"${vartypecol}"\'" == "snp" ){ $"\'"${startcol}"\'" -= 1 }; \t\n-\t\t\t\t\tif( $"\'"${vartypecol}"\'" == "ins" ){ $"\'"${refcol}"\'" = "" };\t\t\t\n-\t\t\t\t\tif( $"\'"${vartypecol}"\'" == "del" ){ $"\'"${startcol}"\'" -=1; $"\'"${obscol}"\'" = "" };\n-\t\t\t\t\tif( $"\'"${vartypecol}"\'" == "sub" ){ $"\'"${startcol}"\'" -= 1 }; \n-\t\t\t\t\tprint $0\n-\t\t\t\t\t\t\t\t\n-\t\t\t\t}\n-\t\t\t}\t\n-\t\t\tEND{\n-\t\t\t}\' originalfile > originalfile_coords\n-\telse\n-\t\tmv originalfile originalfile_coords\n-\tfi\n+    \n+    if [ $convertcoords == "Y" ]\n+    then\n+        echo "converting back coordinates"\n+        awk \'BEGIN{\n+                FS="\\t";\n+                OFS="\\t";\n+            }{\n+                if (FNR==1)\n+                    print $0\n+                if(FNR>1) { \n+                    $"\'"${chrcol}"\'" = "chr"$"\'"${chrcol}"\'"\n+                    if( $"\'"${vartypecol}"\'" == "snp" ){ $"\'"${startcol}"\'" -= 1 };\n+                    if( $"\'"${vartypecol}"\'" == "ins" ){ $"\'"${refcol}"\'" = "" };\n+                    if( $"\'"${vartypecol}"\'" == "del" ){ $"\'"${startcol}"\'" -=1; $"\'"${obscol}"\'" = "" };\n+                    if( $"\'"${vartypecol}"\'" == "sub" ){ $"\'"${startcol}"\'" -= 1 }; \n+                    print $0\n+                }\n+            }\n+            END{\n+            }\' originalfile > originalfile_coords\n+    else\n+        mv originalfile originalfile_coords\n+    fi\n \n-\t#restore "chr" prefix?\n+    #restore "chr" prefix?\n \n-\t#move to outputfile\n-\tif [ ! -s annovarinput.invalid_input ]\n-\tthen\n-\t\techo "Congrats, your input file contained no invalid lines!" 
> annovarinput.invalid_input\n-\tfi\n-\t\n-\tcp originalfile_coords $outfile_all\n-\tcp annovarinput.invalid_input $outfile_invalid 2>&1\n-\t\n-\tsed -i \'s/chrchr/chr/g\' $outfile_all\n-\tsed -i \'s/chrchr/chr/g\' $outfile_invalid\n-\t\n+    #move to outputfile\n+    if [ ! -s annovarinput.invalid_input ]\n+    then\n+        echo "Congrats, your input file contained no invalid lines!" > annovarinput.invalid_input\n+    fi\n+\n+    cp originalfile_coords $outfile_all\n+    cp annovarinput.invalid_input $outfile_invalid 2>&1\n+\n+    sed -i \'s/chrchr/chr/g\' $outfile_all\n+    sed -i \'s/chrchr/chr/g\' $outfile_invalid\n+\n fi #if $dorunannovar\n \n \n'
b
diff -r 69e2067a120d -r d6af2a78617f tools/annovar/annovar.xml
--- a/tools/annovar/annovar.xml Tue Oct 27 11:01:10 2015 -0400
+++ b/tools/annovar/annovar.xml Fri Mar 04 11:32:50 2016 -0500
b
b'@@ -1,233 +1,222 @@\n-<tool id="AnnovarShed" name="ANNOVAR" version="2015oct">\n-\t<description> Annotate a file using ANNOVAR </description>\n-\t\n-\t<requirements>\t\t\n-\t\t<requirement type="package" version="1.7">cgatools</requirement>\n-\t</requirements>\n-\t\n-\t<command interpreter="bash">\n-\t\tannovar.sh\t\t\n-\t\t--esp ${esp}\n-\t\t--gonl ${gonl}\n-\t\t--exac03 ${exac03}\n-\t\t--spidex ${spidex}\n-\t\t--gerp ${gerp}\n-\t\t--cosmic61 ${cosmic61}\n-\t\t--cosmic63 ${cosmic63}\t\n-\t\t--cosmic64 ${cosmic64}\t\t\n-\t\t--cosmic65 ${cosmic65}\n-\t\t--cosmic67 ${cosmic67}\n-\t\t--cosmic68 ${cosmic68}\n-\t\t--outall ${annotated}\t\t\n-\t\t--outinvalid ${invalid}\n-\t\t--dorunannovar ${dorun}\n-\t\t--inputfile ${infile}\n-\t\t--buildver ${reference.fields.dbkey}\n-\t\t--humandb ${reference.fields.ANNOVAR_humandb}\n-\t\t--scriptsdir ${reference.fields.ANNOVAR_scripts}\t\n-\t\t--verdbsnp ${verdbsnp}\n-\t\t--geneanno ${geneanno}\n-\t\t--tfbs ${tfbs}\n-\t\t--mce ${mce}\n-\t\t--cytoband ${cytoband}\n-\t\t--segdup ${segdup}\n-        --dgv ${dgv}\n-\t\t--gwas ${gwas}\t\t\t\t\n-\t\t#if $filetype.type == "other"\n-\t\t\t--varfile N\n-\t\t\t--VCF N\n-\t\t\t--chrcol ${filetype.col_chr}\n-\t\t\t--startcol ${filetype.col_start}\n-\t\t\t--endcol ${filetype.col_end}\n-\t\t\t--obscol ${filetype.col_obs}\n-\t\t\t--refcol ${filetype.col_ref}\n-\t\t\n-\t\t\t#if $filetype.convertcoords.convert == "Y"\n-\t\t\t\t--vartypecol ${filetype.convertcoords.col_vartype}\n-\t\t\t\t--convertcoords Y\n-\t\t\t#else\n-\t\t\t\t--convertcoords N\n-\t\t\t#end if\n-\t\t#end if\n-\t\t#if $filetype.type == "vcf"\n-\t\t\t--varfile N\n-\t\t\t--VCF Y\n-\t\t\t--convertcoords N\n-\t\t#end if\n-\t\t#if $filetype.type == "varfile"\n-\t\t\t--varfile Y\n-\t\t\t--VCF N\t\t\t\n-\t\t#end if\t\t\t\n-\t\t--cg46 ${cgfortysix}\n-\t\t--cg69 ${cgsixtynine}\n-\t\t--ver1000g ${ver1000g}\n-\t\t--hgvs ${hgvs}\n-\t\t--otherinfo ${otherinfo}\n-\t\t--newimpactscores ${newimpactscores}\n-\t\t--clinvar 
${clinvar}\n-\t\t\n-\t</command>\n-\t\t\n-\t<inputs>\n-\t\t<param name="dorun" type="hidden" value="Y"/> <!-- will add tool in future to filter on annovar columns, then will call annovar.sh with dorun==N -->\n-\t\t<param name="reference" type="select" label="Reference">\t\t\t        \n-\t\t\t<options from_data_table="annovar_loc" />\t\t\t\t\n-\t\t\t<filter type="data_meta" ref="infile" key="dbkey" column="0"/>\t\t\t\n-\t\t</param>\n-\t\t\t\t\n-\t\t<param name="infile" type="data" label="Select file to annotate" help="Must be either a VCF file, or a CG varfile, or a tab-separated file with a 1 line header"/>\n-\t\t<conditional name="filetype">\n-\t\t\t<param name="type" type="select" label="Select filetype" >\n-\t\t\t\t<option value="vcf" selected="false"> VCF4 file </option>\n-\t\t\t\t<option value="varfile" selected="false"> CG varfile </option>\n-\t\t\t\t<option value="other" selected="false"> Other </option>\n-\t\t\t</param>\n-\t\t\t<when value="other">\n-\t\t\t\t<param name="col_chr"     type="data_column"   data_ref="infile" multiple="False" label="Chromosome Column"  /> \n-\t\t\t\t<param name="col_start"   type="data_column"   data_ref="infile" multiple="False" label="Start Column"  /> \n-\t\t\t\t<param name="col_end"     type="data_column"   data_ref="infile" multiple="False" label="End Column"  /> \n-\t\t\t\t<param name="col_ref"     type="data_column"   data_ref="infile" multiple="False" label="Reference Allele Column"  /> \n-\t\t\t\t<param name="col_obs"     type="data_column"   data_ref="infile" multiple="False" label="Observed Allele Column"  /> \t\n-\t\t\t\t<conditional name="convertcoords">\n-\t\t\t\t\t<param name="convert" type="select" label="Is this file using Complete Genomics (0-based half-open) cooridinates?" 
>\n-\t\t\t\t\t\t<option value="Y"> Yes </option>\n-\t\t\t\t\t\t<option value="N" selected="True"> No </option>\n-\t\t\t\t\t</param>\n-\t\t\t\t\t<when value="Y">\n-\t\t\t\t\t\t<param name="col_vartype" type="data_column"   data_ref="infile" multiple="False" label="varType Column"  /> \n-\t\t\t\t\t</when>\n-\t\t\t\t</conditional>\n-\t\t\t</when>\n-\t\t</conditional>\n+<tool id="AnnovarShed" name="ANNOVAR" version="2016march">\n+    <description> Annotate a file using ANNOVAR </description>\n+\n+    <requirements>\n+        <requirement type="package" version="1.7">cgatools</requirement>\n+    </requirements>\n+\n+    <command interpreter="bash">\n+        annovar.sh'..b'variants from 34 projects (13K genomes and 64K exomes) "/>\n+        <param name="hrcr1" type="boolean" checked="False" truevalue="Y" falsevalue="N" label="Annotate with hrcr1 ? (hg19/hg38)" help="40 million variants from 32K samples"/>\n+         \n+        <param name="mitimpact2" type="boolean" checked="False" truevalue="Y" falsevalue="N" label="Annotate with MITimpact 2 ? (hg19)" help="an exhaustive collection of pre-computed pathogenicity predictions of human mitochondrial non-synonymous variants"/>\n+        <param name="mitimpact24" type="boolean" checked="False" truevalue="Y" falsevalue="N" label="Annotate with MITimpact 2.4 ? (hg19)" help="an exhaustive collection of pre-computed pathogenicity predictions of human mitochondrial non-synonymous variants"/>\n+        <param name="dbnsfp30a" type="boolean" checked="False" truevalue="Y" falsevalue="N" label="Annotate with dbNSFP 3.0a ? (hg18/hg19/hg38)" help="provides whole-genome functional prediction scores on ~20 different algorithms. 
Now additions to the database include DANN, PROVEAN, fitConsPlease, etc."/>\n \n-\t\t<!-- prefix for output file so you dont have to manually rename history items -->\n-\t\t<param name="fname" type="text" value="" label="Prefix for your output file" help="Optional"/>\t\t\n-\t\t\t\t\n-\t</inputs>\n+        <!-- prefix for output file so you dont have to manually rename history items -->\n+        <param name="fname" type="text" value="" label="Prefix for your output file" help="Optional"/>\n+    </inputs>\n \n-\t<outputs>\n-\t\t<data format="tabular" name="invalid"   label="$fname ANNOVAR Invalid input on ${on_string}"/>\t\n-\t\t<data format="tabular" name="annotated" label="$fname ANNOVAR Annotated variants on ${on_string}"/>\n-\t</outputs>\n+    <outputs>\n+        <data format="tabular" name="invalid"   label="$fname ANNOVAR Invalid input on ${on_string}"/>\n+        <data format="tabular" name="annotated" label="$fname ANNOVAR Annotated variants on ${on_string}"/>\n+    </outputs>\n \n-\t<help> \n+    <help> \n **What it does**\n \n This tool will annotate a file using ANNOVAR.\n@@ -241,15 +230,15 @@\n **Input Formats**\n \n Input Formats may be one of the following:\n-\t\n+\n VCF file\n Complete Genomics varfile\n \n-Custom tab-delimited file (specify chromosome, start, end, reference allele, observed allele columns)\t\n-\t\n+Custom tab-delimited file (specify chromosome, start, end, reference allele, observed allele columns)    \n+\n Custom tab-delimited CG-derived file (specify chromosome, start, end, reference allele, observed allele, varType columns)\n-\t\t\n-\t\t\n+\n+\n **Database Notes**\n \n see ANNOVAR website for extensive documentation, a few notes on some of the databases:\n@@ -258,10 +247,7 @@\n \n PolyPhen2 HVAR should be used for diagnostics of Mendelian diseases, which requires distinguishing mutations with drastic effects from all the remaining human variation, including abundant mildly deleterious alleles.The authors recommend calling probably 
damaging if the score is between 0.909 and 1, and possibly damaging if the score is between 0.447 and 0.908, and benign if the score is between 0 and 0.446.\n \n-PolyPhen HDIV should be used when evaluating rare alleles at loci potentially involved in complex phenotypes, dense mapping of regions identified by genome-wide association studies, and analysis of natural selection from sequence data. The authors recommend calling probably damaging if the score is between 0.957 and 1, and possibly damaging if the score is between 0.453 and 0.956, and benign is the score is between 0 and 0.452. \t\t\n-\t\t\n-\t</help>\n+PolyPhen HDIV should be used when evaluating rare alleles at loci potentially involved in complex phenotypes, dense mapping of regions identified by genome-wide association studies, and analysis of natural selection from sequence data. The authors recommend calling probably damaging if the score is between 0.957 and 1, and possibly damaging if the score is between 0.453 and 0.956, and benign is the score is between 0 and 0.452.\n+    </help>\n \n </tool>\n-\n-\n'