Next changeset 1:6c0373cc070f (2017-06-13) |
Commit message:
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/replace_chromosome_names/ commit 588282bf5cbb9909ad9cd8c316ec33158c858727 |
added:
replace_chromosome_names.py replace_chromosome_names.xml test-data/GRCh37_ensembl2UCSC.txt test-data/gemini_load_input.vcf test-data/replace_chromosome_names_output.vcf |
b |
diff -r 000000000000 -r 97c11d04cd4c replace_chromosome_names.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/replace_chromosome_names.py Thu May 18 14:17:48 2017 -0400 |
[ |
#!/usr/bin/env python
"""Replace chromosome names in a tabular (e.g. VCF) file using a mapping table."""
import argparse
import sys


def _read_mapping(mapping_file):
    """Build the old-name -> new-name dictionary from the mapping table.

    Args:
        mapping_file: iterable of text lines; every line must hold at least
            2 tab-separated columns (only the first two are used).

    Returns:
        dict mapping the first column of each line to its second column.

    Raises:
        Exception: if a line has fewer than 2 tab-separated columns.
    """
    map_dict = {}
    for line in mapping_file:
        line = line.rstrip('\r\n')
        line_cols = line.split('\t')
        if len(line_cols) < 2:
            raise Exception("Line '%s' in mapping table file does not contain 2 tab-separated columns" % line)
        map_dict[line_cols[0]] = line_cols[1]
    return map_dict


def _parse_cols(cols):
    """Convert a comma-separated list of 1-based column numbers to 0-based indexes.

    Raises:
        ValueError: if an item is not an integer or is smaller than 1.
    """
    indexes = []
    for field in cols.split(','):
        number = int(field)
        # 0 or a negative number would become a negative list index and
        # silently rewrite a column counted from the end of the row.
        if number < 1:
            raise ValueError("Column index '%s' is invalid: column indexes start from 1" % field)
        indexes.append(number - 1)
    return indexes


def _replace_columns(line, map_dict, cols_to_map):
    """Return `line` with the selected columns replaced according to `map_dict`.

    Values missing from the mapping are kept as they are. Columns that do not
    exist in this row (e.g. blank or short lines) are skipped instead of
    raising IndexError.
    """
    line_cols = line.split('\t')
    for col_to_map in cols_to_map:
        if col_to_map < len(line_cols):
            old_value = line_cols[col_to_map]
            line_cols[col_to_map] = map_dict.get(old_value, old_value)
    return '\t'.join(line_cols)


def main():
    """Command-line entry point.

    Reads the mapping table, then copies the input to the output line by line:
    lines starting with the comment character are written unchanged, all other
    lines get the selected columns replaced. Line endings are normalised to
    a single newline character.

    Raises:
        Exception: if the mapping table has a line with fewer than 2 columns.
        ValueError: if --cols contains a non-integer or an index below 1.
    """
    parser = argparse.ArgumentParser(description='Replace chromosome names in a tabular (e.g. VCF) file using a mapping table.')
    parser.add_argument('--cols', required=True, help='comma-separated list of column indexes (starting from 1) on which to perform the replacement')
    parser.add_argument('-m', dest='mapping_file', type=argparse.FileType(), required=True, help='mapping table file. Must contain 2 tab-separated columns')
    parser.add_argument('--comment-char', help='lines starting with this character will be directly printed to the output file')
    parser.add_argument('-o', dest='output', type=argparse.FileType('w'), default=sys.stdout, help='output file. If not specified, writes on standard output')
    parser.add_argument('input', metavar='INPUT', type=argparse.FileType(), help='tabular input file')
    args = parser.parse_args()

    try:
        map_dict = _read_mapping(args.mapping_file)
        cols_to_map = _parse_cols(args.cols)

        for line in args.input:
            line = line.rstrip('\r\n')
            # An empty comment character disables comment handling.
            is_comment = bool(args.comment_char) and line.startswith(args.comment_char)
            if not is_comment:
                line = _replace_columns(line, map_dict, cols_to_map)
            args.output.write(line + '\n')
    finally:
        # argparse.FileType never closes what it opens; do it here, but leave
        # the standard streams (used for '-' and the default output) alone.
        for handle in (args.mapping_file, args.input, args.output):
            if handle is not sys.stdin and handle is not sys.stdout:
                handle.close()


if __name__ == "__main__":
    main()
b |
diff -r 000000000000 -r 97c11d04cd4c replace_chromosome_names.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/replace_chromosome_names.xml Thu May 18 14:17:48 2017 -0400 |
[ |
@@ -0,0 +1,42 @@ +<tool id="replace_chromosome_names" name="Replace chromosome names" version="0.1"><!-- Galaxy tool wrapper: runs replace_chromosome_names.py on a tabular dataset, replacing the values of the selected columns via a 2-column mapping table; the output keeps the format of the input dataset. --> + <description>in a tabular dataset using a mapping table</description> + <requirements> + <requirement type="package" version="3.6.1">python</requirement> + </requirements> + <command detect_errors="exit_code"><![CDATA[ +python '$__tool_directory__/replace_chromosome_names.py' --cols $cols -m '$mapping' --comment-char '$comment_char' -o '$output' '$input' + ]]></command> + <inputs> + <param name="input" type="data" format="tabular" label="Tabular input dataset" /> + <param name="cols" type="data_column" data_ref="input" multiple="true" label="Columns on which to perform the replacement" /> + <param name="mapping" type="data" format="tabular" label="Mapping table" help="Must contain 2 tab-separated columns" /> + <param name="comment_char" type="text" value="" label="Comment character" help="Lines starting with this character will be directly printed to the output file"><!-- Empty by default; the validators below limit it to at most one punctuation character. NOTE(review): the sanitizer allows every string.punctuation character while the command wraps this value in single quotes; confirm that a single-quote comment character cannot break the command line. --> + <validator type="length" min="0" max="1" /> + <validator type="expression" message="Must be a punctuation character"><![CDATA[value in '!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~']]></validator> + <sanitizer> + <valid initial="string.punctuation"/> + </sanitizer> + </param> + </inputs> + <outputs> + <data name="output" format_source="input" label="${tool.name} on ${on_string}" /> + </outputs> + <tests> + <test> + <param name="input" ftype="vcf" value="gemini_load_input.vcf" /> + <param name="cols" value="1" /> + <param name="mapping" ftype="tabular" value="GRCh37_ensembl2UCSC.txt" /> + <param name="comment_char" value="#" /> + <output name="output" ftype="vcf" file="replace_chromosome_names_output.vcf" /> + </test> + </tests> + <help><![CDATA[ +**What it does** + +Replace chromosome names in a tabular (e.g. VCF) dataset using a mapping table. + +Chromosome mapping tables can be downloaded from: https://github.com/dpryan79/ChromosomeMappings/ + ]]></help> + <citations> + </citations> +</tool> |
b |
diff -r 000000000000 -r 97c11d04cd4c test-data/GRCh37_ensembl2UCSC.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/GRCh37_ensembl2UCSC.txt Thu May 18 14:17:48 2017 -0400 |
b |
@@ -0,0 +1,297 @@ +1 chr1 +2 chr2 +3 chr3 +4 chr4 +5 chr5 +6 chr6 +7 chr7 +8 chr8 +9 chr9 +10 chr10 +11 chr11 +12 chr12 +13 chr13 +14 chr14 +15 chr15 +16 chr16 +17 chr17 +18 chr18 +19 chr19 +20 chr20 +21 chr21 +22 chr22 +X chrX +Y chrY +MT chrM +GL000191.1 chr1_gl000191_random +GL000192.1 chr1_gl000192_random +GL000193.1 chr4_gl000193_random +GL000194.1 chr4_gl000194_random +GL000195.1 chr7_gl000195_random +GL000196.1 chr8_gl000196_random +GL000197.1 chr8_gl000197_random +GL000198.1 chr9_gl000198_random +GL000199.1 chr9_gl000199_random +GL000200.1 chr9_gl000200_random +GL000201.1 chr9_gl000201_random +GL000202.1 chr11_gl000202_random +GL000203.1 chr17_gl000203_random +GL000204.1 chr17_gl000204_random +GL000205.1 chr17_gl000205_random +GL000206.1 chr17_gl000206_random +GL000207.1 chr18_gl000207_random +GL000208.1 chr19_gl000208_random +GL000209.1 chr19_gl000209_random +GL000210.1 chr21_gl000210_random +GL000211.1 chrUn_gl000211 +GL000212.1 chrUn_gl000212 +GL000213.1 chrUn_gl000213 +GL000214.1 chrUn_gl000214 +GL000215.1 chrUn_gl000215 +GL000216.1 chrUn_gl000216 +GL000217.1 chrUn_gl000217 +GL000218.1 chrUn_gl000218 +GL000219.1 chrUn_gl000219 +GL000220.1 chrUn_gl000220 +GL000221.1 chrUn_gl000221 +GL000222.1 chrUn_gl000222 +GL000223.1 chrUn_gl000223 +GL000224.1 chrUn_gl000224 +GL000225.1 chrUn_gl000225 +GL000226.1 chrUn_gl000226 +GL000227.1 chrUn_gl000227 +GL000228.1 chrUn_gl000228 +GL000229.1 chrUn_gl000229 +GL000230.1 chrUn_gl000230 +GL000231.1 chrUn_gl000231 +GL000232.1 chrUn_gl000232 +GL000233.1 chrUn_gl000233 +GL000234.1 chrUn_gl000234 +GL000235.1 chrUn_gl000235 +GL000236.1 chrUn_gl000236 +GL000237.1 chrUn_gl000237 +GL000238.1 chrUn_gl000238 +GL000239.1 chrUn_gl000239 +GL000240.1 chrUn_gl000240 +GL000241.1 chrUn_gl000241 +GL000242.1 chrUn_gl000242 +GL000243.1 chrUn_gl000243 +GL000244.1 chrUn_gl000244 +GL000245.1 chrUn_gl000245 +GL000246.1 chrUn_gl000246 +GL000247.1 chrUn_gl000247 +GL000248.1 chrUn_gl000248 +GL000249.1 chrUn_gl000249 +HG1007_PATCH +HG1032_PATCH 
+HG104_HG975_PATCH +HG1063_PATCH +HG1074_PATCH +HG1079_PATCH +HG1082_HG167_PATCH +HG1091_PATCH +HG1133_PATCH +HG1146_PATCH +HG115_PATCH +HG1208_PATCH +HG1211_PATCH +HG122_PATCH +HG1257_PATCH +HG1287_PATCH +HG1292_PATCH +HG1293_PATCH +HG1304_PATCH +HG1308_PATCH +HG1322_PATCH +HG1350_HG959_PATCH +HG1423_PATCH +HG1424_PATCH +HG1425_PATCH +HG1426_PATCH +HG142_HG150_NOVEL_TEST +HG1433_PATCH +HG1434_PATCH +HG1435_PATCH +HG1436_HG1432_PATCH +HG1437_PATCH +HG1438_PATCH +HG1439_PATCH +HG1440_PATCH +HG1441_PATCH +HG1442_PATCH +HG1443_HG1444_PATCH +HG144_PATCH +HG1453_PATCH +HG1458_PATCH +HG1459_PATCH +HG1462_PATCH +HG1463_PATCH +HG1471_PATCH +HG1472_PATCH +HG1473_PATCH +HG1479_PATCH +HG1486_PATCH +HG1487_PATCH +HG1488_PATCH +HG1490_PATCH +HG1497_PATCH +HG14_PATCH +HG1500_PATCH +HG1501_PATCH +HG1502_PATCH +HG151_NOVEL_TEST +HG1591_PATCH +HG1592_PATCH +HG1595_PATCH +HG1699_PATCH +HG174_HG254_PATCH +HG183_PATCH +HG185_PATCH +HG186_PATCH +HG193_PATCH +HG19_PATCH +HG237_PATCH +HG243_PATCH +HG256_PATCH +HG271_PATCH +HG27_PATCH +HG280_PATCH +HG281_PATCH +HG299_PATCH +HG29_PATCH +HG305_PATCH +HG306_PATCH +HG311_PATCH +HG325_PATCH +HG329_PATCH +HG339_PATCH +HG344_PATCH +HG348_PATCH +HG357_PATCH +HG375_PATCH +HG385_PATCH +HG388_HG400_PATCH +HG414_PATCH +HG417_PATCH +HG418_PATCH +HG444_PATCH +HG480_HG481_PATCH +HG497_PATCH +HG506_HG507_HG1000_PATCH +HG50_PATCH +HG531_PATCH +HG536_PATCH +HG544_PATCH +HG686_PATCH +HG706_PATCH +HG729_PATCH +HG730_PATCH +HG736_PATCH +HG745_PATCH +HG747_PATCH +HG748_PATCH +HG75_PATCH +HG79_PATCH +HG7_PATCH +HG858_PATCH +HG865_PATCH +HG871_PATCH +HG873_PATCH +HG883_PATCH +HG905_PATCH +HG944_PATCH +HG946_PATCH +HG953_PATCH +HG957_PATCH +HG962_PATCH +HG971_PATCH +HG979_PATCH +HG987_PATCH +HG989_PATCH +HG990_PATCH +HG991_PATCH +HG995_PATCH +HG996_PATCH +HG998_1_PATCH +HG998_2_PATCH +HG999_1_PATCH +HG999_2_PATCH +HSCHR10_1_CTG2 +HSCHR10_1_CTG5 +HSCHR11_1_CTG1_1 +HSCHR1_1_CTG31 +HSCHR12_1_CTG1 +HSCHR12_1_CTG2 +HSCHR12_1_CTG2_1 +HSCHR12_1_CTG5 +HSCHR12_2_CTG2 
+HSCHR12_2_CTG2_1 +HSCHR12_3_CTG2_1 +HSCHR1_2_CTG31 +HSCHR1_3_CTG31 +HSCHR15_1_CTG4 +HSCHR15_1_CTG8 +HSCHR16_1_CTG3_1 +HSCHR16_2_CTG3_1 +HSCHR17_1_CTG1 +HSCHR17_1_CTG4 +HSCHR17_2_CTG4 +HSCHR17_3_CTG4 +HSCHR17_4_CTG4 +HSCHR17_5_CTG4 +HSCHR17_6_CTG4 +HSCHR18_1_CTG1_1 +HSCHR18_1_CTG2 +HSCHR18_1_CTG2_1 +HSCHR18_2_CTG1_1 +HSCHR18_2_CTG2 +HSCHR18_2_CTG2_1 +HSCHR19_1_CTG3 +HSCHR19_1_CTG3_1 +HSCHR19_2_CTG3 +HSCHR19_3_CTG3 +HSCHR19LRC_COX1_CTG1 +HSCHR19LRC_COX2_CTG1 +HSCHR19LRC_LRC_I_CTG1 +HSCHR19LRC_LRC_J_CTG1 +HSCHR19LRC_LRC_S_CTG1 +HSCHR19LRC_LRC_T_CTG1 +HSCHR19LRC_PGF1_CTG1 +HSCHR19LRC_PGF2_CTG1 +HSCHR20_1_CTG1 +HSCHR21_1_CTG1_1 +HSCHR21_2_CTG1_1 +HSCHR21_3_CTG1_1 +HSCHR21_4_CTG1_1 +HSCHR2_1_CTG1 +HSCHR2_1_CTG12 +HSCHR22_1_CTG1 +HSCHR22_1_CTG2 +HSCHR22_2_CTG1 +HSCHR2_2_CTG12 +HSCHR3_1_CTG1 +HSCHR3_1_CTG2_1 +HSCHR4_1_CTG12 +HSCHR4_1_CTG6 +HSCHR4_2_CTG9 +HSCHR5_1_CTG1 +HSCHR5_1_CTG2 +HSCHR5_1_CTG5 +HSCHR5_2_CTG1 +HSCHR5_3_CTG1 +HSCHR6_1_CTG5 +HSCHR6_2_CTG5 +HSCHR7_1_CTG6 +HSCHR9_1_CTG1 +HSCHR9_1_CTG35 +HSCHR9_2_CTG35 +HSCHR9_3_CTG35 +HSCHR4_1 chr4_ctg9_hap1 +HSCHR6_MHC_APD chr6_apd_hap1 +HSCHR6_MHC_COX chr6_cox_hap2 +HSCHR6_MHC_DBB chr6_dbb_hap3 +HSCHR6_MHC_MANN chr6_mann_hap4 +HSCHR6_MHC_MCF chr6_mcf_hap5 +HSCHR6_MHC_QBL chr6_qbl_hap6 +HSCHR6_MHC_SSTO chr6_ssto_hap7 +HSCHR17_1 chr17_ctg5_hap1 |
b |
diff -r 000000000000 -r 97c11d04cd4c test-data/gemini_load_input.vcf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/gemini_load_input.vcf Thu May 18 14:17:48 2017 -0400 |
[ |
b'@@ -0,0 +1,70 @@\n+##fileformat=VCFv4.1\n+##INFO=<ID=LDAF,Number=1,Type=Float,Description="MLE Allele Frequency Accounting for LD">\n+##INFO=<ID=AVGPOST,Number=1,Type=Float,Description="Average posterior probability from MaCH/Thunder">\n+##INFO=<ID=RSQ,Number=1,Type=Float,Description="Genotype imputation quality from MaCH/Thunder">\n+##INFO=<ID=ERATE,Number=1,Type=Float,Description="Per-marker Mutation rate from MaCH/Thunder">\n+##INFO=<ID=THETA,Number=1,Type=Float,Description="Per-marker Transition rate from MaCH/Thunder">\n+##INFO=<ID=CIEND,Number=2,Type=Integer,Description="Confidence interval around END for imprecise variants">\n+##INFO=<ID=CIPOS,Number=2,Type=Integer,Description="Confidence interval around POS for imprecise variants">\n+##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the variant described in this record">\n+##INFO=<ID=HOMLEN,Number=.,Type=Integer,Description="Length of base pair identical micro-homology at event breakpoints">\n+##INFO=<ID=HOMSEQ,Number=.,Type=String,Description="Sequence of base pair identical micro-homology at event breakpoints">\n+##INFO=<ID=SVLEN,Number=1,Type=Integer,Description="Difference in length between REF and ALT alleles">\n+##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">\n+##INFO=<ID=AC,Number=.,Type=Integer,Description="Alternate Allele Count">\n+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total Allele Count">\n+##ALT=<ID=DEL,Description="Deletion">\n+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n+##FORMAT=<ID=DS,Number=1,Type=Float,Description="Genotype dosage from MaCH/Thunder">\n+##FORMAT=<ID=GL,Number=.,Type=Float,Description="Genotype Likelihoods">\n+##INFO=<ID=AA,Number=1,Type=String,Description="Ancestral Allele, ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/pilot_data/technical/reference/ancestral_alignments/README">\n+##INFO=<ID=AF,Number=1,Type=Float,Description="Global Allele Frequency based on 
AC/AN">\n+##INFO=<ID=AMR_AF,Number=1,Type=Float,Description="Allele Frequency for samples from AMR based on AC/AN">\n+##INFO=<ID=ASN_AF,Number=1,Type=Float,Description="Allele Frequency for samples from ASN based on AC/AN">\n+##INFO=<ID=AFR_AF,Number=1,Type=Float,Description="Allele Frequency for samples from AFR based on AC/AN">\n+##INFO=<ID=EUR_AF,Number=1,Type=Float,Description="Allele Frequency for samples from EUR based on AC/AN">\n+##INFO=<ID=VT,Number=1,Type=String,Description="indicates what type of variant the line represents">\n+##INFO=<ID=SNPSOURCE,Number=.,Type=String,Description="indicates if a snp was called when analysing the low coverage or exome alignment data">\n+##reference=GRCh37\n+##SnpEffVersion="SnpEff 3.0f (build 2012-08-23), by Pablo Cingolani"\n+##SnpEffCmd="SnpEff GRCh37.66 -i vcf -o vcf -c /Users/arq5x/src/other/snpEff_3_0/snpEff.config ALL.wgs.integrated_phase1_v3.20101123.snps_indels_sv.sites.vcf.gz "\n+##INFO=<ID=EFF,Number=.,Type=String,Description="Predicted effects for this variant.Format: \'Effect ( Effect_Impact | Functional_Class | Codon_Change | Amino_Acid_change| Amino_Acid_length | Gene_Name | Gene_BioType | Coding | Transcript | Exon [ | ERRORS | WARNINGS ] )\' 
">\n+#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\n+1\t10583\trs58108140\tG\tA\t100.0\tPASS\tAVGPOST=0.7707;RSQ=0.4319;LDAF=0.2327;ERATE=0.0161;AN=2184;VT=SNP;AA=.;THETA=0.0046;AC=314;SNPSOURCE=LOWCOV;AF=0.14;ASN_AF=0.13;AMR_AF=0.17;AFR_AF=0.04;EUR_AF=0.21;EFF=DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|),UPSTREAM(MODIFIER|||||DDX11L1|processed_transcript|NON_CODING|ENST00000456328|),UPSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000450305|),UPSTREAM(MODIFIER||'..b'=0.9844;LDAF=0.0146;ERATE=0.0058;AF=0.01;ASN_AF=0.01;AMR_AF=0.0028;AFR_AF=0.02;EUR_AF=0.0013;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t54421\trs146477069\tA\tG\t100.0\tPASS\tERATE=0.0013;AN=2184;AC=220;VT=SNP;RSQ=0.7869;AVGPOST=0.9461;AA=A;THETA=0.0025;SNPSOURCE=LOWCOV;LDAF=0.1190;AF=0.10;ASN_AF=0.25;AMR_AF=0.12;AFR_AF=0.03;EUR_AF=0.02;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t54490\trs141149254\tG\tA\t100.0\tPASS\tERATE=0.0004;THETA=0.0074;AA=G;AN=2184;VT=SNP;RSQ=0.8366;AVGPOST=0.9646;AC=175;SNPSOURCE=LOWCOV;LDAF=0.0929;AF=0.08;ASN_AF=0.0035;AMR_AF=0.12;AFR_AF=0.03;EUR_AF=0.15;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t54676\trs2462492\tC\tT\t100.0\tPASS\tLDAF=0.1528;RSQ=0.6989;AA=T;AN=2184;AC=267;VT=SNP;AVGPOST=0.8998;SNPSOURCE=LOWCOV;THETA=0.0110;ERATE=0.0037;AF=0.12;ASN_AF=0.02;AMR_AF=0.20;AFR_AF=0.09;EUR_AF=0.18;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t54753\trs143174675\tT\tG\t100.0\tPASS\tAA=T;AN=2184;RSQ=0.6820;AC=65;VT=SNP;THETA=0.0080;ERATE=0.0016;SNPSOURCE=LOWCOV;AVGPOST=0.9697;LDAF=0.0399;AF=0.03;AMR_AF=0.04;AFR_AF=0.07;EUR_AF=0.03;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t55164\trs3091274\tC\tA\t100.0\tPASS\tA
N=2184;VT=SNP;ERATE=0.0045;AA=A;THETA=0.0162;SNPSOURCE=LOWCOV;AC=1955;RSQ=0.6373;AVGPOST=0.8686;LDAF=0.8489;AF=0.90;ASN_AF=0.99;AMR_AF=0.94;AFR_AF=0.65;EUR_AF=0.96;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t55249\t.\tC\tCTATGG\t443.0\tPASS\tAA=C;AVGPOST=0.9073;ERATE=0.0063;RSQ=0.5891;AN=2184;THETA=0.0038;VT=INDEL;AC=151;LDAF=0.0968;AF=0.07;ASN_AF=0.16;AMR_AF=0.08;AFR_AF=0.03;EUR_AF=0.02;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t55299\trs10399749\tC\tT\t100.0\tPASS\tRSQ=0.7602;LDAF=0.2954;AN=2184;VT=SNP;ERATE=0.0051;AA=c;AC=554;SNPSOURCE=LOWCOV;AVGPOST=0.8845;THETA=0.0070;AF=0.25;ASN_AF=0.33;AMR_AF=0.21;AFR_AF=0.39;EUR_AF=0.13;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t55313\trs182462964\tA\tT\t100.0\tPASS\tERATE=0.0004;RSQ=0.6112;AVGPOST=0.9994;AN=2184;VT=SNP;THETA=0.0057;AA=A;SNPSOURCE=LOWCOV;AC=1;LDAF=0.0008;AF=0.0005;AFR_AF=0.0020;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t55326\trs3107975\tT\tC\t100.0\tPASS\tAA=C;ERATE=0.0074;AN=2184;THETA=0.0085;VT=SNP;SNPSOURCE=LOWCOV;AVGPOST=0.9622;AC=90;RSQ=0.6901;LDAF=0.0562;AF=0.04;ASN_AF=0.07;AMR_AF=0.02;AFR_AF=0.07;EUR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t55330\trs185215913\tG\tA\t100.0\tPASS\tERATE=0.0005;AA=G;AN=2184;VT=SNP;THETA=0.0086;AVGPOST=0.9988;LDAF=0.0011;SNPSOURCE=LOWCOV;AC=1;RSQ=0.4701;AF=0.0005;AFR_AF=0.0020;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t55367\trs190850374\tG\tA\t100.0\tPASS\tERATE=0.0004;THETA=0.0044;AA=G;AN=2184;VT=SNP;LDAF=0.0029;RSQ=0.3860;SNPSOURCE=LOWCOV;AVGPOST=0.9961;AC=2;AF=0.0009;AMR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t55388\trs182711216\tC\tT\t100.0\tPASS\tTHETA=0.0102;ERATE=0.0005;AA=C;AVGPOST=0.9983;AN=2184;LDAF=0.0010;VT=SNP;RSQ=0.2348;SNPSOURCE=LOWCOV;AC=1;AF=0.0005;ASN_AF=0.0017;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t55394\trs2949420\tT\tA\t100.0\tPASS\tAC=18;AN=2184;VT=SNP;AA=A;RSQ=0.4995;AVGPOST=0.9784;LDAF=0.0171;SNPSOURCE=LOWCOV;ERATE=0.0012;THETA=0.0063;AF=0.01;AMR_AF=0.01;AFR_AF=0.0041;EUR_AF=0.02;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t55416\trs193242050\tG\tA\t100.0\tPAS
S\tAA=G;AN=2184;AVGPOST=0.9944;VT=SNP;LDAF=0.0064;AC=9;THETA=0.0019;RSQ=0.6553;SNPSOURCE=LOWCOV;ERATE=0.0006;AF=0.0041;AFR_AF=0.02;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t55427\trs183189405\tT\tC\t100.0\tPASS\tTHETA=0.0054;AA=T;AN=2184;VT=SNP;AVGPOST=0.9969;LDAF=0.0020;SNPSOURCE=LOWCOV;AC=1;RSQ=0.2759;ERATE=0.0007;AF=0.0005;AFR_AF=0.0020;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t55816\trs187434873\tG\tA\t100.0\tPASS\tAN=2184;THETA=0.0119;VT=SNP;AC=10;RSQ=0.4578;AA=A;SNPSOURCE=LOWCOV;AVGPOST=0.9844;LDAF=0.0108;ERATE=0.0007;AF=0.0046;AMR_AF=0.01;EUR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t55850\trs191890754\tC\tG\t100.0\tPASS\tAVGPOST=0.9921;AA=G;AN=2184;VT=SNP;RSQ=0.4083;THETA=0.0045;LDAF=0.0056;AC=5;SNPSOURCE=LOWCOV;ERATE=0.0006;AF=0.0023;EUR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t55852\trs184233019\tG\tC\t100.0\tPASS\tTHETA=0.0137;AA=G;AN=2184;RSQ=0.5433;ERATE=0.0009;LDAF=0.0046;VT=SNP;AVGPOST=0.9953;AC=5;SNPSOURCE=LOWCOV;AF=0.0023;AMR_AF=0.01;EUR_AF=0.0013;EFF=INTERGENIC(MODIFIER|||||||||)\n' |
b |
diff -r 000000000000 -r 97c11d04cd4c test-data/replace_chromosome_names_output.vcf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/replace_chromosome_names_output.vcf Thu May 18 14:17:48 2017 -0400 |
[ |
b'@@ -0,0 +1,70 @@\n+##fileformat=VCFv4.1\n+##INFO=<ID=LDAF,Number=1,Type=Float,Description="MLE Allele Frequency Accounting for LD">\n+##INFO=<ID=AVGPOST,Number=1,Type=Float,Description="Average posterior probability from MaCH/Thunder">\n+##INFO=<ID=RSQ,Number=1,Type=Float,Description="Genotype imputation quality from MaCH/Thunder">\n+##INFO=<ID=ERATE,Number=1,Type=Float,Description="Per-marker Mutation rate from MaCH/Thunder">\n+##INFO=<ID=THETA,Number=1,Type=Float,Description="Per-marker Transition rate from MaCH/Thunder">\n+##INFO=<ID=CIEND,Number=2,Type=Integer,Description="Confidence interval around END for imprecise variants">\n+##INFO=<ID=CIPOS,Number=2,Type=Integer,Description="Confidence interval around POS for imprecise variants">\n+##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the variant described in this record">\n+##INFO=<ID=HOMLEN,Number=.,Type=Integer,Description="Length of base pair identical micro-homology at event breakpoints">\n+##INFO=<ID=HOMSEQ,Number=.,Type=String,Description="Sequence of base pair identical micro-homology at event breakpoints">\n+##INFO=<ID=SVLEN,Number=1,Type=Integer,Description="Difference in length between REF and ALT alleles">\n+##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">\n+##INFO=<ID=AC,Number=.,Type=Integer,Description="Alternate Allele Count">\n+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total Allele Count">\n+##ALT=<ID=DEL,Description="Deletion">\n+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n+##FORMAT=<ID=DS,Number=1,Type=Float,Description="Genotype dosage from MaCH/Thunder">\n+##FORMAT=<ID=GL,Number=.,Type=Float,Description="Genotype Likelihoods">\n+##INFO=<ID=AA,Number=1,Type=String,Description="Ancestral Allele, ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/pilot_data/technical/reference/ancestral_alignments/README">\n+##INFO=<ID=AF,Number=1,Type=Float,Description="Global Allele Frequency based on 
AC/AN">\n+##INFO=<ID=AMR_AF,Number=1,Type=Float,Description="Allele Frequency for samples from AMR based on AC/AN">\n+##INFO=<ID=ASN_AF,Number=1,Type=Float,Description="Allele Frequency for samples from ASN based on AC/AN">\n+##INFO=<ID=AFR_AF,Number=1,Type=Float,Description="Allele Frequency for samples from AFR based on AC/AN">\n+##INFO=<ID=EUR_AF,Number=1,Type=Float,Description="Allele Frequency for samples from EUR based on AC/AN">\n+##INFO=<ID=VT,Number=1,Type=String,Description="indicates what type of variant the line represents">\n+##INFO=<ID=SNPSOURCE,Number=.,Type=String,Description="indicates if a snp was called when analysing the low coverage or exome alignment data">\n+##reference=GRCh37\n+##SnpEffVersion="SnpEff 3.0f (build 2012-08-23), by Pablo Cingolani"\n+##SnpEffCmd="SnpEff GRCh37.66 -i vcf -o vcf -c /Users/arq5x/src/other/snpEff_3_0/snpEff.config ALL.wgs.integrated_phase1_v3.20101123.snps_indels_sv.sites.vcf.gz "\n+##INFO=<ID=EFF,Number=.,Type=String,Description="Predicted effects for this variant.Format: \'Effect ( Effect_Impact | Functional_Class | Codon_Change | Amino_Acid_change| Amino_Acid_length | Gene_Name | Gene_BioType | Coding | Transcript | Exon [ | ERRORS | WARNINGS ] )\' 
">\n+#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\n+chr1\t10583\trs58108140\tG\tA\t100.0\tPASS\tAVGPOST=0.7707;RSQ=0.4319;LDAF=0.2327;ERATE=0.0161;AN=2184;VT=SNP;AA=.;THETA=0.0046;AC=314;SNPSOURCE=LOWCOV;AF=0.14;ASN_AF=0.13;AMR_AF=0.17;AFR_AF=0.04;EUR_AF=0.21;EFF=DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|),UPSTREAM(MODIFIER|||||DDX11L1|processed_transcript|NON_CODING|ENST00000456328|),UPSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000450305|),UPSTREAM(MODIFIE'..b'MR_AF=0.0028;AFR_AF=0.02;EUR_AF=0.0013;EFF=INTERGENIC(MODIFIER|||||||||)\n+chr1\t54421\trs146477069\tA\tG\t100.0\tPASS\tERATE=0.0013;AN=2184;AC=220;VT=SNP;RSQ=0.7869;AVGPOST=0.9461;AA=A;THETA=0.0025;SNPSOURCE=LOWCOV;LDAF=0.1190;AF=0.10;ASN_AF=0.25;AMR_AF=0.12;AFR_AF=0.03;EUR_AF=0.02;EFF=INTERGENIC(MODIFIER|||||||||)\n+chr1\t54490\trs141149254\tG\tA\t100.0\tPASS\tERATE=0.0004;THETA=0.0074;AA=G;AN=2184;VT=SNP;RSQ=0.8366;AVGPOST=0.9646;AC=175;SNPSOURCE=LOWCOV;LDAF=0.0929;AF=0.08;ASN_AF=0.0035;AMR_AF=0.12;AFR_AF=0.03;EUR_AF=0.15;EFF=INTERGENIC(MODIFIER|||||||||)\n+chr1\t54676\trs2462492\tC\tT\t100.0\tPASS\tLDAF=0.1528;RSQ=0.6989;AA=T;AN=2184;AC=267;VT=SNP;AVGPOST=0.8998;SNPSOURCE=LOWCOV;THETA=0.0110;ERATE=0.0037;AF=0.12;ASN_AF=0.02;AMR_AF=0.20;AFR_AF=0.09;EUR_AF=0.18;EFF=INTERGENIC(MODIFIER|||||||||)\n+chr1\t54753\trs143174675\tT\tG\t100.0\tPASS\tAA=T;AN=2184;RSQ=0.6820;AC=65;VT=SNP;THETA=0.0080;ERATE=0.0016;SNPSOURCE=LOWCOV;AVGPOST=0.9697;LDAF=0.0399;AF=0.03;AMR_AF=0.04;AFR_AF=0.07;EUR_AF=0.03;EFF=INTERGENIC(MODIFIER|||||||||)\n+chr1\t55164\trs3091274\tC\tA\t100.0\tPASS\tAN=2184;VT=SNP;ERATE=0.0045;AA=A;THETA=0
.0162;SNPSOURCE=LOWCOV;AC=1955;RSQ=0.6373;AVGPOST=0.8686;LDAF=0.8489;AF=0.90;ASN_AF=0.99;AMR_AF=0.94;AFR_AF=0.65;EUR_AF=0.96;EFF=INTERGENIC(MODIFIER|||||||||)\n+chr1\t55249\t.\tC\tCTATGG\t443.0\tPASS\tAA=C;AVGPOST=0.9073;ERATE=0.0063;RSQ=0.5891;AN=2184;THETA=0.0038;VT=INDEL;AC=151;LDAF=0.0968;AF=0.07;ASN_AF=0.16;AMR_AF=0.08;AFR_AF=0.03;EUR_AF=0.02;EFF=INTERGENIC(MODIFIER|||||||||)\n+chr1\t55299\trs10399749\tC\tT\t100.0\tPASS\tRSQ=0.7602;LDAF=0.2954;AN=2184;VT=SNP;ERATE=0.0051;AA=c;AC=554;SNPSOURCE=LOWCOV;AVGPOST=0.8845;THETA=0.0070;AF=0.25;ASN_AF=0.33;AMR_AF=0.21;AFR_AF=0.39;EUR_AF=0.13;EFF=INTERGENIC(MODIFIER|||||||||)\n+chr1\t55313\trs182462964\tA\tT\t100.0\tPASS\tERATE=0.0004;RSQ=0.6112;AVGPOST=0.9994;AN=2184;VT=SNP;THETA=0.0057;AA=A;SNPSOURCE=LOWCOV;AC=1;LDAF=0.0008;AF=0.0005;AFR_AF=0.0020;EFF=INTERGENIC(MODIFIER|||||||||)\n+chr1\t55326\trs3107975\tT\tC\t100.0\tPASS\tAA=C;ERATE=0.0074;AN=2184;THETA=0.0085;VT=SNP;SNPSOURCE=LOWCOV;AVGPOST=0.9622;AC=90;RSQ=0.6901;LDAF=0.0562;AF=0.04;ASN_AF=0.07;AMR_AF=0.02;AFR_AF=0.07;EUR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||)\n+chr1\t55330\trs185215913\tG\tA\t100.0\tPASS\tERATE=0.0005;AA=G;AN=2184;VT=SNP;THETA=0.0086;AVGPOST=0.9988;LDAF=0.0011;SNPSOURCE=LOWCOV;AC=1;RSQ=0.4701;AF=0.0005;AFR_AF=0.0020;EFF=INTERGENIC(MODIFIER|||||||||)\n+chr1\t55367\trs190850374\tG\tA\t100.0\tPASS\tERATE=0.0004;THETA=0.0044;AA=G;AN=2184;VT=SNP;LDAF=0.0029;RSQ=0.3860;SNPSOURCE=LOWCOV;AVGPOST=0.9961;AC=2;AF=0.0009;AMR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||)\n+chr1\t55388\trs182711216\tC\tT\t100.0\tPASS\tTHETA=0.0102;ERATE=0.0005;AA=C;AVGPOST=0.9983;AN=2184;LDAF=0.0010;VT=SNP;RSQ=0.2348;SNPSOURCE=LOWCOV;AC=1;AF=0.0005;ASN_AF=0.0017;EFF=INTERGENIC(MODIFIER|||||||||)\n+chr1\t55394\trs2949420\tT\tA\t100.0\tPASS\tAC=18;AN=2184;VT=SNP;AA=A;RSQ=0.4995;AVGPOST=0.9784;LDAF=0.0171;SNPSOURCE=LOWCOV;ERATE=0.0012;THETA=0.0063;AF=0.01;AMR_AF=0.01;AFR_AF=0.0041;EUR_AF=0.02;EFF=INTERGENIC(MODIFIER|||||||||)\n+chr1\t55416\trs193242050\tG\tA\t100.0\tPASS\tAA=G;AN=2
184;AVGPOST=0.9944;VT=SNP;LDAF=0.0064;AC=9;THETA=0.0019;RSQ=0.6553;SNPSOURCE=LOWCOV;ERATE=0.0006;AF=0.0041;AFR_AF=0.02;EFF=INTERGENIC(MODIFIER|||||||||)\n+chr1\t55427\trs183189405\tT\tC\t100.0\tPASS\tTHETA=0.0054;AA=T;AN=2184;VT=SNP;AVGPOST=0.9969;LDAF=0.0020;SNPSOURCE=LOWCOV;AC=1;RSQ=0.2759;ERATE=0.0007;AF=0.0005;AFR_AF=0.0020;EFF=INTERGENIC(MODIFIER|||||||||)\n+chr1\t55816\trs187434873\tG\tA\t100.0\tPASS\tAN=2184;THETA=0.0119;VT=SNP;AC=10;RSQ=0.4578;AA=A;SNPSOURCE=LOWCOV;AVGPOST=0.9844;LDAF=0.0108;ERATE=0.0007;AF=0.0046;AMR_AF=0.01;EUR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||)\n+chr1\t55850\trs191890754\tC\tG\t100.0\tPASS\tAVGPOST=0.9921;AA=G;AN=2184;VT=SNP;RSQ=0.4083;THETA=0.0045;LDAF=0.0056;AC=5;SNPSOURCE=LOWCOV;ERATE=0.0006;AF=0.0023;EUR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||)\n+chr1\t55852\trs184233019\tG\tC\t100.0\tPASS\tTHETA=0.0137;AA=G;AN=2184;RSQ=0.5433;ERATE=0.0009;LDAF=0.0046;VT=SNP;AVGPOST=0.9953;AC=5;SNPSOURCE=LOWCOV;AF=0.0023;AMR_AF=0.01;EUR_AF=0.0013;EFF=INTERGENIC(MODIFIER|||||||||)\n' |