Repository 'replace_chromosome_names'
hg clone https://toolshed.g2.bx.psu.edu/repos/earlhaminst/replace_chromosome_names

Changeset 0:97c11d04cd4c (2017-05-18)
Next changeset 1:6c0373cc070f (2017-06-13)
Commit message:
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/replace_chromosome_names/ commit 588282bf5cbb9909ad9cd8c316ec33158c858727
added:
replace_chromosome_names.py
replace_chromosome_names.xml
test-data/GRCh37_ensembl2UCSC.txt
test-data/gemini_load_input.vcf
test-data/replace_chromosome_names_output.vcf
b
diff -r 000000000000 -r 97c11d04cd4c replace_chromosome_names.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/replace_chromosome_names.py Thu May 18 14:17:48 2017 -0400
[
@@ -0,0 +1,41 @@
+#!/usr/bin/env python
+from __future__ import print_function
+
+import argparse
+import sys
+
+
+def main():
+    parser = argparse.ArgumentParser(description='Replace chromosome names in a tabular (e.g. VCF) file using a mapping table.')
+    parser.add_argument('--cols', required=True, help='comma-separated list of column indexes (starting from 1) on which to perform the replacement')
+    parser.add_argument('-m', dest='mapping_file', type=argparse.FileType(), required=True, help='mapping table file. Must contain 2 tab-separated columns')
+    parser.add_argument('--comment-char', help='lines starting with this character will be directly printed to the output file')
+    parser.add_argument('-o', dest='output', type=argparse.FileType('w'), default=sys.stdout, help='output file. If not specified, writes on standard output')
+    parser.add_argument('input', metavar='INPUT', type=argparse.FileType(), help='tabular input file')
+    args = parser.parse_args()
+
+    map_dict = dict()
+    for line in args.mapping_file:
+        line = line.rstrip('\r\n')
+        line_cols = line.split('\t')
+        if len(line_cols) < 2:
+            raise Exception("Line '%s' in mapping table file does not contain 2 tab-separated columns" % line)
+        map_dict[line_cols[0]] = line_cols[1]
+
+    cols_to_map = [int(_) - 1 for _ in args.cols.split(',')]
+
+    for line in args.input:
+        line = line.rstrip('\r\n')
+        if args.comment_char and line.startswith(args.comment_char):
+            print(line, file=args.output)
+        else:
+            line_cols = line.split('\t')
+            for col_to_map in cols_to_map:
+                old_value = line_cols[col_to_map]
+                line_cols[col_to_map] = map_dict.get(old_value, old_value)
+            mapped_line = '\t'.join(line_cols)
+            print(mapped_line, file=args.output)
+
+
+if __name__ == "__main__":
+    main()
b
diff -r 000000000000 -r 97c11d04cd4c replace_chromosome_names.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/replace_chromosome_names.xml Thu May 18 14:17:48 2017 -0400
[
@@ -0,0 +1,42 @@
+<tool id="replace_chromosome_names" name="Replace chromosome names" version="0.1">
+    <description>in a tabular dataset using a mapping table</description>
+    <requirements>
+        <requirement type="package" version="3.6.1">python</requirement>
+    </requirements>
+    <!-- Runs replace_chromosome_names.py from the tool directory, passing the selected columns, the mapping table, the comment character, the output path and the input dataset. -->
+    <command detect_errors="exit_code"><![CDATA[
+python '$__tool_directory__/replace_chromosome_names.py' --cols $cols -m '$mapping' --comment-char '$comment_char' -o '$output' '$input'
+    ]]></command>
+    <inputs>
+        <param name="input" type="data" format="tabular" label="Tabular input dataset" />
+        <!-- One or more columns, chosen from the columns of the "input" dataset. -->
+        <param name="cols" type="data_column" data_ref="input" multiple="true" label="Columns on which to perform the replacement" />
+        <param name="mapping" type="data" format="tabular" label="Mapping table" help="Must contain 2 tab-separated columns" />
+        <!-- Optional: empty or a single punctuation character. -->
+        <!-- NOTE(review): the sanitizer accepts every punctuation character, including the single quote, while the command wraps this value in single quotes; confirm that a single-quote value cannot break the shell quoting. -->
+        <param name="comment_char" type="text" value="" label="Comment character" help="Lines starting with this character will be directly printed to the output file">
+            <validator type="length" min="0" max="1" />
+            <validator type="expression" message="Must be a punctuation character"><![CDATA[value in '!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~']]></validator>
+            <sanitizer>
+                <valid initial="string.punctuation"/>
+            </sanitizer>
+        </param>
+    </inputs>
+    <outputs>
+        <!-- The output takes its format from the "input" dataset (format_source). -->
+        <data name="output" format_source="input" label="${tool.name} on ${on_string}" />
+    </outputs>
+    <tests>
+        <!-- Replaces column 1 of gemini_load_input.vcf using GRCh37_ensembl2UCSC.txt, with "#" as the comment character. -->
+        <test>
+            <param name="input" ftype="vcf" value="gemini_load_input.vcf" />
+            <param name="cols" value="1" />
+            <param name="mapping" ftype="tabular" value="GRCh37_ensembl2UCSC.txt" />
+            <param name="comment_char" value="#" />
+            <output name="output" ftype="vcf" file="replace_chromosome_names_output.vcf" />
+        </test>
+    </tests>
+    <help><![CDATA[
+**What it does**
+
+Replace chromosome names in a tabular (e.g. VCF) dataset using a mapping table.
+
+Chromosome mapping tables can be downloaded from: https://github.com/dpryan79/ChromosomeMappings/
+    ]]></help>
+    <citations>
+    </citations>
+</tool>
b
diff -r 000000000000 -r 97c11d04cd4c test-data/GRCh37_ensembl2UCSC.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/GRCh37_ensembl2UCSC.txt Thu May 18 14:17:48 2017 -0400
b
@@ -0,0 +1,297 @@
+1 chr1
+2 chr2
+3 chr3
+4 chr4
+5 chr5
+6 chr6
+7 chr7
+8 chr8
+9 chr9
+10 chr10
+11 chr11
+12 chr12
+13 chr13
+14 chr14
+15 chr15
+16 chr16
+17 chr17
+18 chr18
+19 chr19
+20 chr20
+21 chr21
+22 chr22
+X chrX
+Y chrY
+MT chrM
+GL000191.1 chr1_gl000191_random
+GL000192.1 chr1_gl000192_random
+GL000193.1 chr4_gl000193_random
+GL000194.1 chr4_gl000194_random
+GL000195.1 chr7_gl000195_random
+GL000196.1 chr8_gl000196_random
+GL000197.1 chr8_gl000197_random
+GL000198.1 chr9_gl000198_random
+GL000199.1 chr9_gl000199_random
+GL000200.1 chr9_gl000200_random
+GL000201.1 chr9_gl000201_random
+GL000202.1 chr11_gl000202_random
+GL000203.1 chr17_gl000203_random
+GL000204.1 chr17_gl000204_random
+GL000205.1 chr17_gl000205_random
+GL000206.1 chr17_gl000206_random
+GL000207.1 chr18_gl000207_random
+GL000208.1 chr19_gl000208_random
+GL000209.1 chr19_gl000209_random
+GL000210.1 chr21_gl000210_random
+GL000211.1 chrUn_gl000211
+GL000212.1 chrUn_gl000212
+GL000213.1 chrUn_gl000213
+GL000214.1 chrUn_gl000214
+GL000215.1 chrUn_gl000215
+GL000216.1 chrUn_gl000216
+GL000217.1 chrUn_gl000217
+GL000218.1 chrUn_gl000218
+GL000219.1 chrUn_gl000219
+GL000220.1 chrUn_gl000220
+GL000221.1 chrUn_gl000221
+GL000222.1 chrUn_gl000222
+GL000223.1 chrUn_gl000223
+GL000224.1 chrUn_gl000224
+GL000225.1 chrUn_gl000225
+GL000226.1 chrUn_gl000226
+GL000227.1 chrUn_gl000227
+GL000228.1 chrUn_gl000228
+GL000229.1 chrUn_gl000229
+GL000230.1 chrUn_gl000230
+GL000231.1 chrUn_gl000231
+GL000232.1 chrUn_gl000232
+GL000233.1 chrUn_gl000233
+GL000234.1 chrUn_gl000234
+GL000235.1 chrUn_gl000235
+GL000236.1 chrUn_gl000236
+GL000237.1 chrUn_gl000237
+GL000238.1 chrUn_gl000238
+GL000239.1 chrUn_gl000239
+GL000240.1 chrUn_gl000240
+GL000241.1 chrUn_gl000241
+GL000242.1 chrUn_gl000242
+GL000243.1 chrUn_gl000243
+GL000244.1 chrUn_gl000244
+GL000245.1 chrUn_gl000245
+GL000246.1 chrUn_gl000246
+GL000247.1 chrUn_gl000247
+GL000248.1 chrUn_gl000248
+GL000249.1 chrUn_gl000249
+HG1007_PATCH
+HG1032_PATCH
+HG104_HG975_PATCH
+HG1063_PATCH
+HG1074_PATCH
+HG1079_PATCH
+HG1082_HG167_PATCH
+HG1091_PATCH
+HG1133_PATCH
+HG1146_PATCH
+HG115_PATCH
+HG1208_PATCH
+HG1211_PATCH
+HG122_PATCH
+HG1257_PATCH
+HG1287_PATCH
+HG1292_PATCH
+HG1293_PATCH
+HG1304_PATCH
+HG1308_PATCH
+HG1322_PATCH
+HG1350_HG959_PATCH
+HG1423_PATCH
+HG1424_PATCH
+HG1425_PATCH
+HG1426_PATCH
+HG142_HG150_NOVEL_TEST
+HG1433_PATCH
+HG1434_PATCH
+HG1435_PATCH
+HG1436_HG1432_PATCH
+HG1437_PATCH
+HG1438_PATCH
+HG1439_PATCH
+HG1440_PATCH
+HG1441_PATCH
+HG1442_PATCH
+HG1443_HG1444_PATCH
+HG144_PATCH
+HG1453_PATCH
+HG1458_PATCH
+HG1459_PATCH
+HG1462_PATCH
+HG1463_PATCH
+HG1471_PATCH
+HG1472_PATCH
+HG1473_PATCH
+HG1479_PATCH
+HG1486_PATCH
+HG1487_PATCH
+HG1488_PATCH
+HG1490_PATCH
+HG1497_PATCH
+HG14_PATCH
+HG1500_PATCH
+HG1501_PATCH
+HG1502_PATCH
+HG151_NOVEL_TEST
+HG1591_PATCH
+HG1592_PATCH
+HG1595_PATCH
+HG1699_PATCH
+HG174_HG254_PATCH
+HG183_PATCH
+HG185_PATCH
+HG186_PATCH
+HG193_PATCH
+HG19_PATCH
+HG237_PATCH
+HG243_PATCH
+HG256_PATCH
+HG271_PATCH
+HG27_PATCH
+HG280_PATCH
+HG281_PATCH
+HG299_PATCH
+HG29_PATCH
+HG305_PATCH
+HG306_PATCH
+HG311_PATCH
+HG325_PATCH
+HG329_PATCH
+HG339_PATCH
+HG344_PATCH
+HG348_PATCH
+HG357_PATCH
+HG375_PATCH
+HG385_PATCH
+HG388_HG400_PATCH
+HG414_PATCH
+HG417_PATCH
+HG418_PATCH
+HG444_PATCH
+HG480_HG481_PATCH
+HG497_PATCH
+HG506_HG507_HG1000_PATCH
+HG50_PATCH
+HG531_PATCH
+HG536_PATCH
+HG544_PATCH
+HG686_PATCH
+HG706_PATCH
+HG729_PATCH
+HG730_PATCH
+HG736_PATCH
+HG745_PATCH
+HG747_PATCH
+HG748_PATCH
+HG75_PATCH
+HG79_PATCH
+HG7_PATCH
+HG858_PATCH
+HG865_PATCH
+HG871_PATCH
+HG873_PATCH
+HG883_PATCH
+HG905_PATCH
+HG944_PATCH
+HG946_PATCH
+HG953_PATCH
+HG957_PATCH
+HG962_PATCH
+HG971_PATCH
+HG979_PATCH
+HG987_PATCH
+HG989_PATCH
+HG990_PATCH
+HG991_PATCH
+HG995_PATCH
+HG996_PATCH
+HG998_1_PATCH
+HG998_2_PATCH
+HG999_1_PATCH
+HG999_2_PATCH
+HSCHR10_1_CTG2
+HSCHR10_1_CTG5
+HSCHR11_1_CTG1_1
+HSCHR1_1_CTG31
+HSCHR12_1_CTG1
+HSCHR12_1_CTG2
+HSCHR12_1_CTG2_1
+HSCHR12_1_CTG5
+HSCHR12_2_CTG2
+HSCHR12_2_CTG2_1
+HSCHR12_3_CTG2_1
+HSCHR1_2_CTG31
+HSCHR1_3_CTG31
+HSCHR15_1_CTG4
+HSCHR15_1_CTG8
+HSCHR16_1_CTG3_1
+HSCHR16_2_CTG3_1
+HSCHR17_1_CTG1
+HSCHR17_1_CTG4
+HSCHR17_2_CTG4
+HSCHR17_3_CTG4
+HSCHR17_4_CTG4
+HSCHR17_5_CTG4
+HSCHR17_6_CTG4
+HSCHR18_1_CTG1_1
+HSCHR18_1_CTG2
+HSCHR18_1_CTG2_1
+HSCHR18_2_CTG1_1
+HSCHR18_2_CTG2
+HSCHR18_2_CTG2_1
+HSCHR19_1_CTG3
+HSCHR19_1_CTG3_1
+HSCHR19_2_CTG3
+HSCHR19_3_CTG3
+HSCHR19LRC_COX1_CTG1
+HSCHR19LRC_COX2_CTG1
+HSCHR19LRC_LRC_I_CTG1
+HSCHR19LRC_LRC_J_CTG1
+HSCHR19LRC_LRC_S_CTG1
+HSCHR19LRC_LRC_T_CTG1
+HSCHR19LRC_PGF1_CTG1
+HSCHR19LRC_PGF2_CTG1
+HSCHR20_1_CTG1
+HSCHR21_1_CTG1_1
+HSCHR21_2_CTG1_1
+HSCHR21_3_CTG1_1
+HSCHR21_4_CTG1_1
+HSCHR2_1_CTG1
+HSCHR2_1_CTG12
+HSCHR22_1_CTG1
+HSCHR22_1_CTG2
+HSCHR22_2_CTG1
+HSCHR2_2_CTG12
+HSCHR3_1_CTG1
+HSCHR3_1_CTG2_1
+HSCHR4_1_CTG12
+HSCHR4_1_CTG6
+HSCHR4_2_CTG9
+HSCHR5_1_CTG1
+HSCHR5_1_CTG2
+HSCHR5_1_CTG5
+HSCHR5_2_CTG1
+HSCHR5_3_CTG1
+HSCHR6_1_CTG5
+HSCHR6_2_CTG5
+HSCHR7_1_CTG6
+HSCHR9_1_CTG1
+HSCHR9_1_CTG35
+HSCHR9_2_CTG35
+HSCHR9_3_CTG35
+HSCHR4_1 chr4_ctg9_hap1
+HSCHR6_MHC_APD chr6_apd_hap1
+HSCHR6_MHC_COX chr6_cox_hap2
+HSCHR6_MHC_DBB chr6_dbb_hap3
+HSCHR6_MHC_MANN chr6_mann_hap4
+HSCHR6_MHC_MCF chr6_mcf_hap5
+HSCHR6_MHC_QBL chr6_qbl_hap6
+HSCHR6_MHC_SSTO chr6_ssto_hap7
+HSCHR17_1 chr17_ctg5_hap1
b
diff -r 000000000000 -r 97c11d04cd4c test-data/gemini_load_input.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gemini_load_input.vcf Thu May 18 14:17:48 2017 -0400
[
b'@@ -0,0 +1,70 @@\n+##fileformat=VCFv4.1\n+##INFO=<ID=LDAF,Number=1,Type=Float,Description="MLE Allele Frequency Accounting for LD">\n+##INFO=<ID=AVGPOST,Number=1,Type=Float,Description="Average posterior probability from MaCH/Thunder">\n+##INFO=<ID=RSQ,Number=1,Type=Float,Description="Genotype imputation quality from MaCH/Thunder">\n+##INFO=<ID=ERATE,Number=1,Type=Float,Description="Per-marker Mutation rate from MaCH/Thunder">\n+##INFO=<ID=THETA,Number=1,Type=Float,Description="Per-marker Transition rate from MaCH/Thunder">\n+##INFO=<ID=CIEND,Number=2,Type=Integer,Description="Confidence interval around END for imprecise variants">\n+##INFO=<ID=CIPOS,Number=2,Type=Integer,Description="Confidence interval around POS for imprecise variants">\n+##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the variant described in this record">\n+##INFO=<ID=HOMLEN,Number=.,Type=Integer,Description="Length of base pair identical micro-homology at event breakpoints">\n+##INFO=<ID=HOMSEQ,Number=.,Type=String,Description="Sequence of base pair identical micro-homology at event breakpoints">\n+##INFO=<ID=SVLEN,Number=1,Type=Integer,Description="Difference in length between REF and ALT alleles">\n+##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">\n+##INFO=<ID=AC,Number=.,Type=Integer,Description="Alternate Allele Count">\n+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total Allele Count">\n+##ALT=<ID=DEL,Description="Deletion">\n+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n+##FORMAT=<ID=DS,Number=1,Type=Float,Description="Genotype dosage from MaCH/Thunder">\n+##FORMAT=<ID=GL,Number=.,Type=Float,Description="Genotype Likelihoods">\n+##INFO=<ID=AA,Number=1,Type=String,Description="Ancestral Allele, ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/pilot_data/technical/reference/ancestral_alignments/README">\n+##INFO=<ID=AF,Number=1,Type=Float,Description="Global Allele Frequency based on 
AC/AN">\n+##INFO=<ID=AMR_AF,Number=1,Type=Float,Description="Allele Frequency for samples from AMR based on AC/AN">\n+##INFO=<ID=ASN_AF,Number=1,Type=Float,Description="Allele Frequency for samples from ASN based on AC/AN">\n+##INFO=<ID=AFR_AF,Number=1,Type=Float,Description="Allele Frequency for samples from AFR based on AC/AN">\n+##INFO=<ID=EUR_AF,Number=1,Type=Float,Description="Allele Frequency for samples from EUR based on AC/AN">\n+##INFO=<ID=VT,Number=1,Type=String,Description="indicates what type of variant the line represents">\n+##INFO=<ID=SNPSOURCE,Number=.,Type=String,Description="indicates if a snp was called when analysing the low coverage or exome alignment data">\n+##reference=GRCh37\n+##SnpEffVersion="SnpEff 3.0f (build 2012-08-23), by Pablo Cingolani"\n+##SnpEffCmd="SnpEff  GRCh37.66 -i vcf -o vcf -c /Users/arq5x/src/other/snpEff_3_0/snpEff.config ALL.wgs.integrated_phase1_v3.20101123.snps_indels_sv.sites.vcf.gz "\n+##INFO=<ID=EFF,Number=.,Type=String,Description="Predicted effects for this variant.Format: \'Effect ( Effect_Impact | Functional_Class | Codon_Change | Amino_Acid_change| Amino_Acid_length | Gene_Name | Gene_BioType | Coding | Transcript | Exon [ | ERRORS | WARNINGS ] )\' 
">\n+#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\n+1\t10583\trs58108140\tG\tA\t100.0\tPASS\tAVGPOST=0.7707;RSQ=0.4319;LDAF=0.2327;ERATE=0.0161;AN=2184;VT=SNP;AA=.;THETA=0.0046;AC=314;SNPSOURCE=LOWCOV;AF=0.14;ASN_AF=0.13;AMR_AF=0.17;AFR_AF=0.04;EUR_AF=0.21;EFF=DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|),UPSTREAM(MODIFIER|||||DDX11L1|processed_transcript|NON_CODING|ENST00000456328|),UPSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000450305|),UPSTREAM(MODIFIER||'..b'=0.9844;LDAF=0.0146;ERATE=0.0058;AF=0.01;ASN_AF=0.01;AMR_AF=0.0028;AFR_AF=0.02;EUR_AF=0.0013;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t54421\trs146477069\tA\tG\t100.0\tPASS\tERATE=0.0013;AN=2184;AC=220;VT=SNP;RSQ=0.7869;AVGPOST=0.9461;AA=A;THETA=0.0025;SNPSOURCE=LOWCOV;LDAF=0.1190;AF=0.10;ASN_AF=0.25;AMR_AF=0.12;AFR_AF=0.03;EUR_AF=0.02;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t54490\trs141149254\tG\tA\t100.0\tPASS\tERATE=0.0004;THETA=0.0074;AA=G;AN=2184;VT=SNP;RSQ=0.8366;AVGPOST=0.9646;AC=175;SNPSOURCE=LOWCOV;LDAF=0.0929;AF=0.08;ASN_AF=0.0035;AMR_AF=0.12;AFR_AF=0.03;EUR_AF=0.15;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t54676\trs2462492\tC\tT\t100.0\tPASS\tLDAF=0.1528;RSQ=0.6989;AA=T;AN=2184;AC=267;VT=SNP;AVGPOST=0.8998;SNPSOURCE=LOWCOV;THETA=0.0110;ERATE=0.0037;AF=0.12;ASN_AF=0.02;AMR_AF=0.20;AFR_AF=0.09;EUR_AF=0.18;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t54753\trs143174675\tT\tG\t100.0\tPASS\tAA=T;AN=2184;RSQ=0.6820;AC=65;VT=SNP;THETA=0.0080;ERATE=0.0016;SNPSOURCE=LOWCOV;AVGPOST=0.9697;LDAF=0.0399;AF=0.03;AMR_AF=0.04;AFR_AF=0.07;EUR_AF=0.03;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t55164\trs3091274\tC\tA\t100.0\tPASS\tA
N=2184;VT=SNP;ERATE=0.0045;AA=A;THETA=0.0162;SNPSOURCE=LOWCOV;AC=1955;RSQ=0.6373;AVGPOST=0.8686;LDAF=0.8489;AF=0.90;ASN_AF=0.99;AMR_AF=0.94;AFR_AF=0.65;EUR_AF=0.96;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t55249\t.\tC\tCTATGG\t443.0\tPASS\tAA=C;AVGPOST=0.9073;ERATE=0.0063;RSQ=0.5891;AN=2184;THETA=0.0038;VT=INDEL;AC=151;LDAF=0.0968;AF=0.07;ASN_AF=0.16;AMR_AF=0.08;AFR_AF=0.03;EUR_AF=0.02;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t55299\trs10399749\tC\tT\t100.0\tPASS\tRSQ=0.7602;LDAF=0.2954;AN=2184;VT=SNP;ERATE=0.0051;AA=c;AC=554;SNPSOURCE=LOWCOV;AVGPOST=0.8845;THETA=0.0070;AF=0.25;ASN_AF=0.33;AMR_AF=0.21;AFR_AF=0.39;EUR_AF=0.13;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t55313\trs182462964\tA\tT\t100.0\tPASS\tERATE=0.0004;RSQ=0.6112;AVGPOST=0.9994;AN=2184;VT=SNP;THETA=0.0057;AA=A;SNPSOURCE=LOWCOV;AC=1;LDAF=0.0008;AF=0.0005;AFR_AF=0.0020;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t55326\trs3107975\tT\tC\t100.0\tPASS\tAA=C;ERATE=0.0074;AN=2184;THETA=0.0085;VT=SNP;SNPSOURCE=LOWCOV;AVGPOST=0.9622;AC=90;RSQ=0.6901;LDAF=0.0562;AF=0.04;ASN_AF=0.07;AMR_AF=0.02;AFR_AF=0.07;EUR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t55330\trs185215913\tG\tA\t100.0\tPASS\tERATE=0.0005;AA=G;AN=2184;VT=SNP;THETA=0.0086;AVGPOST=0.9988;LDAF=0.0011;SNPSOURCE=LOWCOV;AC=1;RSQ=0.4701;AF=0.0005;AFR_AF=0.0020;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t55367\trs190850374\tG\tA\t100.0\tPASS\tERATE=0.0004;THETA=0.0044;AA=G;AN=2184;VT=SNP;LDAF=0.0029;RSQ=0.3860;SNPSOURCE=LOWCOV;AVGPOST=0.9961;AC=2;AF=0.0009;AMR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t55388\trs182711216\tC\tT\t100.0\tPASS\tTHETA=0.0102;ERATE=0.0005;AA=C;AVGPOST=0.9983;AN=2184;LDAF=0.0010;VT=SNP;RSQ=0.2348;SNPSOURCE=LOWCOV;AC=1;AF=0.0005;ASN_AF=0.0017;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t55394\trs2949420\tT\tA\t100.0\tPASS\tAC=18;AN=2184;VT=SNP;AA=A;RSQ=0.4995;AVGPOST=0.9784;LDAF=0.0171;SNPSOURCE=LOWCOV;ERATE=0.0012;THETA=0.0063;AF=0.01;AMR_AF=0.01;AFR_AF=0.0041;EUR_AF=0.02;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t55416\trs193242050\tG\tA\t100.0\tPAS
S\tAA=G;AN=2184;AVGPOST=0.9944;VT=SNP;LDAF=0.0064;AC=9;THETA=0.0019;RSQ=0.6553;SNPSOURCE=LOWCOV;ERATE=0.0006;AF=0.0041;AFR_AF=0.02;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t55427\trs183189405\tT\tC\t100.0\tPASS\tTHETA=0.0054;AA=T;AN=2184;VT=SNP;AVGPOST=0.9969;LDAF=0.0020;SNPSOURCE=LOWCOV;AC=1;RSQ=0.2759;ERATE=0.0007;AF=0.0005;AFR_AF=0.0020;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t55816\trs187434873\tG\tA\t100.0\tPASS\tAN=2184;THETA=0.0119;VT=SNP;AC=10;RSQ=0.4578;AA=A;SNPSOURCE=LOWCOV;AVGPOST=0.9844;LDAF=0.0108;ERATE=0.0007;AF=0.0046;AMR_AF=0.01;EUR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t55850\trs191890754\tC\tG\t100.0\tPASS\tAVGPOST=0.9921;AA=G;AN=2184;VT=SNP;RSQ=0.4083;THETA=0.0045;LDAF=0.0056;AC=5;SNPSOURCE=LOWCOV;ERATE=0.0006;AF=0.0023;EUR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||)\n+1\t55852\trs184233019\tG\tC\t100.0\tPASS\tTHETA=0.0137;AA=G;AN=2184;RSQ=0.5433;ERATE=0.0009;LDAF=0.0046;VT=SNP;AVGPOST=0.9953;AC=5;SNPSOURCE=LOWCOV;AF=0.0023;AMR_AF=0.01;EUR_AF=0.0013;EFF=INTERGENIC(MODIFIER|||||||||)\n'
b
diff -r 000000000000 -r 97c11d04cd4c test-data/replace_chromosome_names_output.vcf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/replace_chromosome_names_output.vcf Thu May 18 14:17:48 2017 -0400
[
b'@@ -0,0 +1,70 @@\n+##fileformat=VCFv4.1\n+##INFO=<ID=LDAF,Number=1,Type=Float,Description="MLE Allele Frequency Accounting for LD">\n+##INFO=<ID=AVGPOST,Number=1,Type=Float,Description="Average posterior probability from MaCH/Thunder">\n+##INFO=<ID=RSQ,Number=1,Type=Float,Description="Genotype imputation quality from MaCH/Thunder">\n+##INFO=<ID=ERATE,Number=1,Type=Float,Description="Per-marker Mutation rate from MaCH/Thunder">\n+##INFO=<ID=THETA,Number=1,Type=Float,Description="Per-marker Transition rate from MaCH/Thunder">\n+##INFO=<ID=CIEND,Number=2,Type=Integer,Description="Confidence interval around END for imprecise variants">\n+##INFO=<ID=CIPOS,Number=2,Type=Integer,Description="Confidence interval around POS for imprecise variants">\n+##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the variant described in this record">\n+##INFO=<ID=HOMLEN,Number=.,Type=Integer,Description="Length of base pair identical micro-homology at event breakpoints">\n+##INFO=<ID=HOMSEQ,Number=.,Type=String,Description="Sequence of base pair identical micro-homology at event breakpoints">\n+##INFO=<ID=SVLEN,Number=1,Type=Integer,Description="Difference in length between REF and ALT alleles">\n+##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">\n+##INFO=<ID=AC,Number=.,Type=Integer,Description="Alternate Allele Count">\n+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total Allele Count">\n+##ALT=<ID=DEL,Description="Deletion">\n+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">\n+##FORMAT=<ID=DS,Number=1,Type=Float,Description="Genotype dosage from MaCH/Thunder">\n+##FORMAT=<ID=GL,Number=.,Type=Float,Description="Genotype Likelihoods">\n+##INFO=<ID=AA,Number=1,Type=String,Description="Ancestral Allele, ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/pilot_data/technical/reference/ancestral_alignments/README">\n+##INFO=<ID=AF,Number=1,Type=Float,Description="Global Allele Frequency based on 
AC/AN">\n+##INFO=<ID=AMR_AF,Number=1,Type=Float,Description="Allele Frequency for samples from AMR based on AC/AN">\n+##INFO=<ID=ASN_AF,Number=1,Type=Float,Description="Allele Frequency for samples from ASN based on AC/AN">\n+##INFO=<ID=AFR_AF,Number=1,Type=Float,Description="Allele Frequency for samples from AFR based on AC/AN">\n+##INFO=<ID=EUR_AF,Number=1,Type=Float,Description="Allele Frequency for samples from EUR based on AC/AN">\n+##INFO=<ID=VT,Number=1,Type=String,Description="indicates what type of variant the line represents">\n+##INFO=<ID=SNPSOURCE,Number=.,Type=String,Description="indicates if a snp was called when analysing the low coverage or exome alignment data">\n+##reference=GRCh37\n+##SnpEffVersion="SnpEff 3.0f (build 2012-08-23), by Pablo Cingolani"\n+##SnpEffCmd="SnpEff  GRCh37.66 -i vcf -o vcf -c /Users/arq5x/src/other/snpEff_3_0/snpEff.config ALL.wgs.integrated_phase1_v3.20101123.snps_indels_sv.sites.vcf.gz "\n+##INFO=<ID=EFF,Number=.,Type=String,Description="Predicted effects for this variant.Format: \'Effect ( Effect_Impact | Functional_Class | Codon_Change | Amino_Acid_change| Amino_Acid_length | Gene_Name | Gene_BioType | Coding | Transcript | Exon [ | ERRORS | WARNINGS ] )\' 
">\n+#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\n+chr1\t10583\trs58108140\tG\tA\t100.0\tPASS\tAVGPOST=0.7707;RSQ=0.4319;LDAF=0.2327;ERATE=0.0161;AN=2184;VT=SNP;AA=.;THETA=0.0046;AC=314;SNPSOURCE=LOWCOV;AF=0.14;ASN_AF=0.13;AMR_AF=0.17;AFR_AF=0.04;EUR_AF=0.21;EFF=DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000423562|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000438504|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000488147|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000538476|),DOWNSTREAM(MODIFIER|||||WASH7P|unprocessed_pseudogene|NON_CODING|ENST00000541675|),UPSTREAM(MODIFIER|||||DDX11L1|processed_transcript|NON_CODING|ENST00000456328|),UPSTREAM(MODIFIER|||||DDX11L1|transcribed_unprocessed_pseudogene|NON_CODING|ENST00000450305|),UPSTREAM(MODIFIE'..b'MR_AF=0.0028;AFR_AF=0.02;EUR_AF=0.0013;EFF=INTERGENIC(MODIFIER|||||||||)\n+chr1\t54421\trs146477069\tA\tG\t100.0\tPASS\tERATE=0.0013;AN=2184;AC=220;VT=SNP;RSQ=0.7869;AVGPOST=0.9461;AA=A;THETA=0.0025;SNPSOURCE=LOWCOV;LDAF=0.1190;AF=0.10;ASN_AF=0.25;AMR_AF=0.12;AFR_AF=0.03;EUR_AF=0.02;EFF=INTERGENIC(MODIFIER|||||||||)\n+chr1\t54490\trs141149254\tG\tA\t100.0\tPASS\tERATE=0.0004;THETA=0.0074;AA=G;AN=2184;VT=SNP;RSQ=0.8366;AVGPOST=0.9646;AC=175;SNPSOURCE=LOWCOV;LDAF=0.0929;AF=0.08;ASN_AF=0.0035;AMR_AF=0.12;AFR_AF=0.03;EUR_AF=0.15;EFF=INTERGENIC(MODIFIER|||||||||)\n+chr1\t54676\trs2462492\tC\tT\t100.0\tPASS\tLDAF=0.1528;RSQ=0.6989;AA=T;AN=2184;AC=267;VT=SNP;AVGPOST=0.8998;SNPSOURCE=LOWCOV;THETA=0.0110;ERATE=0.0037;AF=0.12;ASN_AF=0.02;AMR_AF=0.20;AFR_AF=0.09;EUR_AF=0.18;EFF=INTERGENIC(MODIFIER|||||||||)\n+chr1\t54753\trs143174675\tT\tG\t100.0\tPASS\tAA=T;AN=2184;RSQ=0.6820;AC=65;VT=SNP;THETA=0.0080;ERATE=0.0016;SNPSOURCE=LOWCOV;AVGPOST=0.9697;LDAF=0.0399;AF=0.03;AMR_AF=0.04;AFR_AF=0.07;EUR_AF=0.03;EFF=INTERGENIC(MODIFIER|||||||||)\n+chr1\t55164\trs3091274\tC\tA\t100.0\tPASS\tAN=2184;VT=SNP;ERATE=0.0045;AA=A;THETA=0
.0162;SNPSOURCE=LOWCOV;AC=1955;RSQ=0.6373;AVGPOST=0.8686;LDAF=0.8489;AF=0.90;ASN_AF=0.99;AMR_AF=0.94;AFR_AF=0.65;EUR_AF=0.96;EFF=INTERGENIC(MODIFIER|||||||||)\n+chr1\t55249\t.\tC\tCTATGG\t443.0\tPASS\tAA=C;AVGPOST=0.9073;ERATE=0.0063;RSQ=0.5891;AN=2184;THETA=0.0038;VT=INDEL;AC=151;LDAF=0.0968;AF=0.07;ASN_AF=0.16;AMR_AF=0.08;AFR_AF=0.03;EUR_AF=0.02;EFF=INTERGENIC(MODIFIER|||||||||)\n+chr1\t55299\trs10399749\tC\tT\t100.0\tPASS\tRSQ=0.7602;LDAF=0.2954;AN=2184;VT=SNP;ERATE=0.0051;AA=c;AC=554;SNPSOURCE=LOWCOV;AVGPOST=0.8845;THETA=0.0070;AF=0.25;ASN_AF=0.33;AMR_AF=0.21;AFR_AF=0.39;EUR_AF=0.13;EFF=INTERGENIC(MODIFIER|||||||||)\n+chr1\t55313\trs182462964\tA\tT\t100.0\tPASS\tERATE=0.0004;RSQ=0.6112;AVGPOST=0.9994;AN=2184;VT=SNP;THETA=0.0057;AA=A;SNPSOURCE=LOWCOV;AC=1;LDAF=0.0008;AF=0.0005;AFR_AF=0.0020;EFF=INTERGENIC(MODIFIER|||||||||)\n+chr1\t55326\trs3107975\tT\tC\t100.0\tPASS\tAA=C;ERATE=0.0074;AN=2184;THETA=0.0085;VT=SNP;SNPSOURCE=LOWCOV;AVGPOST=0.9622;AC=90;RSQ=0.6901;LDAF=0.0562;AF=0.04;ASN_AF=0.07;AMR_AF=0.02;AFR_AF=0.07;EUR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||)\n+chr1\t55330\trs185215913\tG\tA\t100.0\tPASS\tERATE=0.0005;AA=G;AN=2184;VT=SNP;THETA=0.0086;AVGPOST=0.9988;LDAF=0.0011;SNPSOURCE=LOWCOV;AC=1;RSQ=0.4701;AF=0.0005;AFR_AF=0.0020;EFF=INTERGENIC(MODIFIER|||||||||)\n+chr1\t55367\trs190850374\tG\tA\t100.0\tPASS\tERATE=0.0004;THETA=0.0044;AA=G;AN=2184;VT=SNP;LDAF=0.0029;RSQ=0.3860;SNPSOURCE=LOWCOV;AVGPOST=0.9961;AC=2;AF=0.0009;AMR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||)\n+chr1\t55388\trs182711216\tC\tT\t100.0\tPASS\tTHETA=0.0102;ERATE=0.0005;AA=C;AVGPOST=0.9983;AN=2184;LDAF=0.0010;VT=SNP;RSQ=0.2348;SNPSOURCE=LOWCOV;AC=1;AF=0.0005;ASN_AF=0.0017;EFF=INTERGENIC(MODIFIER|||||||||)\n+chr1\t55394\trs2949420\tT\tA\t100.0\tPASS\tAC=18;AN=2184;VT=SNP;AA=A;RSQ=0.4995;AVGPOST=0.9784;LDAF=0.0171;SNPSOURCE=LOWCOV;ERATE=0.0012;THETA=0.0063;AF=0.01;AMR_AF=0.01;AFR_AF=0.0041;EUR_AF=0.02;EFF=INTERGENIC(MODIFIER|||||||||)\n+chr1\t55416\trs193242050\tG\tA\t100.0\tPASS\tAA=G;AN=2
184;AVGPOST=0.9944;VT=SNP;LDAF=0.0064;AC=9;THETA=0.0019;RSQ=0.6553;SNPSOURCE=LOWCOV;ERATE=0.0006;AF=0.0041;AFR_AF=0.02;EFF=INTERGENIC(MODIFIER|||||||||)\n+chr1\t55427\trs183189405\tT\tC\t100.0\tPASS\tTHETA=0.0054;AA=T;AN=2184;VT=SNP;AVGPOST=0.9969;LDAF=0.0020;SNPSOURCE=LOWCOV;AC=1;RSQ=0.2759;ERATE=0.0007;AF=0.0005;AFR_AF=0.0020;EFF=INTERGENIC(MODIFIER|||||||||)\n+chr1\t55816\trs187434873\tG\tA\t100.0\tPASS\tAN=2184;THETA=0.0119;VT=SNP;AC=10;RSQ=0.4578;AA=A;SNPSOURCE=LOWCOV;AVGPOST=0.9844;LDAF=0.0108;ERATE=0.0007;AF=0.0046;AMR_AF=0.01;EUR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||)\n+chr1\t55850\trs191890754\tC\tG\t100.0\tPASS\tAVGPOST=0.9921;AA=G;AN=2184;VT=SNP;RSQ=0.4083;THETA=0.0045;LDAF=0.0056;AC=5;SNPSOURCE=LOWCOV;ERATE=0.0006;AF=0.0023;EUR_AF=0.01;EFF=INTERGENIC(MODIFIER|||||||||)\n+chr1\t55852\trs184233019\tG\tC\t100.0\tPASS\tTHETA=0.0137;AA=G;AN=2184;RSQ=0.5433;ERATE=0.0009;LDAF=0.0046;VT=SNP;AVGPOST=0.9953;AC=5;SNPSOURCE=LOWCOV;AF=0.0023;AMR_AF=0.01;EUR_AF=0.0013;EFF=INTERGENIC(MODIFIER|||||||||)\n'