Mercurial > repos > iuc > vcf2maf
changeset 0:2973994fecd6 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vcf2maf commit 30046d5e0df4d80ac687edd03cf44b2afaa04550
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/fasta_indexes.loc Tue Jun 28 21:07:04 2022 +0000 @@ -0,0 +1,30 @@ +#This is a sample file distributed with Galaxy that enables tools +#to use a directory of Samtools indexed sequences data files. You will need +#to create these data files and then create a fasta_indexes.loc file +#similar to this one (store it in this directory) that points to +#the directories in which those files are stored. The fasta_indexes.loc +#file has this format (white space characters are TAB characters): +# +# <unique_build_id> <dbkey> <display_name> <file_base_path> +# +#So, for example, if you had hg19 Canonical indexed stored in +# +# /depot/data2/galaxy/hg19/sam/, +# +#then the fasta_indexes.loc entry would look like this: +# +#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /depot/data2/galaxy/hg19/sam/hg19canon.fa +# +#and your /depot/data2/galaxy/hg19/sam/ directory +#would contain hg19canon.fa and hg19canon.fa.fai files. +# +#Your fasta_indexes.loc file should include an entry per line for +#each index set you have stored. The file in the path does actually +#exist, but it should never be directly used. Instead, the name serves +#as a prefix for the index file. For example: +# +#hg18canon hg18 Human (Homo sapiens): hg18 Canonical /depot/data2/galaxy/hg18/sam/hg18canon.fa +#hg18full hg18 Human (Homo sapiens): hg18 Full /depot/data2/galaxy/hg18/sam/hg18full.fa +#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /depot/data2/galaxy/hg19/sam/hg19canon.fa +#hg19full hg19 Human (Homo sapiens): hg19 Full /depot/data2/galaxy/hg19/sam/hg19full.fa +hg19test hg19 Human (Homo sapiens): hg19 Test ${__HERE__}/test1.fa \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/input_test1.vcf Tue Jun 28 21:07:04 2022 +0000 @@ -0,0 +1,11 @@ +##fileformat=VCFv4.1 +##contig=<ID=21,assembly=GCF_000001405.26,length=46709983> +##contig=<ID=22,assembly=GCF_000001405.26,length=50818468> +##ALT=<ID=CNV,Description="Copy Number Polymorphism"> +##INFO=<ID=SVLEN,Number=.,Type=Integer,Description="Difference in length between REF and ALT alleles"> +##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant"> +##VEP="v105" time="2022-03-30 19:24:59" ensembl=105.525fbcb ensembl-io=105.2a0a40c ensembl-funcgen=105.660df8f ensembl-variation=105.ac8178e +##INFO=<ID=CSQ,Number=.,Type=String,Description="Consequence annotations from Ensembl VEP. Format: Allele|Consequence|IMPACT|SYMBOL|Gene|Feature_type|Feature|BIOTYPE|EXON|INTRON|HGVSc|HGVSp|cDNA_position|CDS_position|Protein_position|Amino_acids|Codons|Existing_variation|DISTANCE|STRAND|FLAGS|SYMBOL_SOURCE|HGNC_ID|SOURCE|custom_annotation.gtf.gz"> +##INFO=<ID=custom_annotation.gtf.gz,Number=.,Type=String,Description="custom_annotation.gtf.gz (overlap)"> +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG00096 +21 5733 rs142513484 C T . . CSQ=T|3_prime_UTR_variant|MODIFIER|MRPL39|ENSG00000154719|Transcript|ENST00000307301|protein_coding|3/3||||159|||||||-1||||custom_annotation.gtf.gz|,T|missense_variant|MODERATE|MRPL39|ENSG00000154719|Transcript|ENST00000352957|protein_coding|2/2||||70|70|24|A/T|Gca/Aca|||-1||||custom_annotation.gtf.gz|,T|upstream_gene_variant|MODIFIER|AP000223.42|ENSG00000260583|Transcript|ENST00000567517|antisense|||||||||||2407|-1||||custom_annotation.gtf.gz| GT 0|0
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/input_test2.vcf Tue Jun 28 21:07:04 2022 +0000 @@ -0,0 +1,3 @@ +##fileformat=VCFv4.0 +#CHROM POS ID REF ALT QUAL FILTER INFO +chrM 5701 . GA G . . .
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output_test1.tabular Tue Jun 28 21:07:04 2022 +0000 @@ -0,0 +1,3 @@ +#version 2.4 +Hugo_Symbol Entrez_Gene_Id Center NCBI_Build Chromosome Start_Position End_Position Strand Variant_Classification Variant_Type Reference_Allele Tumor_Seq_Allele1 Tumor_Seq_Allele2 dbSNP_RS dbSNP_Val_Status Tumor_Sample_Barcode Matched_Norm_Sample_Barcode Match_Norm_Seq_Allele1 Match_Norm_Seq_Allele2 Tumor_Validation_Allele1 Tumor_Validation_Allele2 Match_Norm_Validation_Allele1 Match_Norm_Validation_Allele2 Verification_Status Validation_Status Mutation_Status Sequencing_Phase Sequence_Source Validation_Method Score BAM_File Sequencer Tumor_Sample_UUID Matched_Norm_Sample_UUID HGVSc HGVSp HGVSp_Short Transcript_ID Exon_Number t_depth t_ref_count t_alt_count n_depth n_ref_count n_alt_count all_effects Allele Gene Feature Feature_type Consequence cDNA_position CDS_position Protein_position Amino_acids Codons Existing_variation ALLELE_NUM DISTANCE STRAND_VEP SYMBOL SYMBOL_SOURCE HGNC_ID BIOTYPE CANONICAL CCDS ENSP SWISSPROT TREMBL UNIPARC RefSeq SIFT PolyPhen EXON INTRON DOMAINS AF AFR_AF AMR_AF ASN_AF EAS_AF EUR_AF SAS_AF AA_AF EA_AF CLIN_SIG SOMATIC PUBMED MOTIF_NAME MOTIF_POS HIGH_INF_POS MOTIF_SCORE_CHANGE IMPACT PICK VARIANT_CLASS TSL HGVS_OFFSET PHENO MINIMISED GENE_PHENO FILTER flanking_bps vcf_id vcf_qual gnomAD_AF gnomAD_AFR_AF gnomAD_AMR_AF gnomAD_ASJ_AF gnomAD_EAS_AF gnomAD_FIN_AF gnomAD_NFE_AF gnomAD_OTH_AF gnomAD_SAS_AF vcf_pos +MRPL39 0 . GRCh37 21 5733 5733 + Missense_Mutation SNP C C T novel TUMOR NORMAL C C ENST00000352957 2/2 MRPL39,missense_variant,,ENST00000352957,;MRPL39,3_prime_UTR_variant,,ENST00000307301,;AP000223.42,upstream_gene_variant,,ENST00000567517,; T ENSG00000154719 ENST00000352957 Transcript missense_variant 70 70 24 A/T Gca/Aca -1 MRPL39 protein_coding 2/2 MODERATE . GCT rs142513484 . 5733
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output_test2.tabular Tue Jun 28 21:07:04 2022 +0000 @@ -0,0 +1,3 @@ +#version 2.4 +Hugo_Symbol Entrez_Gene_Id Center NCBI_Build Chromosome Start_Position End_Position Strand Variant_Classification Variant_Type Reference_Allele Tumor_Seq_Allele1 Tumor_Seq_Allele2 dbSNP_RS dbSNP_Val_Status Tumor_Sample_Barcode Matched_Norm_Sample_Barcode Match_Norm_Seq_Allele1 Match_Norm_Seq_Allele2 Tumor_Validation_Allele1 Tumor_Validation_Allele2 Match_Norm_Validation_Allele1 Match_Norm_Validation_Allele2 Verification_Status Validation_Status Mutation_Status Sequencing_Phase Sequence_Source Validation_Method Score BAM_File Sequencer Tumor_Sample_UUID Matched_Norm_Sample_UUID HGVSc HGVSp HGVSp_Short Transcript_ID Exon_Number t_depth t_ref_count t_alt_count n_depth n_ref_count n_alt_count all_effects Allele Gene Feature Feature_type Consequence cDNA_position CDS_position Protein_position Amino_acids Codons Existing_variation ALLELE_NUM DISTANCE STRAND_VEP SYMBOL SYMBOL_SOURCE HGNC_ID BIOTYPE CANONICAL CCDS ENSP SWISSPROT TREMBL UNIPARC RefSeq SIFT PolyPhen EXON INTRON DOMAINS AF AFR_AF AMR_AF ASN_AF EAS_AF EUR_AF SAS_AF AA_AF EA_AF CLIN_SIG SOMATIC PUBMED MOTIF_NAME MOTIF_POS HIGH_INF_POS MOTIF_SCORE_CHANGE IMPACT PICK VARIANT_CLASS TSL HGVS_OFFSET PHENO MINIMISED GENE_PHENO FILTER flanking_bps vcf_id vcf_qual gnomAD_AF gnomAD_AFR_AF gnomAD_AMR_AF gnomAD_ASJ_AF gnomAD_EAS_AF gnomAD_FIN_AF gnomAD_NFE_AF gnomAD_OTH_AF gnomAD_SAS_AF vcf_pos +mt:ND3 0 . 
BDGP6.32 chrM 5702 5702 + Frame_Shift_Del DEL A A - novel TUMOR NORMAL A A c.95del p.Asp32AlafsTer28 p.D32Afs*28 FBtr0100870 1/1 mt:ND3,frameshift_variant,p.Asp32AlafsTer28,FBtr0100870,;mt:ND2,downstream_gene_variant,,FBtr0100857,;mt:CoI,downstream_gene_variant,,FBtr0100861,;mt:CoII,downstream_gene_variant,,FBtr0100863,;mt:ATPase8,downstream_gene_variant,,FBtr0100866,;mt:CoIII,downstream_gene_variant,,FBtr0100868,;mt:ND4L,downstream_gene_variant,,FBtr0100880,;mt:ND6,upstream_gene_variant,,FBtr0100883,;mt:ATPase6,downstream_gene_variant,,FBtr0433498,;mt:ND4,downstream_gene_variant,,FBtr0433500,;mt:ND5,downstream_gene_variant,,FBtr0433501,;mt:Cyt-b,upstream_gene_variant,,FBtr0433502,;mt:tRNA:Trp-TCA,downstream_gene_variant,,FBtr0100858,;mt:tRNA:Cys-GCA,upstream_gene_variant,,FBtr0100859,;mt:tRNA:Tyr-GTA,upstream_gene_variant,,FBtr0100860,;mt:tRNA:Leu-TAA,downstream_gene_variant,,FBtr0100862,;mt:tRNA:Lys-CTT,downstream_gene_variant,,FBtr0100864,;mt:tRNA:Asp-GTC,downstream_gene_variant,,FBtr0100865,;mt:tRNA:Gly-TCC,downstream_gene_variant,,FBtr0100869,;mt:tRNA:Ala-TGC,upstream_gene_variant,,FBtr0100871,;mt:tRNA:Arg-TCG,upstream_gene_variant,,FBtr0100872,;mt:tRNA:Asn-GTT,upstream_gene_variant,,FBtr0100873,;mt:tRNA:Ser-GCT,upstream_gene_variant,,FBtr0100874,;mt:tRNA:Glu-TTC,upstream_gene_variant,,FBtr0100875,;mt:tRNA:Phe-GAA,downstream_gene_variant,,FBtr0100876,;mt:tRNA:His-GTG,downstream_gene_variant,,FBtr0100878,;mt:tRNA:Thr-TGT,upstream_gene_variant,,FBtr0100881,;mt:tRNA:Pro-TGG,downstream_gene_variant,,FBtr0100882,; - FBgn0013681 FBtr0100870 Transcript frameshift_variant 95/354 95/354 32/117 D/X gAc/gc 1 1 mt:ND3 FlyBaseName_gene protein_coding YES FBpp0100181 P18930,P18930 B6E0P8,J7FKZ6 UPI0000000AE1 1/1 Gene3D:1.20.58.1610,Pfam:PF00507,PANTHER:PTHR11058,PANTHER:PTHR11058:SF9 HIGH 1 deletion . CGAC . . 5701
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test-cache/drosophila_melanogaster/106_BDGP6.32/chr_synonyms.txt Tue Jun 28 21:07:04 2022 +0000 @@ -0,0 +1,7 @@ +KJ947872 chrM +NC_024511.2 chrM +KJ947872 mitochondrion_genome +KJ947872.2 chrM +KJ947872.2 mitochondrion_genome +NC_024511.2 mitochondrion_genome +chrM mitochondrion_genome
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test-cache/drosophila_melanogaster/106_BDGP6.32/info.txt Tue Jun 28 21:07:04 2022 +0000 @@ -0,0 +1,7 @@ +species drosophila_melanogaster +assembly BDGP6.32 +sift b +polyphen +source_sift sift +source_genebuild dmel_r6.32_FB2020_01 +source_assembly BDGP6.32
Binary file test-data/test-cache/drosophila_melanogaster/106_BDGP6.32/mitochondrion_genome/1-1000000.gz has changed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test1.fa Tue Jun 28 21:07:04 2022 +0000 @@ -0,0 +1,168 @@ +>21 dna:chromosome chromosome:GRCh38:21:1:10000:1 REF +CCACAATCATTTTAGGAGAATGGGTTTAAGAAAGGAAAAAAAAAAAAAAGATTTCTGTAT +GCTCTTAAGAGAAAATCTAAAAAATAATGACATGAAAAAGTTGAAAGGAATGGAAAAATA +TGTACCATTAAAAGGAAACCCGACGTATGAATGCCATTATCAGACAAAACAGATTTTTTT +CTTTTTGAGATGGAGTCTCACTCTGTGGCCCAGGCTGGAGTGCAGTGGCACAATCTCTGC +TCACAGCAAGCTCCGCCTCCCAGGTTCATGCCATTCTCCTGCCTCAGCCTCCCAAGTAGC +TGGGACTACAGGCACCCACCACCACACCAGGCTAGTATTTGTATTTTTAGTAGAGACGGG +GTTTCATCGTGTTAGCCAGGATGGTCTCAATCTTCTGACCTCGTGATCTGCCCACCTCAG +CCTCCCAAAGTGCTGGGATTACAGGCATGAGCCACCGCGCCCAGCCAGACAAAACAGATT +TTAAGACAACTAAGAAGTTAACAAGCTGACCCTACAATAAGCATGAAAATTTTGAAAAAG +AATAGGAAAGGAGAACTCACCATAAGAGAAATTGAAACTTGTTATAAAGCTATAGTTGTT +AAAACGGTGTTACTACAGTGGTACATGGACAGATAAATGGACCAATGAAGCAGACCCAGG +CACTGAAAGGAACCTTTTATATGACAGCATGGCACAATCAGTAAGAATAGAGAGGAAATA +GGCCAGGCACGGTGGCTCACGCCTGTAATCCCAGTACTTTGGGAGGCCAAGGCAGGCAGA +TCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGTCAACATGGTGAAACCGCGTCTCTACT +AAAAATACAAAAATTAGCTGGTCGTGGTGGCAGGCACCTGTAATCCCAGCTACTCGGGAG +GCTGAGGCAGGAGAATCACTTGAACCCGGCTAGCGGAGGCTGCAGTGAGCCGAGATCACG +CCATTGCACTCCAGCCTAGGCAACAAGAGTGAAACTCCATCACAAAAAAAAAAGAAAAAG +AGTAGACAGGAAATAAATGGTCCAGAATAACTGCCTATCCTTGTGGAGGAGAGGGTGATT +CAAAATTAGGTCCCTTTCCTCACTCTATATGCAAAAAACAAACTTCAAATAAATTATACA +ATTAAATGTGAAAATCAAGACTTTAAAATAAACAATGCAGTAGGCTGCTTTATAATATCA +AGTTAGGGAAGGCTTTCTTAAATTTCATAAACATAAATCATAGAGGAAAAGATGAACTGT +CTACCTTAAAATTAAAGACGATATAAACAAAATTAAAAGGTAAGCCAGACAAAAGAAATA +TTTGTAGTGACAACGGTTTAACTTTCTTTCTTTCTTTTTTTTTTTTTGAGACGGAATCTC +ACTCTGTCACCCAGGCTGGAGTACACTGGTGCAATCTCAGCTCACTGCAACCTCCACCTC +CCAGGTTCAAGCGATTCTTGTGCCTCAGCCTCCCAAGTAGCTGGGATGACAGGTACGCAC +CACCACACCCAGCTAATTTTTTGTATTTTTAGCAGAGACGGGGTTTCACCATGTTGGCCA +GGCTGGTCTCGAATTCCTGACCTCAGGTGATCCATCTGCCTCAGCCTCCCAAAGTGCTGG +GATTACAGGAGTGAGCCACTGCACCTGGCCCACAAGGGTTTCACTTTCTAAAAATATAAA +GAACTGGCCAGGTGCAATGGCTCACACTTGTAATCCTAGCACTTTGGGAGGGCAAGGAGA 
+GCAGATCGCTAGAGGCCAGAAGCTGGAGAACAGCCTGGCCAACATGGTGAAACCCCATCT +CTACTTTAAAAATATAAAAATTAGTTGGGTGTGGTGGCACTTGCCTATAATCCCAGCTAC +TTGGCTACTCAGGAGGCCGAGGCAGGGGAATCGCTTGAATCCAGTAGGCAGAGGCTACAG +TGAGCCAAGATCATGCCACTGCACTACAGCTTGGGCAACAGAGTGAGACTTGGTCTCAAA +AAAAAAAAAAATTATATATATATATCTTATATATATACACTATTATATATATACACACAC +ACACACACACACACACACACACACACACACACAATTAATATGAGATGCCCAAAAATCCAA +TTGTAAAAAGGGGCAAAGGTTGTAAACTGGTAATTCATAAAAACAAATGAAGAGATGCTT +ATTGGTACTATATGCTCAGTATTAAGCAAATTAAATGAGATAGGATCGTGCATATTCAAC +CAACAAAATATCTGAATGTCTGAAAATAATAAATGTTAATGAGGGAGTGGAGAAAATGGG +AATGCTCATACTGCTGATAGAGAGTAAACTGGTACAACTATTGTGGCAGCCAATTAATAT +TTAGTAAAGCTGAAGATGCATGGTCCACTGTGGTACAGGCCCTGGAGATATTATCAAATG +TGTACACAAAGAAACACGCACAAGGATATTTTCTGCGATACTGTAATACTCAAAAGCCAA +TGACATCCTCAGTGGTCATCAATAAGAAAATGAATTAATGATGGGATTAATCATATAATG +AAATACTATATAGCAGTTGAAATGAATGTACTCTTTACATGTATCAACATGCTATACATA +AAAAACAATGATGAGCAATAAAAGCAAATTGCAAAAGGATATATATTATGAAACCAATTA +TGTTTAGTTTTAAAACACAGAGAATACTATGGATTGTAGTAAAAAAAAATATAAAATCAT +GAAGAGTAAGGACAGGTACAAACAGGATAGTGGTTCTTCTTGAGAGGAAGGAAATGACAT +AACAAGACCTTCAATGGATGTGCAGCTTTTCCTTTATTTAAACACAAAAGGATCTGAAGG +AAATAAGGAAAAAGGTTGACAGTGGTTACAATTAAATAGTGGCTGTATGTCAACACTCTT +GGTTACAAACAACAGGATCTACACTAGCTAGTTTAAACAAACAGAATAAAATGGCTCACA +AGTCACAGGCGTGGAGGTCAGGACTACGGAGCCCCCAAAAATGCCCATTTTTATACCTTG +GAGCAGCTGCGGGGGAAAAACTGCTAAGCAAAGCCTCCACACCTTGCACCATTACATGGG +ACCTCTGCCACTGCTGCCTTGAAAACCACATCACTGCTCCGTTCAACAAAATGTATCTCA +TACTACTCTTGTCTGCAAAGTACTTGCTTCCAGATTTCACACAGTTATGTCTAATTGGTG +AGCCCATGCTACCTGTCTGAGCCACAGCTGCAAGGAAGGGCAGGAGATTAAATTTCATTC +TTCTACTGGGTAAGGCGAGATCCACAGAGTGGGAAGTTGCCAAAAAGCAGGTGTTCAAAC +AGTGCTAGCTGCCCAAAAAGCACGAAAAGTGCCCACTCAAACAAGAGTTGGTGAAAATAT +TCTCTCTACTTTTCTGTATGCTCAAATATTTCACAATTTTTTTAAAGAAAAAATGTCGAA +GTATGTAAATTCACAAACAACAAAGGGAATGGAAAAAAAATCAATAAACAAGAGACGTCA +ACCAAATTCTAAAAGACAAAAAGCTAAGTGACTAATAAAACACTAGAGAATGTCACCACC +TAGAAAACATGTGGAGGAAGTTCCATCAGAGGCAGCCAACCGGCCCAGCTGGGCCTTAGC +TCAGAGGCAGCAAAAGTTCATAGATGATAGAAGACAGTAGAGGGATTAGTTAAATCTAAA 
+CTAATTAAAAAGAAAGGAATTAATTAAAGGTCTGTATACAAACTGGTTGAACACTACCCC +ATCTTCAAATATAGAAGGGCCTAAATCCAGGAAGTGCCCCATGCAAAATATTGAAGGGAT +CTTTATTATAAAGTAAGTGAAAGAACAGTCTGGGTAAAACTAGGATAGTCAATTTAGAAA +TCGGTAGCCAAGACAAGACCTCTTCATTCTAGCATTTTAAAAACCCTCATCCTACCAGCC +AGATTTACCTACTTATCCTTAGTGATGCTTATGAGAAGCCAAACCTTCCTCAAACCACAG +AGCTGCCACTCAACCTCCCTACATCCACATCTTCAACTACAAACTGACAGCCAACAAACG +TAAGACATTTGAAAAAAGCCTGAAACATGTAGATGAAAGACTAAGAAAAAAATCTGTCAC +TGGAGAAAAGAGATGATTCAGGGACTAGAAAGTGAATGAAGAAATGAATAAAAATTATAT +TGATATCTTCAGAGAGCTTCATGAAAATACTTCACATCCAGAAAACATGAAAAGGATTCT +ATGAAACAGAAGCAAAAGAGAGCTTTCAGAAATTAAAATATGCTTGACATAATGTGAGTA +AAATATAAATGCAGGAAAAATAATAGAGAAAGGGAAAAAAATACAGAGGACCAATACAGA +CGATGCAAATACAGCTGAAAGGTATTAGAGAAACAAAGAACCTCCAGAGAAAAGAAAGAA +CAAATAAAGTAAAAAAACATAAAAGAGAGATGTCAAAAATTGAAAGGATCGCCTACGTAC +CAAGCAGAATAACTGACTGAAGACCTAGATACAGGTACAGGATTGTCGAATTTTACAACA +CCAAGAATAATAAGATCCTAAGTGTCCAGAGTGTGTTGAGAGAAGGGGATGCAGAGAACA +GGTTATATAAAAAAGGAACAAGAATCAGACTGGCATCAGACCTTTCATTAGCAAAATCAG +ATGCCAGAAAATAACTGAGCTGCTTTCAAAGTTCTAAGAGAAAACTACTTTTCAATCTAG +AATTCTGTATCTAGCTGAATAGTTCATCAAATGTAAGCGCATAACCCACTTTAGACATGC +AATGGCTCACCTCTTAGACATTTTTTCCTAGAAAGGAGCTTAGAGACAAATGCTAGCAAA +ATGAGAAAGTTAATCAAGAAACAGGAAGATATGGACTCTAAGAAAAAGTTGACCTCACCC +AGAAAAAAAAAAAAAAAAATACGGCCTTCAGAGGAAGCCTGTGCACAAGATGCAGAAAAC +TTGCTGACAAAGAAGCAGAAACTTTTTGGCAAAATATTTTTGTAAGACTGATAACCAAGG +CTTACTCAAGGTGTGGGGAAACAGGCACTCTTAGACAGTTATTGGGAGTAACACTTGGCA +AAACCTTTGGGAAGCCAGCATCTGGTAGCACCTGACAGTAATTCCACTGCCAGAAATCTA +TTTCTACAGAATTATTTATACGTGAGGCAACATAGAGTAGAACATAAGAGCATGTGAGTG +AAACTACCTCCGTTCAAATTCCCACTTTACCACTTTCAAGCTCTATGCTGTTTCCTCATT +TAAAAAATAGAAACATCTTCTCAACAGGTCACTATGAGGATTATAGAAAAGGTTCCACAC +GTGATGGGCTTCGCACAAGACCCAGCACATGATAAATACTCAGTTAATATTACTTATCTA +CAAACATTTGCCATTTTTGCACAAGGATGTTTACTGTGCCACTGTTCGGCAAAAAACGCA +AGCAAGTATAGTATGGTCCTATTTATGTTTTTTAGAAATTACATTTATATTTGTATGTAT +CTGCCTAGATCTGTAGGAAAAAATAGAGAAAACATGCTAAAGTGTTGCATCCATGGAGCA +GGAAGGGAGTGACCTGTTTTTGTACAGTTTTTACTTAAGCAACATGAAGTGTTTCATACT 
+TTAGAGAAAAGAAAAAGTAAAGATCACTATCAATCTTGGGGAGAAAGGGTTAATCTGTAT +AAGCACACACAAACATTATATTTAAAACATTTATTTTATTATACATATTTAAATTTTAGA +AAGTTATTAGGTAGATGTACATTCCTCTGTTGCTTTACTTTGATCTTCAGTTACCTGTAA +AAACATGAATAGCTTTACTTTCCTAATAAACACTTTCAGTTTTACCCTTCACCCACCATT +TTAAATCATTCCACAGCCCTCTTATAATGACCCTTTATCTTTCCCCTTAAATAGTAGAAT +AGAAAGTTAGGTAAGAAGAAAGATTTAAAGGCCAAGTAACCAATATAAATTATCAGGTAA +AGAACAATAATTCCATTCAAAAATATAATCTAAGGCATATTATACCTAGGCTTCTCAAGT +TGGCCATTTTACTTTTCAGCCGTAAAACAAAATCATTAGTGTAAGAAAAAACACCTGAAG +TGCTGTAACAAGCTATTTTAAATCAGTTTATTTTCTTTTGGAGTATAATTCCATTTAGAA +TAGAACATATTACAGATTTAGTCTAAATAGATATCTCTATGTCCCTATTTCTATTCTGTG +TCACTCAGACTCATCAAACAGACATGGCTAAATCTGAATTCAACACCCCAAAACCTGTCC +TACCTGCATCATCTATCTTGGTAAAGGGCTCCACCCAGATACCTAAGGACAGCATTCAAG +AATTCACTCTCTTCTTCCTACCTGCATATTCCATCAACAGTCCTACTAATTCAGCCTAAG +TTAACTCTCACAGTCATTCTCCCTATCGCGGCTGAACTTTAGGACCTCATTCTCTTCTAT +CCCCCTAACAGAACTCTTGGCTTAAGTCACAGCTATATCCCATGCTACCCCTAGAATGAG +CTTTTAAACAAGAATAGCTGATCAACAGGTCACTTCCCTACTCAAAATTCTCCAGTTACT +CTCTAGCATCTCAGTGGGGTGGAGGAGGAAAGTCAGGAATGCAAGTTCCACAACACGGGA +CACAAGCCCCTCATATTCTGTTTTTGGACTGACTCTTTACCTATCCTACATGTGACCTGT +ACATTCAAATTCATCTTACATTCATCATGTTCTCTATGTAGGCAAGTGACTTCCTCTGAG +ATAACCTCCAACCCATTTTCCAACAGATTATCTGCATGGTCCACTTTTATTCACCCTTTA +AAATCAGCTTAAGAGACTCCAATTCCTGGAAGCCTTCCTTAAGCAACTCCCCAGCTTGGT +CTGAGTCCTCTTCTTTATGCCAGAGCCCCACATGTACACCTGAACTGTCTGCTTATAAGT +CTAATTATCCAATCAACATTACTTCCCTGAGGCAGCAAGTAAATTTTGTGCATTTTTTTG +TACCTCCAGACCATAACAGTACCTGGCACACAATAAACAATCAGTAAGTCTGTGTTGCAT +CAATGAACCACGTTTAATCTAATCCCTTCAGAAAAGCAAAATGGACAGTAGAAAAAAGCA +ACTAAACCAGAATAAATGTTTGAAAAAATATGTATTGACTAACATCACTAGATATTCTAG +GTAAGACACTGTCTCTGCCGTTTTTCTACATTTTTCATTCCTTTAAAGTGTTCAGTTACT +ACTGCTTCTTTAATTTTCCAAACTATTTCTTTAATATTCTTTAGCTTTTTAAATTTTTTT +TAATTTTACTTAACTCCAAGTAAACATGCTGAACATTTTTTCTTTAGTTTCTTCATCTAA +AATGATCACTGTGACCAACTAAAAAGAGAAAATTACTCCATCCATTCAAAGGTAATTTCT +TCTCATGAGAAAAAAAATTATATAACCCTTACTCAAGAACATTATGGTGAAGTCCCTAAA +CAATAATTCCTATCTACATAGTCATTAATAAAGACTAGGTTTATATATCATTATGAAAAA 
+TTGTCAATAAATATTTCTTGGGTAACCCCTTCATGTGCTAATGTTGTGTTCAAAATTTGG +GCTTACACAAATATAACAAATCCTGAAAGGCTTAACAGACAGAAAATAAATGTACTGGCA +AAGAAAACAATCAAAAAATAGTATGTGGACATATACAATCAAGCACAAACTTATATGAAT +AATTATTTGTTAGAGATTACTCTGAGACTGTTCCATGGAGGAGGTACGACTCAGGCGAGG +TAGTGAAGAATGACTGCGTAGTAAATAGAAGGAGAATGGGAAATGGAGTCTAGAAAGAAA +AACTATCATGAAGAAATAAGCAAACTTGAAAATAAGTTTAATGTGTGATGCTTAATGTGC +CTGCAAGCTTAACCAGTGTTTGTGGCATTGTGGGAAATTAGGATAAGGAGGAAGTCCTAA +TGAGGACAGATTACACAGGACAGCTATCAGATAACCTAAGTAAAAGAGGCACACTTAGCC +TATGAAATAGGAAACAGGAATAGTTGTGGTCAAGAAATACAATCACCCGGGCCGGGCGCG +GTGGCTCACGCCTGTAATCCCAGCACCTTGGGAGGCTGAGGGGGGTGGATCACGAGGTCA +GGAGATCGAGACCATCCTGGCTAACACGGTAAAACCCTGTCTCTACTAAAAATACAAAAA +AATTAGTTGGACGTAGTGGTGGGCACCCATAGTCCCAGCTACTTGGGAGGCTGAGGCACG +AGAATCGCTTGAGCCCGGGAGGTGGAGGTTGCAGTGAGCTGAGATCGCGCCACTGCACTC +CAGCCTGGGAGACAGAACAAGACTCTGTCTCAAAAAAAAAAAGAAAAGAAATACTATCAT +TCCTACTCATTAAGTCTAAATGCTAATGACAAAGAATAAACTACAAAAACACAAACATTC +AAATCTGGTTTTTCAGGACATTAGGATATTTGCACATTTACTGTTAATGGCTAAAGGCGG +CTTTAAAATACACCTGACAAATCTTAATCCTCTAAATATTCATTAAGTTTACAAAATAAA +TCCTAAGACCTAACTGATACCATTACAACTATTAAGAACTATTACTATTACATAGTAATA +TTCTTATTTCTCTTCAAGTTCTACAATATCATTTTCACGATATTAACAAATAAAATCAAA +AGAGAACTGCCAAGTTACAACTTTACCCTTACCAACACACTAGTGAACAATCACAACACT +GCTTTGACTATTGAGACAACTTACTTTTTTCCTTTCTCTTTGTTTCTTTCATTTTTCCCC +CCTTTGGTTGTTATTGGTTAATTTTGATGAATTTCTTTATAGAAGAGTACTCAATAGCTG +TCAAAAACACTTACCATTTTCCGAGATCTTTCCAATAGCTTATCCCATATTGTAAAATGT +GCCTTGAAAAGAAAAGATTTGCGATGAACTAAATGAAGCAGTAATGTCAAAGTAAAAAGG +ACAGAGTCTTTATATAGCTTTTAGAACAAAATAAATAAAAAGCTAGGCAGTACTAATTTA +GATAAGTGGTTATAAAAGTGGATCATAGTAGCAAAAAAAGGTGCTACATGGGGATAGCTG +CTTTTTTGCTTTCTTCAATCGCATGAACCCGGGAGGCAGAGGTTGCAGTGAGCATTTAAG +AAAAAAACGCTTAAGGATTGAACGTGCCCTTCTTTCACTTTTGCCCATCTTAATCATTTC +CATCATAGAAAGCATTTCTTCTAATAGGAAGCAAAAATCTGCCCTACTTTCCCATAGACT +GTGGTTCAATCCTTAAACAGCCAGTTTCAACATTCTAGAAGTCATTCTTCAAATCATAAG +CAATCACAAGAAGACAGTCATTAAAAACCAACTGTGACATCTTCACACTGAAATGGCAGA +AGCAGAGGATTCATCAAGTTACCCTAAAGAATAAGTAAAACCTAACAAAGACAATAGTAA 
+ATTTTTTTTTTTTTTGAAATGGAGTTTCACTCTGTCGCCCAGGCTGGAGTGCAGTGGTGC +GATCTCAGCTCACTGCAACCTCCGCCTCCCAGGTTCAAGCAATTCTCCTGCCTCAGCCTC +CCAAGACAATAGTAAAATTTAAACTCAATTTCCTTAGTCCATAACAACCTCCTGTAGCAG +AGGATTATCAGAATGCATTAAAGAAAACTGTGCAAAGTGTATCATGACCACATCAATTTT +ATTAGGTCAACTGAAAGTGGTAACAAGACATATGGGCAGGCCAGTGACTACTCCACACTG +AATGAGCTCATAAAATCTATAATAAAAGGTAAAATTAATAAATATCAACATACAAACCCT +TCCAGGGAAAGAGCTGACTGGTATGTTTAAAGGGAAAACCATGCCTGACTCAGGCGGAAT +GAACTGCTGGTGCAGAGACCTTAAGCTGTGGCTGGAATATAGTGAGTGAGGAAAGGAGTG +GTGTTAGATAAAGTCAGAGAAGCAGGCAGGGACCAGATAATGCAGGGCTTTGTGAGACAG +GGTAAAGAGTTGGGATTTGTTCAAGGGAAGCCATTGGAAA
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test2.fa Tue Jun 28 21:07:04 2022 +0000 @@ -0,0 +1,2 @@ +>chrM +AATGAATTGCCTGATAAAAAGGATTACCTTGATAGGGTAAATCATGCAGTTTTCTGCATTCATTGACTGATTTATATATTATTTATAAAGATGATTTTATATTTAATAGAATTAAACTATTTCTAAAAGTATCAAAAACTTTTGTGCATCATACACCAAAATATATTTACAAAAAGATAAGCTAATTAAGCTACTGGGTTCATACCCCATTTATAAAGGTTATAATCCTTTTCTTTTTAATTTTTAATAATTCGTCAAAAATTTTATTTATTACAATTATAATTATTGGGACATTAATTACAGTTACATCTAATTCTTGGTTAGGAGCTTGAATAGGTTTAGAAATTAATTTATTATCTTTTATCCCCCTATTAAGAGATAATAATAATTTAATATCTACAGAAGCTTCTTTAAAATATTTTTTAACCCAAGTTTTAGCTTCAACTGTTTTATTATTTTCTTCAATTTTATTAATATTAAAAAATAATATAAATAATGAAATTAATGAATCTTTTACATCCATAATTATTATATCAGCTTTATTATTAAAAAGTGGAGCCGCTCCTTTCCATTTTTGATTTCCTAATATAATAGAAGGTTTAACATGAATAAATGCTTTAATATTAATAACTTGACAAAAAATTGCACCTTTAATATTAATTTCTTATCTTAATATTAAATATTTATTATTAATTAGAGTAATTTTATCAGTTATTATTGGAGCTATTGGAGGATTAAATCAAACTTCTTTACGAAAATTAATAGCATTTTCTTCAATTAATCATTTAGGGTGAATATTAAGATCTTTAATAATTAGAGAATCAATTTGATTAATTTATTTTTTTTTTTATTCATTTTTATCATTTGTATTAACATTTATATTTAATATTTTTAAATTATTTCATTTAAATCAATTATTTTCTTGATTTGTTAATAGAAAAATTTTGAAATTTACATTATTTATAAATTTTTTATCATTAGGAGGATTACCTCCATTTTTAGGATTTTTACCAAAATGACTTGTAATTCAACAATTAACATTATGTAATCAATATTTTATATTAACACTTATAATAATATCAACTTTAATTACATTATTTTTTTATTTACGAATTTGTTATTCCGCTTTTATAATAAATTATTTTGAAAATAACTGAATCATAAAGATAAATATAAATAGTATTAATTATAATATATATATAATTATAACttttttttcaatttttggattatttttaatttctttattttattttatattTTAAGGCTTTAAGTTAATAAAACTAATAACCTTCAAAGCTATAAATAAAGAAATTTCTTTAAGCCTTAGTAAAACTTACTCCTTCAAAATTGCAGTTTGATATCATTATTGACTATAAGACCTAATTAATTTGTCCTTATTTGATTAAGAAGAATAAATCTTATATATAGATTTACAATCTATCGCCTAAACTTCAGCCACTTAATCAATAATCGCGACAATGATTATTTTCTACAAATCATAAAGATATCGGAACTTTATATTTTATTTTTGGAGCTTGAGCTGGAATAGTTGGAACATCTTTAAGAATTTTAATTCGAGCTGAATTAGGACATCCTGGAGCATTAATTGGAGATGATCAAATTTATAATGTAATTGTAACTGCACATGCTTTTATTATAATTTTTTTTATAGTTATACCTATTATAATTGGTGGATTTGGAAATTGATTAGTGCCTTTAATATTAGGTGCTCCTGATATAGCATTCCCACGAATAAATAATATAAGATTTTGACTTCTACCTCCTGCTCTTTCTTTACTATTAGTAAGTAGAATAGTTGAAAATGGAGCTGGGACAGGATGAACTGTTTATCCACCTCTATCCGCTGGAATT
GCTCATGGTGGAGCTTCAGTTGATTTAGCTATTTTTTCTCTACATTTAGCAGGAATTTCTTCAATTTTAGGAGCTGTAAATTTTATTACAACTGTAATTAATATACGATCAACAGGAATTTCATTAGATCGTATACCTttatttgtttgatcagtagttattactgctttattattattattatCACTTCCAGTACTAGCAGGAGCTATTACTATATTATTAACAGATCGAAATTTAAATACATCATTTTTTGACCCAGCGGGAGGAGGAGATCCTATTTTATACCAACATTTATTTTGATTTTTTGGTCATCCTGAAGTTTATATTTTAATTTTACCTGGATTTGGAATAATTTCTCATATTATTAGACAAGAATCAGGAAAAAAGGAAACTTTTGGTTCTCTAGGAATAATTTATGCTATATTAGCTATTGGATTATTAGGATTTATTGTATGAGCTCATCATATATTTACCGTTGGAATAGATGTAGATACTCGAGCTTATTTTACCTCAGCTACTATAATTATTGCAGTTCCTACTGGAATTAAAATTTTTAGTTGATTAGCTACTTTACATGGAACTCAACTTTCTTATTCTCCAGCTATTTTATGAGCTTTAGGATTTGTTTTTTTATTTACAGTAGGAGGATTAACAGGAGTTGTTTTAGCTAATTCATCAGTAGATATTATTTTACATGATACTTATTATGTAGTAGCTCATTTTCATTATGTTTTATCTATAGGAGCTGTATTTGCTATTATAGCAGGTTTTATTCACTGATACCCCTTATTTACTGGATTAACGTTAAATAATAAATGATTAAAAAGTCATTTCATTATTATATTTATTGGAGTTAATTTAACATTTTTTCCTCAACATTTTTTAGGATTGGCTGGAATACCTCGACGTTATTCAGATTACCCAGATGCTTACACAACATGAAATATTGTATCAACTATTGGATCAACTATTTCATTATTAGGAATTTTATTCTTTTTTTTTATTATTTGAGAAAGTTTAGTATCACAACGACAAGTAATTTACCCAATTCAACTAAATTCATCAATTGAATGATACCAAAATACTCCACCAGCTGAACATAGATATTCTGAATTACCACTTTTAACAAATTAATTTCTAATATGGCAGATTAGTGCAATAGATTTAAGCTCTATATATAAAGTATTTTACTTTTATTAGAAAATAAATGTCTACATGAGCTAATTTAGGTTTACAAGATAGAGCTTCTCCTTTAATAGAACAATTAATTTTTTTTCATGATCATGCATTATTAATTTTAGTAATAATTACAGTATTGGTGGGATATTTAATATTTATATTATTTTTTAATAATTATGTAAATCGATTTCTTTTACATGGACAACTTATTGAAATAATTTGAACTATTTTACCAGCAATTATTTTACTATTTATTGCTCTTCCTTCTTTACGTTTACTTTATTTATTAGATGAAATTAATGAACCATCTGTAACTTTAAAAAGAATCGGCCATCAATGATATTGAAGTTACGAATATTCAGATTTTAATAATATTGAATTTGATTCATATATAATTCCAACAAATGAATTAATAACTGATGGATTTCGATTATTAGATGTTGATAACCGAGTAGTTTTACCCATAAACTCACAAATTCGAATTTTAGTAACAGCTGCTGATGTTATTCATTCTTGAACAGTACCTGCTTTAGGAGTAAAAGTTGACGGTACACCTGGACGATTAAATCAAACTAATTTTTTTATTAATCGACCGGGTTTATTTTATGGTCAATGTTCAGAAATCTGTGGAGCTAATCATAGATTTATACCGATTGTAATTGAAAGTGTTCCTGTAAATTACTTTATTAAATGAATTTCTAGAAATAACTCTTCATTAGATGACTGAAAGCAAGTACTGGTCTCTTAAACCATTTAATAGTAAATTAGCACTTACTTCTAATGATAAAAAATTAGTTAAAATCATAACATTAGTATGTCAA
ACTAAAATTATTAAATAATTAATATTTTTTAATTCCACAAATAGCACCtattagatgattattattatttattattttttctattacatttattttattttgttctattaactattattcttatATACCAAATTCACCTAAATCTAATGAATTAAAAAATATCAACTTAAATTCAATAAATTGAAAATGATAACAAATTTATTTTCTGTATTCGACCCCTCAGCTATTTTTAATTTTTCACTTAATTGATTAAGAACATTTTTAGGACTTTTAATAATTCCGTCAATTTATTGATTAATACCTTCTCGTTACAATATTATATGAAATTCAATTTTATTAACTCTTCATAAAGAATTTAAAACTTTATTAGGCCCATCAGGTCATAATGGATCTACTTTTATTTTTATTTCTTTATTTTCATTAATTTTATTTAATAATTTCATAGGATTATTTCCATATATTTTTACAAGAACAAGACATTTAACTTTAACTTTATCTTTAGCTTTACCTTTATGATTATGTTTTATATTATATGGATGAATTAATCATACACAACATATATTTGCTCATTTAGTTCCTCAAGGAACACCCGCTATTCTTATACCTTTTATAGTATGTATTGAAACTATTAGAAATATTATTCGACCTGGAACATTAGCTGTTCGATTAACTGCTAATATAATTGCTGGACATTTATTATTAACTCTTTTAGGAAATACAGGACCTTCTATATCTTATATTTTAGTAACATTTTTATTAATAGCTCAAATTGCTTTATTAGTATTAGAATCAGCTGTAGCTATAATTCAATCTTATGTGTTTGCTGTATTAAGAACTTTATATTCTAGAGAAGTAAATTAATGTCTACACACTCAAATCACCCTTTTCATTTAGTGGATTATAGTCCATGACCATTAACAGGAGCTATCGGAGCTATAACAACTGTATCAGGTATAGTAAAATGATTTCATCAATATGATATTTCATTATTTGTATTAGGTAATATTATTACTATTTTAACTGTATATCAATGATGACGAGATGTATCACGAGAAGGAACATACCAAGGATTACATACTTATGCAGTAACTATTGGTTTACGATGAGGAATAATTTTATTTATTTTATCAGAAGTTTTATTTTTTGTGAGATTTTTTTGAGCTTTTTTTCACAGAAGTTTATCACCCGCTATTGAATTAGGAGCATCATGACCTCCTATAGGAATTATCTCATTTAATCCATTTCAAATTCCTTTATTAAATACAGCTATTTTATTAGCTTCAGGAGTTACTGTAACTTGAGCCCACCATAGACTTATAGAAAATAATCATTCACAGACTACTCAAGGATTATTTTTTACAGTTTTACTAGGAATCTATTTTACAATTCTTCAAGCTTATGAATATATTGAAGCTCCATTTACTATTGCAGACTCAATTTATGGATCAACATTTTTTATAGCAACAGGATTTCACGGAATTCATGTATTAATCGGAACAACTTTTTTATTAGTATGTTTACTACGACATTTAAATAATCACTTCTCAAAAAATCATCATTTTGGTTTTGAAGCAGCTGCATGATATTGACATTTTGTCGATGTAGTTTGATTATTTTTATATATCACAATTTACTGATGAGGAGGATAATTATATTATTAATTAAATATCTATATAGTATAAAAGTATATTTGACTTCCAATCATAAGGTCTATTAATTAATAGTATAGATAATTTTTTCTATTATTTTTATTGCTTTATTAATTTTACTAATTACAACTATTGTTATATTTTTAGCTTCAATTTTATCAAAAAAAGCTTTAATCGACCGAGAAAAAAGATCCCCATTTGAATGTGGATTTGATCCAAAATCTTCATCTCGATTACCATTTTCTTTACGTTTTTTTTTAATTACTATTATTTTTTTAATTTTTGATGTAGAGATTGCATTAATTCTACCTATAATTATTATTATAAAATATTCTAATATTATAATTTGAA
CAATTACTTCAATTATTTTTATTTTAATTTTATTAATTGGATTATACCATGAATGAAATCAAGGAATGTTAAATTGATCAAACTAatatatttatatatatatatataGGGTTGTAGTTAAATATAACATTTGATTTGCATTCAAAAAGTATTGAATATTCAATCTACCTTATTAATTTAATAACTGAATATGAAGCGATTGATTGCAATTAGTTTCGACCTAATCTTAGGTAATTATACCCTTATTCTTTAATTGAAGCCAAAAAGAGGCATATCACTGTTAATGATATAATTGAATTTTAAATTCCAATTAAGGAAATATGATGATCAAGTAAAAGCTGCTAACTTTTTTCTTTTAATGGTTAAATTCCatttatatttctatttatatagtttaaataaaaccttacattttcattgtaataataaaatcttatatttttatAAATTACTAAAATTAATTCACTATATCCAAAGATTTAATAATCTCCATAACATCTTCAATGTCAAACTCTAGTATAAGCTATTTGGATATAAAAATAATAAAATTAATAAAATTAAAATTCAAAATACAAATAATAATAAATAAATTTTCAAAGAATTATTATGTATTAAAAATAAAGTTTTAGAATATATAGATAATTTTTGATATAAATGTTGACCTCCAAAATATTCTGATCAACCTTGATCAAAACTTTTTACAACTAATTGACCATAATTTAAAGGATAAAAAATTATACCATAAGTTCTAATATAAGGTATAAATCATATAGACCCTAAAAAAGTTCTTAAATTATATATAAATAAAGATTTATTTAAAAAAAATAAATTTCTTAAAGAAATTAAATATCCAAATAAACCCCCTACAATACATACAAATAATGTTAACAATTTTATATAAATAGGTAAACAAATTATATAAGGAAAAGGAAAAATCAATCAATTTAATATTCTACCTCCAATAATTCTTATAATTAATAATCCTATTATACCACGGAGTATAATTCAACTTTCATCATTTAATATATTCAATCTACCGCAATTTAAATCACCGGTTATTGAATAATAAACTAATCGAAATGAATAACTAACAGTTAAACCCGTAGAAAAATAGTATAAAAAAAATGAAAACATATTAACATTTCTAATTCTAACAATTTCTAAAATTATATCCTTAGAATAGAATCCAGCTAAAAAAGGTATTCCACATAAAGCTAAATTAGATACGTTAAAACAAGCTGAAGTTAAAGGTATATGAATTCTTAACCCCCCTATTAAACGAATATCTTGAGAATTATTTATATTATGAATAATAGCCCCAGCACATATAAACAATAATGCTTTAAATAAAGCATGAGTTAATAAATGAAATATAGCTAATTTTAAAAATCCTATAGACAAAATTCTTATTATTAAACCTAATTGACTTAAAGTAGATAAAGCAATAATTTTTTTTAAATCAAATTCAAAATTAGCTCCTAATCCAGCTATAAATATTGTTAATCCAGATAATAATAATATTAATTGTCCTAACCAAGAAGTTCTTAAGATAATATTAAATCGAATTAATAAATATACACCAGCTGTAACTAATGTAGAAGAATGAACTAAAGCAGAAACAGGTGTAGGAGCAGCTATAGCTGCAGGTAACCAAGAAGAAAAAGGAATCTGAGCTCTTTTAGTTATAGCAGCTAATATTACTAATCTTCCAATTATTAACATTTCAAATTCATTTTGTATAATTTCTAAATAAAAAATATAATTTCATCTTCCATAATTTAATATTCAAGCAATAGAAAGAAGTAAAGCTACATCCCCAATTCGATTAGATAACGCAGTTAATATACCAGCATTATAAGATTTAATATTTTGAAAATAAATTACTAAACAATAAGAAACAAGTCCTAAACCATCTCACCCTAATAAAATTCTAATTAAATTTGGTCTAATAATTAACAATATTATTGATAAAACAAATATTAATACTAATATAATGAATC
GATTAATATGATTATCATTTATTATGTATTCTTTTCTATAAAAAATCACTAAAGAAGAAATTATAAGAACAAAAGATATAAATAATAAACTTATTCAATCAAAAAGAAAAGTTATAACAATTCTTATAGAATTTAAAGAAACTAATTCTCACTCAATAAAATAAATCATATCATTTAACAAAAAATATAAACTTAATAAAAAACATGATAAACTTATAGAAATTAAATTAACAAATCTAATTCTACAAATAGATAAATATTTCATGATTTAAAATGAATATTTTCATATCACTAACACCACAAATTAGTATTTTTTTTAAACTATTTAAATATAATCATAATATAAATGATTCTCTTTTTAAAATTAATAAATTTAAAGGCAATCAATGTAACAATATTAATAAATATTCTCGAATTTTACCTCTTCTAAATGAATATACTCCAGAAAATAATTTACCATGCTGACTAAAAGAATATAAATATAAAGTATAAGCAGCTCTAAAAAAAGATAAAAAAGATAATAAAATTATAGAAATTCAAGATCAAGAAACAATTCTATTTAATAAATAAATTTCTCCTAATAAATTTAATGTTGGAGGAGCTGCTATATTAGCTGATCTTAATAAAAATCATCATAAAGTTATCGAAGGTATAAAATTTAATAAACCTTTATTAATTAATATTCTTCGACTTCCAAGACGTTCATAAGATACATTAGCTAAACAAAATAACCCAGAAGAACATAAACCATGAGCAATTATTAATGTATAAGAACCACATAAACCTCAATAAGTTATAGTTAAAAGTCCTGATAGAACAATTCCTATATGAGCAACAGATGAATAAGCAATTAAAGCCTTTAAATCAGTTTGACGTAAACAAACTAATCTAACTAATACACCTCCTActaatctaattctaattcaaacaaatctatacttcaaattTATTAACTGTAAAAAACTAATAACTCGTAATATTCCATAACCTCCTAATTTTAATATAATACCTGCTAAAATTATAGACCCAGAAACTGGAGCTTCAACATGAGCTTTAGGTAATCATAAATGAACTAAAAATATTGGTATTTTTACTAAAAAGGCACACAATAAACAAAAATATAATAAATCGTAATTAAACATAAAATTATTTATTAAATAAAAATTTATAGAACCAATTTTATTTATTaaataaaaaataccaattaatataggtaaagaaactaataaagtataaaataataaatataaaCCAGCTTGTAAACGTTCTGGCTGATAACCTCAACCTAAAATTAAAAATAATGTAGGAATTAGTCTTCTTTCAAAAAATAAATAAAATATAAATAATCTTATTCTTGAAAAAGTTAAAATCAACAATAATAATAAAATAATAATATTTAATAAAAATAAATTTTTATAATTATTATGTTTATTAATTATTTCTCTAGCTAATAATATTAATGAACAAATTCATAAACTTAATAAAATTAATCCATAAGATAATATATCACAACCTAAAAAATAAGAAATTTCTGATCAATAATTTATAAAATTATTTATTAATAAAAAAATAAATCTAATAAAAAATATTATAATTTGTACCATTCAATATATATTATTAATAAAACAAAAAGGAATTAAAAATAATAAAAAAAAAATAATTTTTAACATTATATAATTCTAAAAGATTGAAAATAATCATTACCATGAGTACGAATTATAGAAACTAAAATTGATAAACCTAAGGCCCCTTCACATACTCTAAATGTCAAAAATATTATTCTAAAATAACTTTCATAATTTAATATATTTAAATAAATAAATAATATAAAAAATAATATTAAAACAATAAATTCTAAACTTAAAAGTATTGAAAGTAAATGTTTCCGATTAGAAACAAAACAAAATAACCCTAAAATAAATAAAATTATAGGTAAACTTCAATATAAAATTATAATCATTAGTTTTAATAGTTTAATAAAAACATTGGTCTTGTAAATC
AAAAATAAGATTATTTCTTTTAAAACTTCAAGAGAAAAGAAATTTCTTTTTCATTAATCCCCAAAATTAATATTTTAAATAAACTACCTCTTGAAATTATTCAATTAATATTATATTCATTAATTATTACTACTTCCATTATTTTTCTAAATATAATTCATCCATTAGCTTTAGGATTAACTTTATTAATTCAAACAATTTTTGTATGTTTACTAACTGGATTAATAACTAAAAGTTTTTGATATTCATATATTTTATTTTTAATTTTTTTAGGAGGAATACTTGTATTATTTATTTACGTAACATCTTTAGCCTCTAATGAAATATTTAATTTATCAATAAAATTAACTCTATTTTCTTCATTAATTTTAATTTTTATATTAATTTTATCATTTATTATAGATAAAACTTCTTCTTCTTTATTTTTAATAAATAATGATATACAATCTATTATTAATATAAATTCTTATTTTATAGAAAATTCTTTATCTTTAAATAAATTATATAATTTTCCTACAAATTTTATTACAATTTTATTAATAAATTATTTATTAATTACTTTAATTGTTATTGTAAAAATTACAAAATTATTTAAAGGACCTATTCGAATAATATCTTAATTAATGAATAAACCTTTACGAAATTCCCATCCTCTATTTAAAATTGCCAATAATGCTTTAGTAGATTTACCAGCTCCAATTAATATTTCAAGATGATGAAATTTTGGATCATTACTTGGATTATGTTTAATTATTCAAATTTTAACCGGATTATTTTTAGCTATACATTACACAGCTGATATTAATCTAGCTTTCTATAGTGTTAATCATATTTGTCGAGACGTTAATTATGGTTGATTATTACGAACTTTACATGCTAACGGTGCATCATTTTTTTTTATTTGTATTTACTTACATGTAGGACGAGGAATTTATTACGGTTCATATAAATTTACTCCAACTTGATTAATTGGAGTAATTATTTTATTTTTAGTAATAGGAACAGCTTTTATAGGATACGTATTACCTTGAGGACAAATATCATTTTGAGGAGCTACTGTAATTACTAATTTATTATCAGCTATCCCTTACTTAGGTATAGATTTAGTTCAATGATTATGAGGTGGATTTGCTGTTGATAATGCCACTTTAACTCGATTTTTTACATTCCATTTTATTTTACCTTTTATTGTTCTTGCTATAACTATAATTCATTTATTATTCCTTCATCAAACAGGATCTAATAATCCTATCGGATTAAATTCTAATATTGATAAAATTCCTTTTCATCCTTATTTTACATTTAAAGATATTGTAGGATTTATTGTAATAATTTTTATTTTAATTTCATTAGTATTAATTAGACCAAATTTATTGGGAGACCCTGATAATTTTATTCCAGCAAATCCTTTAGTAACACCTGCCCATATTCAACCAGAATGATATTTTTTATTTGCTTATGCTATTTTACGATCTATTCCAAATAAATTAGGAGGAGTTATTGCATTAGTTTTATCAATTGCAATTTTAATAATCCTTCCTTTTTATAATTTAAGAAAATTCCGAGGGATTCAATTTTATCCTATTAATCAAGTAATATTCTGATCTATATTAGTAACAGTAATTTTATTAACTTGAATTGGAGCTCGACCAGTTGAAGAACCTTATGTATTAATTGGACAAATTCTAACTGTTGTATATTTCTTATATTATTTAGTAAACCCATTAATTACAAAATGATGAGATAATTTATTAAATTAAATAGTTAATGAGCTTGAATAAGCATATGTTTTGAAAACATAAGATAGAATTTAATTTTCTATTAACTTTTACTAAAAAAAATTCACTataataaagaaaataataaaattttaaacccaataaaaaataataaataatTTAAAGAAAAAGATAAAAAACATTTTCAAGCTAAATATATTAATTTATCATAACGAAATCGAGGTAAAGTTCCTCGAACTCAAATAAAAACAAAAGAAATAA
AAGTTAATTTTATATAAAATAATAAATTAAACACATCACAACCTAAAAAAATAACGCAAAATAATATTCTTATAAATAAAATTCTCGCATATTCAGCTATAAAAATTAAAGCAAAACCCCCTCTTCTATATTCTACATTAAATCCTGAAACTAATTCTGATTCTCCTTCAGCAAAATCAAAAGGAGTCCGATTAGTTTCAGCTAATGAAATAGATATTCAAACTAAAGCTATAGGAAATAAAATAATTAAAAATCACATATAAACTTGATAAAAAAAAAAATAAATTATATTATAACTTCCAATTAAAAAAATAAAAGATAATAAAATTAAAGCTAAACTAACTTCATAAGAAATAGTCTGAGCCACAGCTCGCAAACCTCCTAATAAAGCATAATTAGAATTAGACGACCAACCAGCTACTATAACAGTATAAACCCCCAATCTAGTACAACATAAAAAAAATAAACCCCCCAAATTAAAAGAATATAATTTTACAAAAAAAGGTATACATATTCAAACAAATAATGATAAAAATAAAGAAAAAATTGGAGAAATATAATATCTTAAATAATTAGATAATAAAGGATAAGTTTGTTCTTTTGTAAATAATTTAATCGCATCACAAAAAGGTTGAGGAATTCCTATTAAACCAACTTTATTAGGACCTTTACGAATTTGAATATATCCTAAAACTTTTCGTTCTAATAAAGTTAAAAAAGCTACACTTACTAATACACAAATAATTAATAACAAACTACCAATTAATGACAAAATAAATTCTATATAAAACAAGTACTATTTGTAATAAAAATCACATATATAAATTCTAAATTTATTGCACTAATCTGCCAAAATAGTTTTATATTAATAATATTCTTATAAAAAATATAATTATTTTGATATTTGGTCCTTTCGTACTAAAATATCATAATTTTTTAAAGATAGAAACCAACCTGGCTTACACCGGTTTGAACTCAGATCATGTAAGAATTTAAAAGTCGAACAGACTTAAAATTTGAACGGCTACACCCAAAATTATATCTTAATCCAACATCGAGGTCGCAATCTTTTTTATCGATATGAACTCTCCAAAAAAATTACGCTGTTATCCCTAAAGTAACTTAATTTTTTAATCATTATTAATGGATCAAATATTCATAAATTTATGTTTTTAAAAAATTAAAAGTTTTTTAAATTTTAATATCACCCCAATAAAATATTTTTATTTATTAAAATTTAATTAATCTATATAATTAAAATAAAAAAAAATATAAAGATTTATAGGGTCTTCTCGTCTTTTAAATAAATTTTAGCTTTTTGACTAAAAAATAAAATTCTATAAAAATTTTAAATGAAACAGTTAATATTTCGTCCAACCATTCATTCCAGCCTTCAATTAAAAGACTAATGATTATGCTACCTTTGCACAGTCAAAATACTGCGGCCATTTAAAATTTTCAGTGGGCAGGTTAGACTTTATATATAATTCAAAAAGACATGTTTTTGTTAAACAGGCGAATATTATTTTTGCCGAATTCTTTATTTAAACTTTTCATATAAATTAATTTTAACATTATTATATACTAATTTTATCATTATTACTTAATTTTAATAATTAAAACTAACATTTTAATAAATAATTAAAATTTAATAAATAATTTAATTTATAAAATAAATTATAACATATTTTTTAATAATTGCTAATTCTAAGCATATATTTATTAAATCTATTTAATATTTTTAAAAATTTATTTTATAGCTTATCCCATAAAACATTAAAATTATAAATTAATTAATTAAATAAATAATTAAGTAAATTTATAATTTCTAAATTAAATTTATTTCTTAAAAAACTAGATACCTTTAAAAACGAATAACATTTCATTTCTAATATAATATTATAAATAATTTTATCACATTAACTTAAATATTATATTAACTCTTTTAAAATCGAGAAAAATAAATATTTATTTTTTATTTAATAAACACTGATACACAA
GGTACAATAAATTAAATTTTCTTTTAAAATAAAATTTTTTCAAATTATTTCAATTTTCTTTTACAATACTAATAAACTATTATTAAAATTATTTTTTCTTTAAACAATACTAAAACTTTAAATTTTATAGTTATTTCTAATAATTTTTTAAAAAATAATAAAAATTAATAAATAAAAACTAACTCAATTTATATTGATTTGCACAAAAATCTTTTCAATGTAAATGAAATACTTTACTTAATAAGCTTTAAATTGTCATTCTAGATACACTTTCCAGTACATCTACTATGTTACGACTTATCTTACCTTAATAATAAGAGCGACGGGCGATGTGTACATATTTTAGAGCTAAAATCAAATTATTAATCTTTATAATTTTACTACTAAATCCACTTTCAAAAATTTTTTCATAATTTTATTCATATAAATAAATTTATTGTAACCCATTATTACTTAAATATAAGCTACACCTTGATCTGATATAAatttttattaaaattattgaatattattattcttatAAAATATTCTGATAACGACGGTATATAAACTGATTACAAATTTAAGTAAGGTCCATCGTGGATTATCGATTAAAAAACAGGTTCCTCTAGATAGACTAAAATACCGCCAAATTTTTTAAGTTTCAAGAACATAACTATTACTACTTTAGCAATTTATTTACATTTTAAATAATAGGGTATCTAATCCTAGTTTTTTATTAAAATTTTTTAACCTCAATTACATTTTTATATAATAATTTAAATATAAAATTTCACTTAATATATTTAATTTTATTATTATTAATAAATTTAATTTAATTAATACTAAAAAAATTTATTTGTATTAATGGTATAACCGCGACTGCTGGCACCAATTTAGTCAATACTTTTTTATATTGCTATTTCTAAATTTCTTTAATTAATAATATTAATTACTGCGAATAAATTTTCATATTTATTTTTTAAATAAATATAAAATCACACAAAAATTTACATATAAATCAAATTAATAACAAATTTTTAAGCCAAAATAAAACTTTAAATTTTTATTTTTGATTTTTTATTATTAATTAAATATTAATAATTTTTATTAAAATAATTTTTTAAAGAAAAATTAAAATTAATTTTAATTAAATATTAAAATAATTTAATTTTATAATAAAATTTTTATCATATTATAATAATATAAAAATTTTATAAATTTATTTTTTAAATTTTACAAAATTTTTAAAATTTTTATTTTTTTTAAAAAAAATAATTTTTAACAAAAAAAATTTTTATCAAAAATTAATATAAAATAAATTTTAATTTAAAAATTAAAAATTTTAATTTTACACTTTTTTAAAAATATTTTTTTTTAAAAAAAAAATTTTTTTTTAAAAAAATTTTTTTTTAAAAAAAATTTAAAAAATTATAGATTAATTTCTTTTAAATGACTAAAAAAAATTTTTTTTTTTAAGTATTTTAAAACTTTTTTTTTACAATTTTTAAAAAAATATATAAATATAAATTTTAAAAAAAATTTTTTTTTTAAAAAAAATGAAAATTATATTATAAAAATATTTTTTTTACAAAAATGAAAATTTAATCTATTAAAAAAAATTATTAAAATTTTTATAAATAAATAAAAAAAGTAATAAATTTATTAAAAATCAATATATATATAATAATAAATAATTTGATTATTAATTAAATTATACGAATAATAAATATAATAAATAATTTATTTTAATCAATAAATCTGAAATAATTAATTATATACATATATATATATATGTAAATAAATAAAAATAAATTTATTCCCCCTATTTATAAATTTATTATATAATTAAAACTTAAAAAATATTTTTTTTAAAAAAATAGTTTATTAAATTATACTTAATAAACTATTTTTATAATAAATTATTTTATAAATAAAATTATTTAAAATAATTAATAAAAATATTTTTATTGTAATAAAAATTAAAAATAATTTTAAAAAAATTAAATTTATATATTTAT
ATATATATATATATAATTTTTAATTTTCAATTAAATTATATAAATATAATAAAATAATTTTATTTAATCACTAAATCTGAAATAATTAATtataaatatatatatatatatatatatatatatatatatataAATGAAAATAAATTTATTCCCCCTATTCATAAATTTATTGTATAATTAAAACTTAAAAAATATTTTTTTTTAAAAAAAAATTATTTATTAAATTATACTTAATAAACTATTTTTATAATAAATTATTTTATAAATAAAATTATTTTAAATAATTAATAAAAATATTTTTAATATAATAAAAATTTAAAATGATTTTTTATAAAAATTAAATTCATATTTATATATATATATATATAATTTAATTTTCAATTAAATTATATAAGTATAATAAAATAATTTATTTTAATCACTAAATCTGAATTAATTAATTGTATATATATATATATATATAAAAAAAATGAAAATAAATTTATTCCCCCTATTCATAAATTTATTATATAATTAAATCTTAAAAAGTATTTTTTTTTTAAAAAAAAATTATTTATTAAATTATACTTAATAAACTATTTTTATAATAAATTATTTTATGAATAAAATTATTTAAAATAATTTATAAAAATATTTTTAATATAATGAAAATTTAAAATGATTTTTTATTATTAATTAAATTCatatatttacatatatatatatatatatatatatatataGATAATTTAATTTTCAATTAAATTATATAAGTATAATAAAATAATTTATTTTAATCACTAAATCTGAATTAATTAATTGTATGTATATATATATATATATATATAAAAAAATGAAAATAAATTTATTCCCCCTATTCATAAATTTATTATATAATTAAATCTTAAAAAGTATTTTTTTTTAAAAAAAAAATTATTTATTAAATTATACTTAATAAACTATTTTTATAATAAATTATTTTATGAATAAAATTATTTAAAATAATTAATAAAAATATTTTTAATATAATGAAAATTTAAAATGATTTTTTATTATTAATTAAATTCatatatttatatatatatatatatatatatatataGATAATTTAATTTTCAATTAAATTATATAAGTATAATAAAATAATTTATTTTAATCACTAAATCTGAATTAATTAATTGTATGtatatatatatatatatatatatatatatataAAAAATGAAAATAAATTTATTCCCCCTATTCATAAATTTATTATATAATTAAATCTTAAAAAGTATTTTTTTTTAAAAAAAAATTATTTATTAAATTATACTTAATAAACTATTTTTATAATAAATTATTTTATGAATAAAATTATTTAAAATAATTAATAAAAATTATATATATATATAAATGAAAATAATTTTTAAATTTTAATAATAAATAAATTTAATAATTAATAATTAAATAAAATCTATTCATTATTAATATTTAATTAATAATAAATAAATTTAATAACTAATAATTAAATAAAATTTATTTATTATTAATATTTAATTAATAATAAAAAATCATCAttttttttttttttttttttatttAATTAATTATtatatatttataaatttatatattattcaatatttataatatatatatatatatatatatataAAAATTAAATTATTTAAATAATTTAATATAAATTTTTAAAAAATTTCTTAAATGTATTATTTTTATAAAAAATATTTATATAATAAAATTATTTTTTTTTAAAAATAAACAAAAAATTTTTAATAAATAAATTTTATAATGAAATATAATTTATTTATTTTTTATTTTTTTAAAAAAAATTTAAAAAAAAATAATTTTTTTTTAAAAAAAAACTATATACTAATTATAAATTAATAGATATTTATATATATATAAATATTTAATATATTATTATATATCTAATAATTTAAATAAAAAATTTTAAAATTTAAAAATGTAGATATAATTTATAAAAATTTATATTCTCATATTTATTTATTATT
AATTTAATTTATATAAATAATATAATAATTTAATTAATTATTATATATTTATAAATTTATATATTATTGAATATTTATATAATATATATATATATATAGAAAAATTAAATTATTTAAATAATTTAATATAAATTTTTAAAAAATTTCTTAAATGTATTATTTTTATAAAAAATATTTATATAATAAAATCATTTTTTTTTAAAAATAAACAAAAAATTTTTAATAAATAAATTTTATAATGAAATATAATTTATTTATTTTTTATTTTTTTAAAAAAAAATTTTTTAAAAAAAAATAATTTTTTTTTTAAAAAAACTATATACTAATTATAAATTAATAGATATTTATATATATATAAATATTTAATATATTATTATATATCTAATAATTTAAATAAAAAATTTTAAAATTTAAAAATGTAGATATAATTTATAAAAATTTATATTCTCATATTTATTTATTATTAATTTAATTTATATAAATAATATAATAATTTAATTAATTATTATATATTTATAAATTTATATATTATTGAATATTTATATAATATATATATATATATAGAAAAATTAAATTATTTAAATAATTTAATATAAATTTTTAAAAAATTTCTTAAATGTATTATTTTTATAAAAAATATTTATATAATAAAATCATTTTTTTTAAAAATAAACAAAAAATTTTTAATAAATAAATTTTATAATGAAATATAATTTATTTATTTTTTATTTTTTTAAAAAAAATTTTTTAAAAAAAAATAATTTTTTTTTAAAAAAACTATATACTAAATATAAATTAATAGATATTTATATATATATAAATATTTAATATATTATTATATATCTAATAATTTAAATAAAAAATTTTAAAATTTAAAAATGTAGATATAATTTATAAAAATTTATATTCTCATATTTATTTATTATTAATTTAATTTATATAAATAATATAATAATTTAATTAATTATTATATATTTATAAATTTATATATTATTGAATATTTATATAATATATATATATATATAGAAAAATTAAATTATTTAAATAATTTAATATAAATTTTTAAAAAATTTCTTAAATGTATTATTTTTATAAAAAATATTTATATAATAAAATCATTTTTTTTTAAAAATAAACAAAAAATTTTTAATAAATAAATTTTATAATGAAATATAATTTATTTATTTTTTATTTTTTTTAAAAAAAATTTTTTAAAAAAAATAATTTTTTTTTAAAAAAACTATATACTAAATATAAATTAATAGATATTTATATATATATAAATATTTAATATATTATTATATATCTAATAATTTAAATAAAAAATTTTAAAATTTAAAAATGTAGATATAATTTATAAAAATTTATATTCTCATATTTATTTATTATTAATTTAATTTATATAAATAATATAATAATTTAATTAATTATTATATATTTATAAATTTATATATTATTGAATATTTATATATAATATATATATATATAGAAAAATAAAATTATTTAAATAATTTTACATAAAATTTTAAAAAATTTCTTAAATGTATTATTTAATAAAAAATTACTTTTTAAAAAAAATAATTTTAATTTTTTaaaaaaaatagtaaataataaaaaaaaaaaaaaaaaaaaatgaaaaTTATATTATT
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/vep_versioned_annotation_cache.loc Tue Jun 28 21:07:04 2022 +0000 @@ -0,0 +1,3 @@ +#<value> <dbkey> <version> <cachetype> <name> <species> <path> +# +drosophila_melanogaster_vep_106_BDGP6.32 dm6 106 default Drosophila melanogaster dm6 (V106) drosophila_melanogaster ${__HERE__}/test-cache
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/dbkeys.loc.sample Tue Jun 28 21:07:04 2022 +0000 @@ -0,0 +1,1 @@ +#<dbkey> <display_name> <len_file_path> \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/fasta_indexes.loc.sample Tue Jun 28 21:07:04 2022 +0000 @@ -0,0 +1,29 @@ +#This is a sample file distributed with Galaxy that enables tools +#to use a directory of Samtools-indexed sequence data files. You will need +#to create these data files and then create a fasta_indexes.loc file +#similar to this one (store it in this directory) that points to +#the directories in which those files are stored. The fasta_indexes.loc +#file has this format (white space characters are TAB characters): +# +# <unique_build_id> <dbkey> <display_name> <file_base_path> +# +#So, for example, if you had the hg19 Canonical index stored in +# +# /depot/data2/galaxy/hg19/sam/, +# +#then the fasta_indexes.loc entry would look like this: +# +#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /depot/data2/galaxy/hg19/sam/hg19canon.fa +# +#and your /depot/data2/galaxy/hg19/sam/ directory +#would contain hg19canon.fa and hg19canon.fa.fai files. +# +#Your fasta_indexes.loc file should include an entry per line for +#each index set you have stored. The file in the path does actually +#exist, but it should never be directly used. Instead, the name serves +#as a prefix for the index file. For example: +# +#hg18canon hg18 Human (Homo sapiens): hg18 Canonical /depot/data2/galaxy/hg18/sam/hg18canon.fa +#hg18full hg18 Human (Homo sapiens): hg18 Full /depot/data2/galaxy/hg18/sam/hg18full.fa +#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /depot/data2/galaxy/hg19/sam/hg19canon.fa +#hg19full hg19 Human (Homo sapiens): hg19 Full /depot/data2/galaxy/hg19/sam/hg19full.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/vep_versioned_annotation_cache.loc.sample Tue Jun 28 21:07:04 2022 +0000 @@ -0,0 +1,11 @@ +#This file describes vep cache data and its metadata available on the server. +#The data table has the format (white space characters are TAB characters): +# +#<value> <dbkey> <version> <cachetype> <name> <species> <path> +# +#So, vep_versioned_annotation_cache.loc tables could look like this: +# +#homo_sapiens_vep_105_GRCh38 hg38 105 default Homo sapiens hg38 (V105) homo_sapiens /path/to/vep_versioned_annotation_cache/105/hg38/default +#homo_sapiens_refseq_vep_105_GRCh38 hg38 105 refseq Homo sapiens hg38 (V105, Refseq) homo_sapiens /path/to/vep_versioned_annotation_cache/105/hg38/refseq +#homo_sapiens_merged_vep_105_GRCh38 hg38 105 merged Homo sapiens hg38 (V105, Merged) homo_sapiens /path/to/vep_versioned_annotation_cache/105/hg38/merged +# \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Tue Jun 28 21:07:04 2022 +0000 @@ -0,0 +1,17 @@ +<tables> + <!-- Table of installed, versioned VEP annotation caches (column layout is described in vep_versioned_annotation_cache.loc.sample) --> + <table name="vep_versioned_annotation_cache" comment_char="#"> + <columns>value, dbkey, version, cachetype, name, species, path</columns> + <file path="tool-data/vep_versioned_annotation_cache.loc" /> + </table> + <!-- Locations of dbkeys and len files under genome directory --> + <table name="__dbkeys__" comment_char="#"> + <columns>value, name, len_path</columns> + <file path="tool-data/dbkeys.loc" /> + </table> + <!-- Location of SAMtools-indexed FASTA reference files (see fasta_indexes.loc.sample) --> + <table name="fasta_indexes" comment_char="#"> + <columns>value, dbkey, name, path</columns> + <file path="tool-data/fasta_indexes.loc" /> + </table> +</tables> \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.test Tue Jun 28 21:07:04 2022 +0000 @@ -0,0 +1,12 @@ +<tables> + <!-- Table of versioned VEP cache data used by the tool tests (loaded from test-data) --> + <table name="vep_versioned_annotation_cache" comment_char="#"> + <columns>value, dbkey, version, cachetype, name, species, path</columns> + <file path="${__HERE__}/test-data/vep_versioned_annotation_cache.loc" /> + </table> + <!-- SAMtools-indexed FASTA references used by the tool tests (loaded from test-data) --> + <table name="fasta_indexes" comment_char="#"> + <columns>value, dbkey, name, path</columns> + <file path="${__HERE__}/test-data/fasta_indexes.loc" /> + </table> +</tables> \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/vcf2maf.xml Tue Jun 28 21:07:04 2022 +0000
@@ -0,0 +1,163 @@
+<tool id="vcf2maf" name="Convert VCF to MAF" version="@TOOL_VERSION@">
+    <description>with vcf2maf</description>
+    <!-- Galaxy wrapper around vcf2maf.pl: converts a VCF dataset into a MAF file, optionally running Ensembl VEP against a versioned annotation cache. -->
+    <macros>
+        <token name="@TOOL_VERSION@">1.6.21</token>
+        <!-- VEP release: pins the ensembl-vep requirement and selects the matching annotation caches. -->
+        <token name="@DB_VERSION@">106</token>
+    </macros>
+    <requirements>
+        <requirement type="package" version="@TOOL_VERSION@">vcf2maf</requirement>
+        <requirement type="package" version="@DB_VERSION@.1">ensembl-vep</requirement>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+        ## Expose the inputs under fixed names in the job working directory.
+        ln -s '${input1}' MainInput.vcf &&
+        #if $ref_seq.ref_source == "cached":
+            ln -s '${ref_seq.ref.fields.path}' reference.fa &&
+        #elif $ref_seq.ref_source == "history":
+            ln -s '${ref_seq.ref}' reference.fa &&
+        #end if
+        vcf2maf.pl --input-vcf MainInput.vcf --output-maf MainOutput.maf --ref-fasta reference.fa
+        #if $annotation_cache.source == "no_vep":
+            --inhibit-vep
+        #else:
+            --vep-path \$(dirname \$(which vep))
+            --vep-data '${annotation_cache.cache_file.fields.path}'
+            --species '${annotation_cache.cache_file.fields.species}'
+            ## The assembly name is whatever follows "<version>_" in the cache table value.
+            --ncbi-build '${annotation_cache.cache_file.fields.value.split($annotation_cache.cache_file.fields.version + "_")[-1]}'
+            ## The cache version is only passed explicitly when it differs from the pinned VEP release.
+            #if $annotation_cache.cache_file.fields.version != "@DB_VERSION@": --cache-version '${annotation_cache.cache_file.fields.version}'
+        #end if
+
+        #if $tumor_id:
+            --tumor-id '${tumor_id}'
+        #end if
+        #if $normal_id:
+            --normal-id '${normal_id}'
+        #end if
+        #if $vcf_tumor_id:
+            --vcf-tumor-id '${vcf_tumor_id}'
+        #end if
+        #if $vcf_normal_id:
+            --vcf-normal-id '${vcf_normal_id}'
+        #end if
+
+        #if $adv_opt.any_allele:
+            --any-allele
+        #end if
+        ## Compare the string value so that an explicit 0 is passed on instead of being treated as "not set".
+        #if str($adv_opt.min_hom_vaf) != "":
+            --min-hom-vaf $adv_opt.min_hom_vaf
+        #end if
+        #if $adv_opt.maf_center:
+            --maf-center '${adv_opt.maf_center}'
+        #end if
+        #if $adv_opt.retain_info:
+            --retain-info '${adv_opt.retain_info}'
+        #end if
+        #if $adv_opt.retain_fmt:
+            --retain-fmt '${adv_opt.retain_fmt}'
+        #end if
+        #if $adv_opt.retain_ann:
+            --retain-ann '${adv_opt.retain_ann}'
+        #end if
+    ]]></command>
+    <inputs>
+        <param type="data" name="input1" label="VCF input file" format="vcf">
+            <validator type="unspecified_build" />
+        </param>
+        <conditional name="ref_seq">
+            <param name="ref_source" type="select" label="Select FASTA file as reference sequence">
+                <option value="cached">Locally cached</option>
+                <option value="history">History</option>
+            </param>
+            <when value="cached">
+                <param name="ref" type="select" label="Select reference sequence">
+                    <options from_data_table="fasta_indexes">
+                        <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file" />
+                    </options>
+                </param>
+            </when>
+            <when value="history">
+                <param name="ref" type="data" format="fasta" label="Select reference sequence" />
+            </when>
+        </conditional>
+        <conditional name="annotation_cache">
+            <param name="source" type="select" label="Select the source of annotation data if you want to use VEP" help="vcf2maf can utilize Ensembl's VEP to select a single effect per variant. VEP can only be used if SIFT is available for the selected genome assembly. Ensembl strongly recommends to only use annotation cache files with a version number matching the VEP version. You can disable the corresponding filtering of available cache files at your own risk.">
+                <option value="no_vep" selected="true">Do not use VEP</option>
+                <option value="restricted">Use VEP with a cache file with matching version number</option>
+                <option value="unrestricted">Use VEP with any cache file</option>
+            </param>
+            <when value="no_vep"/>
+            <when value="restricted">
+                <!-- Column 2 of the data table is the cache version, column 4 the display name. -->
+                <param name="cache_file" type="select" label="Select annotation cache file" help="If the annotation data of interest is not listed, have a look at all available cache files regardless of their version number or contact your Galaxy admin.">
+                    <options from_data_table="vep_versioned_annotation_cache">
+                        <filter type="static_value" value="@DB_VERSION@" column="2" />
+                        <filter type="sort_by" column="4"/>
+                    </options>
+                    <validator type="no_options" message="No annotation caches are available"/>
+                </param>
+            </when>
+            <when value="unrestricted">
+                <param name="cache_file" type="select" label="Select annotation cache file" help="If the annotation data of interest is not listed, contact your Galaxy admin.">
+                    <options from_data_table="vep_versioned_annotation_cache">
+                        <filter type="sort_by" column="4"/>
+                    </options>
+                    <validator type="no_options" message="No annotation caches are available"/>
+                </param>
+            </when>
+        </conditional>
+
+        <param argument="--tumor-id" type="text" optional="true" label="Enter tumor sample ID (optional)" help="Used to fill the Tumor_Sample_Barcode column of the output MAF with the tumor sample ID."/>
+        <param argument="--normal-id" type="text" optional="true" label="Enter normal sample ID (optional)" help="Used to fill the Matched_Norm_Sample_Barcode column of the output MAF with the normal sample ID."/>
+        <param argument="--vcf-tumor-id" type="text" optional="true" label="Enter name of tumor genotype column (optional)" help="VCFs from variant callers like VarScan use hardcoded sample IDs TUMOR/NORMAL to name genotype columns. Use this parameter to have vcf2maf correctly locate these columns to parse genotypes, while still printing proper sample IDs in the output MAF."/>
+        <param argument="--vcf-normal-id" type="text" optional="true" label="Enter name of normal genotype column (optional)" help="VCFs from variant callers like VarScan use hardcoded sample IDs TUMOR/NORMAL to name genotype columns. Use this parameter to have vcf2maf correctly locate these columns to parse genotypes, while still printing proper sample IDs in the output MAF."/>
+
+        <section name="adv_opt" title="Advanced options">
+            <param argument="--any-allele" type="boolean" optional="true" checked="false" label="Allow also mismatched variant alleles when reporting co-located variants"/>
+            <param argument="--min-hom-vaf" type="float" optional="true" min="0" max="1" label="Enter minimum allele fraction to call a variant homozygous if GT is undefined in VCF" help="Default value is 0.7"/>
+            <param argument="--maf-center" type="text" optional="true" label="Enter variant calling center to report in MAF"/>
+            <param argument="--retain-info" type="text" optional="true" label="Enter comma-delimited names of INFO fields to retain as extra columns in MAF"/>
+            <param argument="--retain-fmt" type="text" optional="true" label="Enter comma-delimited names of FORMAT fields to retain as extra columns in MAF"/>
+            <param argument="--retain-ann" type="text" optional="true" label="Enter comma-delimited names of VEP annotations (within the VEP CSQ/ANN) to retain as extra columns in MAF"/>
+        </section>
+    </inputs>
+    <outputs>
+        <data name="output1" format="tabular" from_work_dir="MainOutput.maf" />
+    </outputs>
+    <tests>
+        <!-- Reference FASTA taken from the history, VEP disabled -->
+        <test expect_num_outputs="1">
+            <param name="input1" dbkey="hg19" value="input_test1.vcf" ftype="vcf" />
+            <param name="ref_source" value="history" />
+            <param name="ref" dbkey="hg19" value="test1.fa" ftype="fasta" />
+            <param name="annotation_cache.source" value="no_vep" />
+            <output name="output1" file="output_test1.tabular" ftype="tabular" />
+        </test>
+        <!-- Reference FASTA taken from the fasta_indexes data table, VEP disabled -->
+        <test expect_num_outputs="1">
+            <param name="input1" dbkey="hg19" value="input_test1.vcf" ftype="vcf" />
+            <param name="ref_source" value="cached" />
+            <param name="ref" value="hg19test" />
+            <param name="annotation_cache.source" value="no_vep" />
+            <output name="output1" file="output_test1.tabular" ftype="tabular" />
+        </test>
+        <!-- VEP enabled, using the version-matched (106) Drosophila test cache -->
+        <test expect_num_outputs="1">
+            <param name="input1" dbkey="dm6" value="input_test2.vcf" ftype="vcf" />
+            <param name="ref_source" value="history" />
+            <param name="ref" dbkey="dm6" value="test2.fa" ftype="fasta" />
+            <param name="source" value="restricted" />
+            <param name="cache_file" value="drosophila_melanogaster_vep_106_BDGP6.32" />
+            <output name="output1" file="output_test2.tabular" ftype="tabular" />
+        </test>
+    </tests>
+    <help><![CDATA[
+        The tool vcf2maf can parse a wide range of VCF-like formats and convert these into the `Mutation Annotation Format (MAF) <https://docs.gdc.cancer.gov/Data/File_Formats/MAF_Format/>`__. A central part of the conversion process is the selection of a single effect per variant. While this is often a subjective decision, vcf2maf offers a standardized way to achieve this by optionally utilizing Ensembl's `Variant Effect Predictor (VEP) <https://www.ensembl.org/info/docs/tools/vep/index.html>`__. ]]></help>
+    <citations>
+        <citation type="doi">10.5281/zenodo.593251</citation>
+    </citations>
+</tool>
\ No newline at end of file