Mercurial > repos > gga > apollo_fetch_jbrowse
changeset 13:6905bb3ce9db draft
"planemo upload for repository https://github.com/galaxy-genome-annotation/galaxy-tools/tree/master/tools/apollo commit 7bf18c69becc57152efdadedf7ebd0bcfa382a15"
author | gga |
---|---|
date | Mon, 29 Jun 2020 09:13:41 -0400 |
parents | 30ea2be4a236 |
children | b7c44671f012 |
files | create_features_from_gff3.py export.py fetch_organism_jbrowse.py macros.xml test-data/create_org/output.json test-data/create_org/output2.json test-data/export/cdna.fa test-data/export/cds.fa test-data/export/pep.fa test-data/load_gff3/output.tsv test-data/load_gff3/output.txt |
diffstat | 11 files changed, 727 insertions(+), 44 deletions(-) [+] |
line wrap: on
line diff
--- a/create_features_from_gff3.py Fri Apr 24 09:03:01 2020 -0400 +++ b/create_features_from_gff3.py Mon Jun 29 09:13:41 2020 -0400 @@ -1,5 +1,6 @@ #!/usr/bin/env python import argparse +import json import logging from apollo import accessible_organisms @@ -16,6 +17,8 @@ parser = argparse.ArgumentParser(description='Sample script to add an attribute to a feature via web services') parser.add_argument('email', help='User Email') parser.add_argument('--source', help='URL where the input dataset can be found.') + parser.add_argument('--use_name', action='store_true', help='Use the given name instead of generating one.') + parser.add_argument('--disable_cds_recalculation', action='store_true', help='Disable CDS recalculation and instead use the one provided.') OrgOrGuess(parser) parser.add_argument('gff3', type=argparse.FileType('r'), help='GFF3 file') @@ -42,4 +45,5 @@ if not orgs: raise Exception("You do not have write permission on this organism") - wa.annotations.load_gff3(org_cn, args.gff3, args.source) + load_result = wa.annotations.load_gff3(org_cn, args.gff3, args.source, use_name=args.use_name, disable_cds_recalculation=args.disable_cds_recalculation) + print(json.dumps(load_result, indent=2))
--- a/export.py Fri Apr 24 09:03:01 2020 -0400 +++ b/export.py Mon Jun 29 09:13:41 2020 -0400 @@ -52,28 +52,28 @@ uuid_gff = wa.io.write_downloadable(org['commonName'], 'GFF3', export_gff3_fasta=True, sequences=seqs) if 'error' in uuid_gff or 'uuid' not in uuid_gff: - raise Exception("Apollo failed to prepare the file for download: %s" % uuid_gff) + raise Exception("Apollo failed to prepare the GFF3 file for download: %s" % uuid_gff) args.gff.write(wa.io.download(uuid_gff['uuid'], output_format="text")) time.sleep(1) uuid_vcf = wa.io.write_downloadable(org['commonName'], 'VCF', sequences=seqs) if 'error' in uuid_vcf or 'uuid' not in uuid_vcf: - raise Exception("Apollo failed to prepare the file for download: %s" % uuid_vcf) + raise Exception("Apollo failed to prepare the VCF file for download: %s" % uuid_vcf) args.vcf.write(wa.io.download(uuid_vcf['uuid'], output_format="text")) time.sleep(1) uuid_fa = wa.io.write_downloadable(org['commonName'], 'FASTA', sequences=seqs, seq_type='cdna') if 'error' in uuid_fa or 'uuid' not in uuid_fa: - raise Exception("Apollo failed to prepare the file for download: %s" % uuid_fa) + raise Exception("Apollo failed to prepare the cdna FASTA file for download: %s" % uuid_fa) args.fasta_cdna.write(wa.io.download(uuid_fa['uuid'], output_format="text")) time.sleep(1) uuid_fa = wa.io.write_downloadable(org['commonName'], 'FASTA', sequences=seqs, seq_type='cds') if 'error' in uuid_fa or 'uuid' not in uuid_fa: - raise Exception("Apollo failed to prepare the file for download: %s" % uuid_fa) + raise Exception("Apollo failed to prepare the cds FASTA file for download: %s" % uuid_fa) args.fasta_cds.write(wa.io.download(uuid_fa['uuid'], output_format="text")) time.sleep(1)
--- a/fetch_organism_jbrowse.py Fri Apr 24 09:03:01 2020 -0400 +++ b/fetch_organism_jbrowse.py Mon Jun 29 09:13:41 2020 -0400 @@ -57,7 +57,7 @@ if __name__ == '__main__': - parser = argparse.ArgumentParser(description='Sample script to add an attribute to a feature via web services') + parser = argparse.ArgumentParser(description='Script to fetch organism data dir from apollo') OrgOrGuess(parser) parser.add_argument('target_dir', help='Target directory') parser.add_argument('email', help='User Email')
--- a/macros.xml Fri Apr 24 09:03:01 2020 -0400 +++ b/macros.xml Mon Jun 29 09:13:41 2020 -0400 @@ -1,10 +1,10 @@ <?xml version="1.0"?> <macros> - <token name="@WRAPPER_VERSION@">4.2.1</token> + <token name="@WRAPPER_VERSION@">4.2.3</token> <xml name="requirements"> <requirements> - <requirement type="package" version="4.2.1">apollo</requirement> + <requirement type="package" version="4.2.3">apollo</requirement> <yield/> </requirements> </xml>
--- a/test-data/create_org/output.json Fri Apr 24 09:03:01 2020 -0400 +++ b/test-data/create_org/output.json Mon Jun 29 09:13:41 2020 -0400 @@ -1,7 +1,7 @@ { "commonName": "Test org", "blatdb": "/data/temporary/apollo_data/1384-Test_org/searchDatabaseData/genome.2bit", - "metadata": "{\"creator\":\"20\"}", + "metadata": "{\"creator\":\"xx\"}", "annotationCount": 0, "currentOrganism": true, "obsolete": false, @@ -11,6 +11,6 @@ "valid": true, "genus": "genus", "species": null, - "id": 23, + "id": "xx", "nonDefaultTranslationTable": null }
--- a/test-data/create_org/output2.json Fri Apr 24 09:03:01 2020 -0400 +++ b/test-data/create_org/output2.json Mon Jun 29 09:13:41 2020 -0400 @@ -1,7 +1,7 @@ { "commonName": "Test org", "blatdb": "/data/temporary/apollo_data/1384-Test_org/searchDatabaseData/genome.2bit", - "metadata": "{\"creator\":\"20\"}", + "metadata": "{\"creator\":\"xx\"}", "annotationCount": 0, "currentOrganism": true, "obsolete": false, @@ -11,6 +11,6 @@ "valid": true, "genus": "genus2", "species": "sp", - "id": 23, + "id": "xx", "nonDefaultTranslationTable": null }
--- a/test-data/export/cdna.fa Fri Apr 24 09:03:01 2020 -0400 +++ b/test-data/export/cdna.fa Mon Jun 29 09:13:41 2020 -0400 @@ -1,4 +1,4 @@ ->ff2fe902-7bab-431c-be82-30ed072915d1 (mRNA) 690 residues [Merlin:2-691 + strand] [cdna] name=Unknown +>df4157db-1481-4120-858c-e6a4c0f78547 (mRNA) 690 residues [Merlin:2-691 + strand] [cdna] name=Merlin_1_mRNA-00001 CGTTTAGACAAAGGTACATTATTGTATCGTGGCCAAAAATTAGACCTTCCTACATTCGAG CATAACGCAGAGAATAAGTTGTTCTATTTCAGAAACTACGTTTCAACTTCATTAAAGCCT CTGATCTTTGGTGAATTTGGTCGTATGTTTATGGCACTAGATGACGATACTACAATTTAT @@ -11,13 +11,13 @@ CTGTTCATGGAAACAGGTGAAGTAGTAAAACTGTCCGGATTCATGCAGTTCGTCAACGAA TCTGCATACGATGAAGAGCAAAACCAGATGGCTGCTGAGATTCTGTCTGGATTCTTGGAC ATTGATGACATGCCACGTAAGTTCCGCTAG ->f2e1909a-1d40-4a49-a67b-5fe2afdc4957 (mRNA) 288 residues [Merlin:752-1039 + strand] [cdna] name=Unknown +>eedd9b31-f1dd-487c-a3ab-6435b2214685 (mRNA) 288 residues [Merlin:752-1039 + strand] [cdna] name=mrna-name-00001 ATGAAATCAATTTTTCGTATCAACGGTGTAGAAATTGTAGTTGAAGATGTAGTTCCTATG TCTTATGAATTCAATGAAGTTGTTTTCAAAGAGCTTAAGAAAATTTTAGGCGATAAGAAG CTTCAAAGTACTCCAATTGGACGTTTTGGAATGAAAGAAAACGTTGATACTTATATTGAA AGTGTAGTGACAGGGCAGTTAGAAGGTGAATTTTCTGTAGCAGTTCAAACTGTAGAAAAT GATGAAGTTATTTTAACTTTACCAGCTTTCGTAATTTTCCGCAAATAA ->12fe0db6-c8e1-4bc9-b594-87c92c6c9669 (mRNA) 945 residues [Merlin:1067-2011 - strand] [cdna] name=Unknown +>8cd6db6d-b818-4b65-b6f2-de79844a5641 (mRNA) 945 residues [Merlin:1067-2011 - strand] [cdna] name=Merlin_3_mRNA-00001 ATGCTAACTTTAGATGAATTTAAAAACCAAGCGGGTAATATAGACTTTCAGCGTACTAAT ATGTTTAGTTGTGTATTTGCAACTACTCCGTCAGCAAAGTCTCAACAATTACTCGATCAA TTTGGCGGTATGCTCTTTAATAACCTTCCGTTGAATAATGACTGGCTTGGATTAACACAA @@ -34,7 +34,7 @@ GATTGGATTGAAGATAGAGCTGTTAATTCTATAACTGGAATTAATAGTGAAATGTCTCTT AATGGAAGTTTAAGTAGATTATCTAGACTTGGAGGAGCTGCTGGAGGGTTGTCTCACGTC ATTAATTCGACCCGAAACTCTACTTCGAAAATACTTGGATTGTAA ->58fc8255-95ed-4417-a373-238f826810ac (mRNA) 1056 residues [Merlin:2011-3066 - strand] [cdna] name=Unknown +>125a44c4-44da-4854-9d3b-ae2aa0221120 (mRNA) 1056 residues [Merlin:2011-3066 - strand] [cdna] name=Merlin_4_mRNA-00001 ATGAGCATTAAAGTCAGAGAATTAGATGATAAGACTGATGCTTTAATTAGCGGAGTTAAA ACCTCCGCTGGTCAAAGTTCACAATCAGCAAAAATAAAATCCACTATAACTGCGCAATAT CCGTCTGAACGTTCAGCTGGTAATGACACATCTGGTTCTTTACGAGTTCATGATCTTTAT @@ -53,7 +53,7 @@ TGTCAAATTCAGAGTATCCGTTTTGATAAAACTCCAAATGGAAACTTTAACGGTTTAGCT ATAGCTCCAAACCTGCCAAGTACATTCACATTAGAAATTACTATGCGTGAAATCTTGACA TTGAACCGAGCTTCAGTATATGCGGAAGGATTCTGA ->c009dd7a-3284-4e7f-9ee1-3b56e2598e07 (mRNA) 1662 residues [Merlin:3066-4796 - strand] [cdna] name=multiexongene +>87414073-7db0-4a60-903f-5e3593e55884 (mRNA) 1662 residues [Merlin:3066-4796 - strand] [cdna] name=Merlin_5_mRNA-00001 ATGAAAAGCGAAAACATGTCCACAATGAGACGTCGTAAAGTTATCGCTGATTCAAAGGGT GAAAGAGATGCAGCCTCGACTGCATCTGATCAAGTAGACTCTTTAGAATTAATCGGCCTT AAACTTGATGATGTACAAAGCGCTAATGAACTAGTTGCTGAAGTAATTGAAGAAAAGGGC @@ -82,7 +82,7 @@ AAAGCTGAAAATGCTAAAGAGCAATCTAAAAAATCAACCGGTGATATGAATGTTGCTAAC ACTCAGGTTAATAACGTAAATAATAGTAAGACTATTCACCAGGTTCAAACAGTCACGGCT ACTCCAGCTCCTGGAGTATTCGGGGCAACAGGAGTTAATTAA ->2706ea76-172a-48c1-b940-eb603996f082 (mRNA) 1056 residues [Merlin:5011-6066 - strand] [cdna] name=cds-not-under-exon +>1dcb3fc3-3307-4476-9c8a-04d252f9371f (mRNA) 1056 residues [Merlin:5011-6066 - strand] [cdna] name=Merlin_42_mRNA-00001 CTTTAATGACGCTGGTGAATCAATAAAAGAGATGATCGGTGCAATTTATGAATCAAAACC TCTTATAGCACCTGCGATGAACACAATCAACACATATGTTCCTCGAGTTCCATGGACGAG TAACATAACTGAATACAAGAAATATGTTCGAGATGTTGCATTAGCAGTAGATAATGACCA
--- a/test-data/export/cds.fa Fri Apr 24 09:03:01 2020 -0400 +++ b/test-data/export/cds.fa Mon Jun 29 09:13:41 2020 -0400 @@ -1,4 +1,4 @@ ->ff2fe902-7bab-431c-be82-30ed072915d1 (mRNA) 690 residues [Merlin:2-691 + strand] [cds] name=Unknown +>df4157db-1481-4120-858c-e6a4c0f78547 (mRNA) 690 residues [Merlin:2-691 + strand] [cds] name=Merlin_1_mRNA-00001 CGTTTAGACAAAGGTACATTATTGTATCGTGGCCAAAAATTAGACCTTCCTACATTCGAG CATAACGCAGAGAATAAGTTGTTCTATTTCAGAAACTACGTTTCAACTTCATTAAAGCCT CTGATCTTTGGTGAATTTGGTCGTATGTTTATGGCACTAGATGACGATACTACAATTTAT @@ -11,12 +11,30 @@ CTGTTCATGGAAACAGGTGAAGTAGTAAAACTGTCCGGATTCATGCAGTTCGTCAACGAA TCTGCATACGATGAAGAGCAAAACCAGATGGCTGCTGAGATTCTGTCTGGATTCTTGGAC ATTGATGACATGCCACGTAAGTTCCGCTAG ->f2e1909a-1d40-4a49-a67b-5fe2afdc4957 (mRNA) 9 residues [Merlin:752-1039 + strand] [cds] name=Unknown -AAATTTTAG ->12fe0db6-c8e1-4bc9-b594-87c92c6c9669 (mRNA) 108 residues [Merlin:1067-2011 - strand] [cds] name=Unknown -CACCTCAATTATCACTGCCGGTACTCAACAGCTGGTAAGAAAGTCTGGTGTATCGAAATA -TCTTATTGGAGCAATGAGCAATCGTGTTGTTCAGTCTTTATTAGGTGA ->58fc8255-95ed-4417-a373-238f826810ac (mRNA) 1056 residues [Merlin:2011-3066 - strand] [cds] name=Unknown +>eedd9b31-f1dd-487c-a3ab-6435b2214685 (mRNA) 288 residues [Merlin:752-1039 + strand] [cds] name=mrna-name-00001 +ATGAAATCAATTTTTCGTATCAACGGTGTAGAAATTGTAGTTGAAGATGTAGTTCCTATG +TCTTATGAATTCAATGAAGTTGTTTTCAAAGAGCTTAAGAAAATTTTAGGCGATAAGAAG +CTTCAAAGTACTCCAATTGGACGTTTTGGAATGAAAGAAAACGTTGATACTTATATTGAA +AGTGTAGTGACAGGGCAGTTAGAAGGTGAATTTTCTGTAGCAGTTCAAACTGTAGAAAAT +GATGAAGTTATTTTAACTTTACCAGCTTTCGTAATTTTCCGCAAATAA +>8cd6db6d-b818-4b65-b6f2-de79844a5641 (mRNA) 945 residues [Merlin:1067-2011 - strand] [cds] name=Merlin_3_mRNA-00001 +ATGCTAACTTTAGATGAATTTAAAAACCAAGCGGGTAATATAGACTTTCAGCGTACTAAT +ATGTTTAGTTGTGTATTTGCAACTACTCCGTCAGCAAAGTCTCAACAATTACTCGATCAA +TTTGGCGGTATGCTCTTTAATAACCTTCCGTTGAATAATGACTGGCTTGGATTAACACAA +GGTGAGTTCACATCAGGACTCACCTCAATTATCACTGCCGGTACTCAACAGCTGGTAAGA +AAGTCTGGTGTATCGAAATATCTTATTGGAGCAATGAGCAATCGTGTTGTTCAGTCTTTA +TTAGGTGAATTTGAAGTCGGAACTTATTTGTTAGACTTCTTTAACATGGCTTATCCGCAA +TCTGGATTGATGATTTATTCGGTCAAAATTCCAGAGAACAGATTGTCTCATGAAATGGAT +TTCAACCATAACTCACCGAATATTAGAATAACTGGACGTGAACTCGATCCGTTAACTATA +TCATTCAGAATGGATCCCGAAGCAAGTAACTATCGTGCAATGCAAGATTGGGTGAACTCC +GTTCAAGACCCGGTTACTGGATTGCGAGCATTACCAACTGACGTCGAAGCTGACATTCAG +GTTAACCTTCATGCTCGAAATGGATTACCTCATACTGTGATAATGTTCACAGGTTGTGTT +CCTGTTGCGTGTGGAGCTCCTGAGCTTACATATGAAGGAGATAACCAAATTGCGGTTTTC +GATGTTACATTTGCTTACAGAGTAATGCAAACGGGTGCTGTTGGACGTCAAGCTGCTCTT +GATTGGATTGAAGATAGAGCTGTTAATTCTATAACTGGAATTAATAGTGAAATGTCTCTT +AATGGAAGTTTAAGTAGATTATCTAGACTTGGAGGAGCTGCTGGAGGGTTGTCTCACGTC +ATTAATTCGACCCGAAACTCTACTTCGAAAATACTTGGATTGTAA +>125a44c4-44da-4854-9d3b-ae2aa0221120 (mRNA) 1056 residues [Merlin:2011-3066 - strand] [cds] name=Merlin_4_mRNA-00001 ATGAGCATTAAAGTCAGAGAATTAGATGATAAGACTGATGCTTTAATTAGCGGAGTTAAA ACCTCCGCTGGTCAAAGTTCACAATCAGCAAAAATAAAATCCACTATAACTGCGCAATAT CCGTCTGAACGTTCAGCTGGTAATGACACATCTGGTTCTTTACGAGTTCATGATCTTTAT @@ -35,7 +53,7 @@ TGTCAAATTCAGAGTATCCGTTTTGATAAAACTCCAAATGGAAACTTTAACGGTTTAGCT ATAGCTCCAAACCTGCCAAGTACATTCACATTAGAAATTACTATGCGTGAAATCTTGACA TTGAACCGAGCTTCAGTATATGCGGAAGGATTCTGA ->c009dd7a-3284-4e7f-9ee1-3b56e2598e07 (mRNA) 1662 residues [Merlin:3066-4796 - strand] [cds] name=multiexongene +>87414073-7db0-4a60-903f-5e3593e55884 (mRNA) 1662 residues [Merlin:3066-4796 - strand] [cds] name=Merlin_5_mRNA-00001 ATGAAAAGCGAAAACATGTCCACAATGAGACGTCGTAAAGTTATCGCTGATTCAAAGGGT GAAAGAGATGCAGCCTCGACTGCATCTGATCAAGTAGACTCTTTAGAATTAATCGGCCTT AAACTTGATGATGTACAAAGCGCTAATGAACTAGTTGCTGAAGTAATTGAAGAAAAGGGC @@ -64,5 +82,17 @@ AAAGCTGAAAATGCTAAAGAGCAATCTAAAAAATCAACCGGTGATATGAATGTTGCTAAC ACTCAGGTTAATAACGTAAATAATAGTAAGACTATTCACCAGGTTCAAACAGTCACGGCT ACTCCAGCTCCTGGAGTATTCGGGGCAACAGGAGTTAATTAA ->2706ea76-172a-48c1-b940-eb603996f082 (mRNA) 6 residues [Merlin:5011-6066 - strand] [cds] name=cds-not-under-exon -CTTTAA +>1dcb3fc3-3307-4476-9c8a-04d252f9371f (mRNA) 777 residues [Merlin:5011-6066 - strand] [cds] name=Merlin_42_mRNA-00001 +TTTAATGACGCTGGTGAATCAATAAAAGAGATGATCGGTGCAATTTATGAATCAAAACCT +CTTATAGCACCTGCGATGAACACAATCAACACATATGTTCCTCGAGTTCCATGGACGAGT +AACATAACTGAATACAAGAAATATGTTCGAGATGTTGCATTAGCAGTAGATAATGACCAA +TTCGTTTTTGTATGGGAAGATATCTATGGCTTGAACATGATGGATTATGACGCAATGATT +AACCAAGAATCAATCAAGGTTATTGTCGGTGAACCACGCACAATAGGTCAATTTGTCGGT +GAGCTGGAATATAATCTCGCTTATGACTTCCAGTGGTTAACGAAGGCTAATGCCCATACA +CGCGATCCTATTTTTAACGCTACAATCTATTCACACTCATTCTTGGATAATAACCTTCCT +AGAATAGTAACAGGTGATGGACAGAATAGCATCTTCGTTTCTCGCTCGGGTGCATATTCT +GAAATGACTTATCGAAATGGATATGAAGAAGCTATCAGGCTTCAGACTATGGCACAATAC +GACGGTTATGCAACTTGTAAAATGGTTGGAGACTTTGAAATGACTCCTGGAGATAAGATT +AATTTCTTTGATCCAAAGAAACAATTCAAAGCTGATTTTTACATTGATGAAGTAATTCAT +GAAGTAAGTAATAACCAAAGCATAACTACACTTTATATGTTTACTAACTCTCGTAAGTTG +GAAACAGTAGAACCAATAAAGGTTAAAAATGAACTTAAATCTGATACTACCACTTAA
--- a/test-data/export/pep.fa Fri Apr 24 09:03:01 2020 -0400 +++ b/test-data/export/pep.fa Mon Jun 29 09:13:41 2020 -0400 @@ -1,20 +1,26 @@ ->ff2fe902-7bab-431c-be82-30ed072915d1 (mRNA) 229 residues [Merlin:2-691 + strand] [peptide] name=Unknown +>df4157db-1481-4120-858c-e6a4c0f78547 (mRNA) 229 residues [Merlin:2-691 + strand] [peptide] name=Merlin_1_mRNA-00001 RLDKGTLLYRGQKLDLPTFEHNAENKLFYFRNYVSTSLKPLIFGEFGRMFMALDDDTTIY TAETPDDYNRFANPEDIIDIGATQKDSFDDNNNDGTSINIGKQVNLGFVISGAENVRVIV PGSLTEYPEEAEVILPRGTLLKINKITTQVDKRSNKFMVEGSIVPPSEQIDESVEIYDGD LFMETGEVVKLSGFMQFVNESAYDEEQNQMAAEILSGFLDIDDMPRKFR ->f2e1909a-1d40-4a49-a67b-5fe2afdc4957 (mRNA) 2 residues [Merlin:752-1039 + strand] [peptide] name=Unknown -KF ->12fe0db6-c8e1-4bc9-b594-87c92c6c9669 (mRNA) 35 residues [Merlin:1067-2011 - strand] [peptide] name=Unknown -HLNYHCRYSTAGKKVWCIEISYWSNEQSCCSVFIR ->58fc8255-95ed-4417-a373-238f826810ac (mRNA) 351 residues [Merlin:2011-3066 - strand] [peptide] name=Unknown +>eedd9b31-f1dd-487c-a3ab-6435b2214685 (mRNA) 95 residues [Merlin:752-1039 + strand] [peptide] name=mrna-name-00001 +MKSIFRINGVEIVVEDVVPMSYEFNEVVFKELKKILGDKKLQSTPIGRFGMKENVDTYIE +SVVTGQLEGEFSVAVQTVENDEVILTLPAFVIFRK +>8cd6db6d-b818-4b65-b6f2-de79844a5641 (mRNA) 314 residues [Merlin:1067-2011 - strand] [peptide] name=Merlin_3_mRNA-00001 +MLTLDEFKNQAGNIDFQRTNMFSCVFATTPSAKSQQLLDQFGGMLFNNLPLNNDWLGLTQ +GEFTSGLTSIITAGTQQLVRKSGVSKYLIGAMSNRVVQSLLGEFEVGTYLLDFFNMAYPQ +SGLMIYSVKIPENRLSHEMDFNHNSPNIRITGRELDPLTISFRMDPEASNYRAMQDWVNS +VQDPVTGLRALPTDVEADIQVNLHARNGLPHTVIMFTGCVPVACGAPELTYEGDNQIAVF +DVTFAYRVMQTGAVGRQAALDWIEDRAVNSITGINSEMSLNGSLSRLSRLGGAAGGLSHV +INSTRNSTSKILGL +>125a44c4-44da-4854-9d3b-ae2aa0221120 (mRNA) 351 residues [Merlin:2011-3066 - strand] [peptide] name=Merlin_4_mRNA-00001 MSIKVRELDDKTDALISGVKTSAGQSSQSAKIKSTITAQYPSERSAGNDTSGSLRVHDLY KNGLLFTAYDMNSRTTGDMRSMRLGEMKRTANSVVKSITGTNTNKVDKIPVVNILLPRSK SDVESVSHKFNDVGDSLISRGGGTATGVLSNVASTAVFGGLESLTQGLMADHNEQIYNTA RSMYGGADNRTKVFTWDLTPRSVQDLIAIIEIYEYFNYYSYGETGTSTYAKEVKSQLDEW YKSTFLDTLTPDEANKNDTVFEKITSFLSNVIVVSNPTVWFVRNFGTTSKFDGRAEVFGP CQIQSIRFDKTPNGNFNGLAIAPNLPSTFTLEITMREILTLNRASVYAEGF ->c009dd7a-3284-4e7f-9ee1-3b56e2598e07 (mRNA) 553 residues [Merlin:3066-4796 - strand] [peptide] name=multiexongene +>87414073-7db0-4a60-903f-5e3593e55884 (mRNA) 553 residues [Merlin:3066-4796 - strand] [peptide] name=Merlin_5_mRNA-00001 MKSENMSTMRRRKVIADSKGERDAASTASDQVDSLELIGLKLDDVQSANELVAEVIEEKG NNLIDSVDNVAEGTELAAEASERTTESIKTLTGVASTISDKLSKLASMLESKVQAVEQKV QESGASASTGLSVIEDKLPDPDEPFFPPVPQEPENNKKDQKKDDKKPTDMLGDLLKTTKG @@ -25,5 +31,9 @@ YIGDVDPENPTNMQSLEKAYNSAKKSISDSAISDQPATKKELDKRFQRVESKYQKLKEDN TPKPAAPATSEDNQRVQNIQKAENAKEQSKKSTGDMNVANTQVNNVNNSKTIHQVQTVTA TPAPGVFGATGVN ->2706ea76-172a-48c1-b940-eb603996f082 (mRNA) 1 residues [Merlin:5011-6066 - strand] [peptide] name=cds-not-under-exon -L +>1dcb3fc3-3307-4476-9c8a-04d252f9371f (mRNA) 258 residues [Merlin:5011-6066 - strand] [peptide] name=Merlin_42_mRNA-00001 +FNDAGESIKEMIGAIYESKPLIAPAMNTINTYVPRVPWTSNITEYKKYVRDVALAVDNDQ +FVFVWEDIYGLNMMDYDAMINQESIKVIVGEPRTIGQFVGELEYNLAYDFQWLTKANAHT +RDPIFNATIYSHSFLDNNLPRIVTGDGQNSIFVSRSGAYSEMTYRNGYEEAIRLQTMAQY +DGYATCKMVGDFEMTPGDKINFFDPKKQFKADFYIDEVIHEVSNNQSITTLYMFTNSRKL +ETVEPIKVKNELKSDTTT
--- a/test-data/load_gff3/output.tsv Fri Apr 24 09:03:01 2020 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,7 +0,0 @@ -# Feature ID Apollo ID Success Messages -Merlin_1 a036ab4f-512d-45e3-b19e-1fe83984a185 success -Merlin_2 4a3f5c6b-03fc-43d1-8d6d-1a7075931cc6 success -Merlin_3 de819682-eb71-4b98-a532-032d59354b95 success -Merlin_4 423ca5cc-d570-4ae8-8527-6a52e0a6862b success -Merlin_5 f1b5327d-79ca-40f0-b4b3-90ea03d79a56 success -Merlin_42 3629a7fe-03bb-420d-85d3-f23e55430abd success
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/load_gff3/output.txt Mon Jun 29 09:13:41 2020 -0400 @@ -0,0 +1,646 @@ +{ + "Merlin_1_mRNA": { + "owner": "admin@local.host", + "parent_name": "Merlin_1_mRNA", + "uniquename": "d33cb6e8-0809-442f-84ff-015aff163846", + "type": { + "cv": { + "name": "sequence" + }, + "name": "mRNA" + }, + "parent_type": { + "cv": { + "name": "sequence" + }, + "name": "gene" + }, + "date_creation": 1593424039041, + "sequence": "Merlin", + "children": [ + { + "owner": "None", + "parent_name": "Merlin_1_mRNA-00001", + "uniquename": "d33cb6e8-0809-442f-84ff-015aff163846-CDS", + "type": { + "cv": { + "name": "sequence" + }, + "name": "CDS" + }, + "parent_type": { + "cv": { + "name": "sequence" + }, + "name": "mRNA" + }, + "date_creation": 1593424039071, + "sequence": "Merlin", + "parent_id": "d33cb6e8-0809-442f-84ff-015aff163846", + "name": "d33cb6e8-0809-442f-84ff-015aff163846-CDS", + "location": { + "strand": 1, + "is_fmin_partial": true, + "id": 459, + "fmin": 1, + "fmax": 691 + }, + "id": 458, + "properties": [], + "date_last_modified": 1593424039095 + }, + { + "owner": "None", + "parent_name": "Merlin_1_mRNA-00001", + "uniquename": "0356ce47-8746-4557-9f45-2b08625ebc63", + "type": { + "cv": { + "name": "sequence" + }, + "name": "exon" + }, + "parent_type": { + "cv": { + "name": "sequence" + }, + "name": "mRNA" + }, + "date_creation": 1593424039029, + "sequence": "Merlin", + "parent_id": "d33cb6e8-0809-442f-84ff-015aff163846", + "name": "0356ce47-8746-4557-9f45-2b08625ebc63", + "location": { + "strand": 1, + "id": 455, + "fmin": 1, + "fmax": 691 + }, + "id": 454, + "properties": [], + "date_last_modified": 1593424039078 + } + ], + "parent_id": "436dee83-9e76-44c3-94a8-37ac15c57e8e", + "name": "Merlin_1_mRNA-00001", + "location": { + "strand": 1, + "id": 453, + "fmin": 1, + "fmax": 691 + }, + "id": 452, + "properties": [], + "date_last_modified": 1593424039249 + }, + "Merlin_2_mRNA": { + "owner": "admin@local.host", + "parent_name": "mrna-name", + "uniquename": "d5d0e6eb-458d-4402-b2ee-c19e4c1e0666", + "type": { + "cv": { + "name": "sequence" + }, + "name": "mRNA" + }, + "parent_type": { + "cv": { + "name": "sequence" + }, + "name": "gene" + }, + "date_creation": 1593424039315, + "sequence": "Merlin", + "children": [ + { + "owner": "None", + "parent_name": "mrna-name-00001", + "uniquename": "d5d0e6eb-458d-4402-b2ee-c19e4c1e0666-CDS", + "type": { + "cv": { + "name": "sequence" + }, + "name": "CDS" + }, + "parent_type": { + "cv": { + "name": "sequence" + }, + "name": "mRNA" + }, + "date_creation": 1593424039321, + "sequence": "Merlin", + "parent_id": "d5d0e6eb-458d-4402-b2ee-c19e4c1e0666", + "name": "d5d0e6eb-458d-4402-b2ee-c19e4c1e0666-CDS", + "location": { + "strand": 1, + "id": 471, + "fmin": 751, + "fmax": 1039 + }, + "id": 470, + "properties": [], + "date_last_modified": 1593424039334 + }, + { + "owner": "None", + "parent_name": "mrna-name-00001", + "uniquename": "46e2b6ce-5cf2-4472-b858-9535a374f535", + "type": { + "cv": { + "name": "sequence" + }, + "name": "exon" + }, + "parent_type": { + "cv": { + "name": "sequence" + }, + "name": "mRNA" + }, + "date_creation": 1593424039312, + "sequence": "Merlin", + "parent_id": "d5d0e6eb-458d-4402-b2ee-c19e4c1e0666", + "name": "46e2b6ce-5cf2-4472-b858-9535a374f535", + "location": { + "strand": 1, + "id": 467, + "fmin": 751, + "fmax": 1039 + }, + "id": 466, + "properties": [], + "date_last_modified": 1593424039328 + } + ], + "parent_id": "9d2661e7-c095-426e-a9ce-0e0f9d68c1ca", + "name": "mrna-name-00001", + "location": { + "strand": 1, + "id": 465, + "fmin": 751, + "fmax": 1039 + }, + "id": 464, + "properties": [], + "date_last_modified": 1593424039356 + }, + "Merlin_3_mRNA": { + "owner": "admin@local.host", + "parent_name": "Merlin_3_mRNA", + "uniquename": "c1e84840-7b7e-4c45-b044-e6ffaa89cb5d", + "type": { + "cv": { + "name": "sequence" + }, + "name": "mRNA" + }, + "parent_type": { + "cv": { + "name": "sequence" + }, + "name": "gene" + }, + "date_creation": 1593424039392, + "sequence": "Merlin", + "children": [ + { + "owner": "None", + "parent_name": "Merlin_3_mRNA-00001", + "uniquename": "3fc60774-85c1-4cbd-aca3-1a1a74a2cbf3", + "type": { + "cv": { + "name": "sequence" + }, + "name": "exon" + }, + "parent_type": { + "cv": { + "name": "sequence" + }, + "name": "mRNA" + }, + "date_creation": 1593424039390, + "sequence": "Merlin", + "parent_id": "c1e84840-7b7e-4c45-b044-e6ffaa89cb5d", + "name": "3fc60774-85c1-4cbd-aca3-1a1a74a2cbf3", + "location": { + "strand": -1, + "id": 479, + "fmin": 1066, + "fmax": 2011 + }, + "id": 478, + "properties": [], + "date_last_modified": 1593424039410 + }, + { + "owner": "None", + "parent_name": "Merlin_3_mRNA-00001", + "uniquename": "c1e84840-7b7e-4c45-b044-e6ffaa89cb5d-CDS", + "type": { + "cv": { + "name": "sequence" + }, + "name": "CDS" + }, + "parent_type": { + "cv": { + "name": "sequence" + }, + "name": "mRNA" + }, + "date_creation": 1593424039403, + "sequence": "Merlin", + "parent_id": "c1e84840-7b7e-4c45-b044-e6ffaa89cb5d", + "name": "c1e84840-7b7e-4c45-b044-e6ffaa89cb5d-CDS", + "location": { + "strand": -1, + "id": 483, + "fmin": 1066, + "fmax": 2011 + }, + "id": 482, + "properties": [], + "date_last_modified": 1593424039416 + } + ], + "parent_id": "6a3cfa63-9957-44c2-855e-8914d14c6773", + "name": "Merlin_3_mRNA-00001", + "location": { + "strand": -1, + "id": 477, + "fmin": 1066, + "fmax": 2011 + }, + "id": 476, + "properties": [], + "date_last_modified": 1593424039439 + }, + "Merlin_4_mRNA": { + "owner": "admin@local.host", + "parent_name": "Merlin_4_mRNA", + "uniquename": "4b3f4451-6feb-4a89-85ee-002c03679c4b", + "type": { + "cv": { + "name": "sequence" + }, + "name": "mRNA" + }, + "parent_type": { + "cv": { + "name": "sequence" + }, + "name": "gene" + }, + "date_creation": 1593424039555, + "sequence": "Merlin", + "children": [ + { + "owner": "None", + "parent_name": "Merlin_4_mRNA-00001", + "uniquename": "4b3f4451-6feb-4a89-85ee-002c03679c4b-CDS", + "type": { + "cv": { + "name": "sequence" + }, + "name": "CDS" + }, + "parent_type": { + "cv": { + "name": "sequence" + }, + "name": "mRNA" + }, + "date_creation": 1593424039563, + "sequence": "Merlin", + "parent_id": "4b3f4451-6feb-4a89-85ee-002c03679c4b", + "name": "4b3f4451-6feb-4a89-85ee-002c03679c4b-CDS", + "location": { + "strand": -1, + "id": 503, + "fmin": 2010, + "fmax": 3066 + }, + "id": 502, + "properties": [], + "date_last_modified": 1593424039575 + }, + { + "owner": "None", + "parent_name": "Merlin_4_mRNA-00001", + "uniquename": "4322d1af-5aa0-4777-9bd2-20d87b807d1e", + "type": { + "cv": { + "name": "sequence" + }, + "name": "exon" + }, + "parent_type": { + "cv": { + "name": "sequence" + }, + "name": "mRNA" + }, + "date_creation": 1593424039553, + "sequence": "Merlin", + "parent_id": "4b3f4451-6feb-4a89-85ee-002c03679c4b", + "name": "4322d1af-5aa0-4777-9bd2-20d87b807d1e", + "location": { + "strand": -1, + "id": 499, + "fmin": 2010, + "fmax": 3066 + }, + "id": 498, + "properties": [], + "date_last_modified": 1593424039569 + } + ], + "parent_id": "71eafcd2-8ef6-4a71-88c2-bcac2f342e28", + "name": "Merlin_4_mRNA-00001", + "location": { + "strand": -1, + "id": 497, + "fmin": 2010, + "fmax": 3066 + }, + "id": 496, + "properties": [], + "date_last_modified": 1593424039596 + }, + "Merlin_5_mRNA": { + "owner": "admin@local.host", + "parent_name": "Merlin_5_mRNA", + "uniquename": "19b80336-a138-4276-96e1-116a7787afe8", + "type": { + "cv": { + "name": "sequence" + }, + "name": "mRNA" + }, + "parent_type": { + "cv": { + "name": "sequence" + }, + "name": "gene" + }, + "date_creation": 1593424039700, + "sequence": "Merlin", + "children": [ + { + "owner": "None", + "parent_name": "Merlin_5_mRNA-00001", + "uniquename": "19b80336-a138-4276-96e1-116a7787afe8-non_canonical_five_prime_splice_site-4363", + "type": { + "cv": { + "name": "sequence" + }, + "name": "non_canonical_five_prime_splice_site" + }, + "parent_type": { + "cv": { + "name": "sequence" + }, + "name": "mRNA" + }, + "date_creation": 1593424039756, + "sequence": "Merlin", + "parent_id": "19b80336-a138-4276-96e1-116a7787afe8", + "name": "19b80336-a138-4276-96e1-116a7787afe8-non_canonical_five_prime_splice_site-4363", + "location": { + "strand": -1, + "id": 532, + "fmin": 4363, + "fmax": 4363 + }, + "id": 531, + "properties": [], + "date_last_modified": 1593424039803 + }, + { + "owner": "None", + "parent_name": "Merlin_5_mRNA-00001", + "uniquename": "19b80336-a138-4276-96e1-116a7787afe8-non_canonical_three_prime_splice_site-4296", + "type": { + "cv": { + "name": "sequence" + }, + "name": "non_canonical_three_prime_splice_site" + }, + "parent_type": { + "cv": { + "name": "sequence" + }, + "name": "mRNA" + }, + "date_creation": 1593424039767, + "sequence": "Merlin", + "parent_id": "19b80336-a138-4276-96e1-116a7787afe8", + "name": "19b80336-a138-4276-96e1-116a7787afe8-non_canonical_three_prime_splice_site-4296", + "location": { + "strand": -1, + "id": 535, + "fmin": 4296, + "fmax": 4296 + }, + "id": 534, + "properties": [], + "date_last_modified": 1593424039804 + }, + { + "owner": "None", + "parent_name": "Merlin_5_mRNA-00001", + "uniquename": "f192c0c8-c043-474a-9cba-fe593340a28f", + "type": { + "cv": { + "name": "sequence" + }, + "name": "exon" + }, + "parent_type": { + "cv": { + "name": "sequence" + }, + "name": "mRNA" + }, + "date_creation": 1593424039691, + "sequence": "Merlin", + "parent_id": "19b80336-a138-4276-96e1-116a7787afe8", + "name": "f192c0c8-c043-474a-9cba-fe593340a28f", + "location": { + "strand": -1, + "id": 522, + "fmin": 3065, + "fmax": 4296 + }, + "id": 521, + "properties": [], + "date_last_modified": 1593424039696 + }, + { + "owner": "None", + "parent_name": "Merlin_5_mRNA-00001", + "uniquename": "31a23617-42b1-4bf1-a65d-45eccdd17e8e", + "type": { + "cv": { + "name": "sequence" + }, + "name": "exon" + }, + "parent_type": { + "cv": { + "name": "sequence" + }, + "name": "mRNA" + }, + "date_creation": 1593424039699, + "sequence": "Merlin", + "parent_id": "19b80336-a138-4276-96e1-116a7787afe8", + "name": "31a23617-42b1-4bf1-a65d-45eccdd17e8e", + "location": { + "strand": -1, + "id": 525, + "fmin": 4365, + "fmax": 4796 + }, + "id": 524, + "properties": [], + "date_last_modified": 1593424039716 + }, + { + "owner": "None", + "parent_name": "Merlin_5_mRNA-00001", + "uniquename": "19b80336-a138-4276-96e1-116a7787afe8-CDS", + "type": { + "cv": { + "name": "sequence" + }, + "name": "CDS" + }, + "parent_type": { + "cv": { + "name": "sequence" + }, + "name": "mRNA" + }, + "date_creation": 1593424039710, + "sequence": "Merlin", + "parent_id": "19b80336-a138-4276-96e1-116a7787afe8", + "name": "19b80336-a138-4276-96e1-116a7787afe8-CDS", + "location": { + "strand": -1, + "id": 529, + "fmin": 3065, + "fmax": 4796 + }, + "id": 528, + "properties": [], + "date_last_modified": 1593424039722 + } + ], + "parent_id": "f380f7b1-2b7d-45cd-8d0b-2519b1b0afee", + "name": "Merlin_5_mRNA-00001", + "location": { + "strand": -1, + "id": 520, + "fmin": 3065, + "fmax": 4796 + }, + "id": 519, + "properties": [], + "date_last_modified": 1593424039810 + }, + "Merlin_42_mRNA": { + "owner": "admin@local.host", + "parent_name": "Merlin_42_mRNA", + "uniquename": "0e34cfa5-6f64-4111-9c2f-37655f6f1b92", + "type": { + "cv": { + "name": "sequence" + }, + "name": "mRNA" + }, + "parent_type": { + "cv": { + "name": "sequence" + }, + "name": "gene" + }, + "date_creation": 1593424039850, + "sequence": "Merlin", + "children": [ + { + "owner": "None", + "parent_name": "Merlin_42_mRNA-00001", + "uniquename": "6596e6e2-a894-4c0b-98c1-298c65f0c021", + "type": { + "cv": { + "name": "sequence" + }, + "name": "exon" + }, + "parent_type": { + "cv": { + "name": "sequence" + }, + "name": "mRNA" + }, + "date_creation": 1593424039841, + "sequence": "Merlin", + "parent_id": "0e34cfa5-6f64-4111-9c2f-37655f6f1b92", + "name": "6596e6e2-a894-4c0b-98c1-298c65f0c021", + "location": { + "strand": -1, + "id": 543, + "fmin": 5010, + "fmax": 6066 + }, + "id": 542, + "properties": [], + "date_last_modified": 1593424039847 + }, + { + "owner": "None", + "parent_name": "Merlin_42_mRNA-00001", + "uniquename": "a42c4166-5211-49b9-9110-e5fd41c9d2cb", + "type": { + "cv": { + "name": "sequence" + }, + "name": "CDS" + }, + "parent_type": { + "cv": { + "name": "sequence" + }, + "name": "mRNA" + }, + "date_creation": 1593424039849, + "sequence": "Merlin", + "parent_id": "0e34cfa5-6f64-4111-9c2f-37655f6f1b92", + "name": "a42c4166-5211-49b9-9110-e5fd41c9d2cb", + "location": { + "strand": -1, + "id": 546, + "fmin": 5288, + "fmax": 6065, + "is_fmax_partial": true + }, + "id": 545, + "properties": [], + "date_last_modified": 1593424039867 + } + ], + "parent_id": "f7843db6-26da-44f9-ae05-6bb92ca573f6", + "name": "Merlin_42_mRNA-00001", + "location": { + "strand": -1, + "id": 541, + "fmin": 5010, + "fmax": 6066 + }, + "id": 540, + "properties": [], + "date_last_modified": 1593424039879 + } +}