Previous changeset 11:a712b378e090 (2015-02-21) Next changeset 13:1efd1975a68d (2015-02-21) |
Commit message:
cutadapters added |
modified:
fasta2rdf/fastatordf.xml |
added:
protein2rdf/protein_to_ttl.py protein2rdf/protein_to_ttl.xml protein2rdf/test-data/NC_017117.faa rnaseq/cutadapt/cutadapt_adapters.txt |
b |
diff -r a712b378e090 -r 0773b11fb822 fasta2rdf/fastatordf.xml --- a/fasta2rdf/fastatordf.xml Sat Feb 21 16:33:42 2015 +0100 +++ b/fasta2rdf/fastatordf.xml Sat Feb 21 16:56:49 2015 +0100 |
b |
@@ -1,4 +1,8 @@ <tool id="SAPP_genome_to_ttl" name="FASTA to RDF" version="0.1"> + <requirements> + <requirement type='package' version="3.4">python</requirement> + <requirement type='package' version="1.0">rdflib</requirement> + </requirements> <description></description> <command interpreter="python3">fastatordf.py '-input' '$input' '-output' '$output' '-organism' '$organism' '-ncbi_taxid' '$ncbi_taxid' '-idtag' '$identification_tag' -sourcedb SAPP #for $index, $id in enumerate( $ids ) |
b |
diff -r a712b378e090 -r 0773b11fb822 protein2rdf/protein_to_ttl.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/protein2rdf/protein_to_ttl.py Sat Feb 21 16:56:49 2015 +0100 |
[ |
"""Convert a multi-protein FASTA file into an RDF graph serialized as Turtle.

All options are read straight from ``sys.argv`` as ``-flag value`` pairs:
``-input``, ``-output``, ``-idtag``, ``-id_alternative``, ``-sourcedb`` and
the optional descriptive flags listed in ``_GENOME_FLAG_PREDICATES``.
"""


def delete_galaxy():
    """Blank out every ``sys.path`` entry that contains ``galaxy-dist/``.

    Entries are overwritten with ``''`` in place rather than removed, so the
    length and ordering of ``sys.path`` stay unchanged.
    """
    import sys
    for index, path in enumerate(sys.path):
        if "galaxy-dist/" in path:
            sys.path[index] = ''


# Some modules that are required by RDFLIB are also in galaxy, this messes up
# the RDF import function. This call must therefore run BEFORE rdflib is
# imported below.
delete_galaxy()

from rdflib import Graph, URIRef, Literal, Namespace, RDF, RDFS, OWL, plugin
from rdflib.store import Store
from collections import OrderedDict
import sys
import hashlib

store = plugin.get('IOMemory', Store)()

# Base URI of the graph; every resource created below lives in this namespace.
URI = "http://csb.wur.nl/genome/"
seeAlso = "rdfs:seeAlso"
coreURI = Namespace(URI)

# Command-line flag -> predicate (local name inside coreURI) attached to the
# genome resource. Note that '-idtag' and '-ids' both feed "id_tag".
_GENOME_FLAG_PREDICATES = {
    '-organism': 'organism',
    '-ncbi_taxid': 'taxonomy',
    '-idtag': 'id_tag',
    '-diagnosis': 'diagnosis',
    '-country': 'country',
    '-location': 'location',
    '-date': 'date',
    '-ids': 'id_tag',
}


def createClass(uri):
    """Declare *uri* as an ``owl:Class`` that is a subclass of ``owl:Thing``.

    Both triples are added to the module-level ``genomeGraph``; *uri* is
    returned unchanged so the call can be used inline.
    """
    genomeGraph.add((uri, RDF.type, OWL.Class))
    genomeGraph.add((uri, RDFS.subClassOf, OWL.Thing))
    return uri


def _read_fasta(input_file):
    """Return an ordered ``{header line: sequence}`` mapping for *input_file*.

    The header key is the raw ``>`` line (including its newline). Sequence
    lines are stripped and concatenated. Residues that appear before the
    first header are stored under the empty key. File order is preserved so
    that the coordinates derived from it are reproducible between runs.
    """
    records = OrderedDict()
    header = ""
    chunks = []
    with open(input_file) as handle:
        for line in handle:
            if line.startswith(">"):
                if chunks:
                    records[header] = "".join(chunks)
                header = line
                chunks = []
                # Register the header right away so a record without any
                # sequence lines is still present.
                records[header] = ""
            else:
                residues = line.strip()
                if residues:
                    chunks.append(residues)
    # Flush the last record, but do not invent an empty, header-less record
    # for an empty input file.
    if header or chunks:
        records[header] = "".join(chunks)
    return records


def fasta_parser(input_file):
    """Add the genome, its CDS features and its proteins to ``genomeGraph``.

    The genome identifier comes from ``-idtag`` (spaces become underscores);
    when that is empty, ``-id_alternative`` is used instead (spaces and dots
    become underscores). Each protein is identified by the MD5 of its
    sequence, and each gets a CDS feature whose begin/end are theoretical
    running offsets over the concatenated sequences.

    Raises ValueError when a mandatory flag (``-idtag``, ``-sourcedb``) is
    missing from ``sys.argv``.
    """
    genome = sys.argv[sys.argv.index('-idtag') + 1].replace(" ", "_")
    if genome == '':
        genome = (sys.argv[sys.argv.index('-id_alternative') + 1]
                  .replace(" ", "_").replace(".", "_"))

    genomeURI = coreURI[genome]
    for index, element in enumerate(sys.argv):
        predicate = _GENOME_FLAG_PREDICATES.get(element)
        if predicate is not None:
            genomeGraph.add((genomeURI, coreURI[predicate],
                             Literal(sys.argv[index + 1])))

    fastadict = _read_fasta(input_file)

    # Create a class, to be the same as all the other genome conversions...
    # TODO: Proteins are part of cds, cds are part of dnaobject
    # If CDS is not there... how then?
    classURI = coreURI[genome + "/" + "protein_fasta"]
    proteinClass = createClass(coreURI["Protein"])
    genomeClass = createClass(coreURI["Genome"])
    typeClass = createClass(coreURI["DnaObject"])
    cdsClass = createClass(coreURI["Cds"])

    # Looked up once instead of three times per protein.
    sourcedb = Literal(sys.argv[sys.argv.index("-sourcedb") + 1])

    genomeGraph.add((genomeURI, RDF.type, genomeClass))
    genomeGraph.add((genomeURI, coreURI["sourcedb"], sourcedb))
    genomeGraph.add((genomeURI, coreURI["dnaobject"], classURI))
    genomeGraph.add((classURI, RDF.type, typeClass))

    # A theoretical begin, end is created to have a workable GBK generation
    begin = 0
    for sequence in fastadict.values():
        # Hash the encoded bytes, but store the text itself in the graph.
        # NOTE(review): this assumes the form of a bytes-valued Literal varies
        # between rdflib versions -- confirm against the rdflib in use.
        encoded = sequence.encode('utf-8')
        end = begin + len(encoded)
        md5_protein = hashlib.md5(encoded).hexdigest()
        proteinURI = coreURI["protein/" + md5_protein]

        cdsURI = coreURI[genome + "/protein_fasta/" + str(begin) + "_" + str(end)]
        genomeGraph.add((classURI, coreURI["feature"], cdsURI))
        genomeGraph.add((cdsURI, coreURI["begin"], Literal(begin)))
        genomeGraph.add((cdsURI, coreURI["end"], Literal(end)))
        genomeGraph.add((cdsURI, coreURI["sourcedb"], sourcedb))
        genomeGraph.add((cdsURI, coreURI["protein"], proteinURI))
        genomeGraph.add((cdsURI, RDF.type, cdsClass))

        genomeGraph.add((proteinURI, coreURI["md5"], Literal(md5_protein)))
        genomeGraph.add((proteinURI, coreURI["sequence"], Literal(sequence)))
        genomeGraph.add((proteinURI, RDF.type, proteinClass))
        genomeGraph.add((proteinURI, coreURI["sourcedb"], sourcedb))
        begin = end


def save():
    """Serialize ``genomeGraph`` as Turtle into the file named by ``-output``."""
    data = genomeGraph.serialize(format='turtle')
    # NOTE(review): serialize() is assumed to return str on newer rdflib
    # releases -- confirm. The file is opened in binary mode, so normalise.
    if not isinstance(data, bytes):
        data = data.encode('utf-8')
    with open(sys.argv[sys.argv.index("-output") + 1], "wb") as handle:
        handle.write(data)


def main():
    """Build a fresh in-memory graph, fill it from ``-input`` and save it."""
    store = plugin.get('IOMemory', Store)()
    global genomeGraph
    genomeGraph = Graph(store, URIRef(URI))
    genomeGraph.bind("ssb", coreURI)
    input_file = sys.argv[sys.argv.index("-input") + 1]
    fasta_parser(input_file)
    save()


if __name__ == '__main__':
    main()
b |
diff -r a712b378e090 -r 0773b11fb822 protein2rdf/protein_to_ttl.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/protein2rdf/protein_to_ttl.xml Sat Feb 21 16:56:49 2015 +0100 |
b |
<!-- Galaxy tool wrapper for protein_to_ttl.py: converts a multi-protein FASTA
     file into RDF. Every input below is forwarded to the script as a
     '-flag' 'value' pair on the command line. -->
<tool id="SAPP_protein_rdf" name="Protein FASTA to RDF" version="0.1">
    <requirements>
        <requirement type='package' version="3.4">python</requirement>
        <requirement type='package' version="1.0">rdflib</requirement>
    </requirements>
    <description></description>
    <command interpreter="python3.4">protein_to_ttl.py '-input' '$input' '-output' '$output' '-organism' '$organism' '-ncbi_taxid' '$ncbi_taxid' '-idtag' '$identification_tag' '-diagnosis' '$diagnosis' '-country' '$country' '-location' '$location' '-date' '$date' -sourcedb SAPP
        #for $index, $id in enumerate( $ids )
            '-ids' '$id.id_tag'
        #end for
        '-id_alternative' '$input.name'
    </command>
    <inputs>
        <param size="60" name="input" type="data" format="fasta,fa" label="File for annotation, file types used fasta,fa"/>
        <param size="60" name="organism" type="text" format="text" label="organism name"/>
        <param size="60" name="diagnosis" type="text" format="text" label="Diagnosis of host if applicable"/>
        <param size="60" name="ncbi_taxid" type="text" format="text" label="NCBI taxonomy ID"/>
        <param size="60" name="country" type="text" format="text" label="Country of sample"/>
        <param size="60" name="location" type="text" format="text" label="Location of sample e.g., river, city, hospital"/>
        <param size="60" name="date" type="text" format="text" label="Sample date"/>
        <param size="60" name="identification_tag" type="text" format="text" label="An identification tag used for RDF storage !Needs to be very unique!"/>
        <repeat name="ids" title="Identification tags">
            <param size="60" name="id_tag" type="text" format="text" label="An identification tag used by other consortiums"/>
        </repeat>
    </inputs>

    <outputs>
        <data format="rdf" name="output" label="proteinTTL: ${input.name}" />
    </outputs>
    <tests>
        <test>
            <!-- Test inputs are <param> elements named after the declared
                 inputs (no Cheetah '$'); file values are resolved relative
                 to the tool's test-data directory. -->
            <param name="input" value="NC_017117.faa"/>
            <param name="ncbi_taxid" value="634455"/>
            <param name="identification_tag" value="Acetobacter pasteurianus IFO 3283-22"/>
            <param name="organism" value="Acetobacter pasteurianus IFO 3283-22"/>
            <output name="output" file="NC_017117.rdf"/>
        </test>
    </tests>
    <help>
        RDF creation from a multi protein fasta file
    </help>
</tool>
b |
diff -r a712b378e090 -r 0773b11fb822 protein2rdf/test-data/NC_017117.faa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/protein2rdf/test-data/NC_017117.faa Sat Feb 21 16:56:49 2015 +0100 |
[ |
b'@@ -0,0 +1,993 @@\n+>gi|384055706|ref|YP_005485330.1| transposase [Acetobacter pasteurianus IFO 3283-22]\n+MKSDRFTDAQIMGVIRQAEGGVPVPDLCREHGISNATFYRWRAKYGGMDASMISQMKALEEENRRLKRMY\n+ADLSMQTDILKEALGKK\n+>gi|384055707|ref|YP_005485331.1| DNA helicase II UvrD/Rep [Acetobacter pasteurianus IFO 3283-22]\n+MAGHHVEAMIARAHAQKRFMDDAGWRYVVELYGRYQSLLREQNAADFGDLLMWPTLAMLHNDAYRYRWSR\n+RFTAVMADEFQDVNRAQFLWLKMISEVSAEFFAVGDDSQSIL\n+>gi|384055708|ref|YP_005485332.1| transposase [Acetobacter pasteurianus IFO 3283-22]\n+MVVGRNDCAKGRQMKDTVIGVDLAKNIFQVHGASRAGEVMFRKKLRRQQFMQFMATQPPALVVLEACGSA\n+HYWARELAGAGHEVRLIAPQYVKPFVKRQKNDAADAEAIVIAARQPEMRFVEPRTEAQQARGVLFRARQR\n+LVHQRTELVNALRAVLYEFGLVVPQGIAHIRHIEAMLDEAVLPEAVKQECLDLLRQISEQSVRIDVRTKK\n+IRMLAQESENTCRLQSMPGVGPLTALAIEAFAPDLQSFRRGRDFAAWLGLVPRQFSSGGKERLGKISKAG\n+QADIRRLLIMGAMTQVNWASRKAPAPGSWLARMLARKPRMLVAIALANRMARAIWAMATKQEDYRDPALS\n+VAA\n+>gi|384055709|ref|YP_005485333.1| transposase [Acetobacter pasteurianus IFO 3283-22]\n+MEQIIRIGMDTSKSVFQLHGVNAKEQPVLRRKLSRREMVKFFEKLPPIEIAIEACGASHYWGRVLSCLGH\n+TVKLIAPQLVKPYVKRGKNDAADAEALCEAMSRPTMRFVPLKSEEEQAALMLIGMRARLIRNRTQLANTI\n+RGYAAEFGITAPKGMCRIEALLDRIAADESLPTLTRELFALHAKEYAELQGEIEQLEGKVMAWHRANECS\n+QRLAKIPGVGPIGAALLMMKTPDPHLFKSGRAFAAWIGLTPRDHSTGGKTRLGRITRAGDEVLRSTLVVG\n+ATAVVSHARRTNGKNASSWLRELLERKKPKLAAVALANKIARIAWKLMVSGEHYKRLLQQPGAAAV\n+>gi|384055710|ref|YP_005485334.1| DNA resolvase [Acetobacter pasteurianus IFO 3283-22]\n+MVPPKPGKTPVGGRLIGYARVSTDDQGTDAQLNELRDAGCTMIFEKHASGADRNRPVLIRLLRDMNAGDT\n+LVVVRLDRLARSVSHLLAVIEQLDYAGAHFRSLDDPIDTTTPQGMFSLQVLGAVAQLDADFFCDGVDGSQ\n+RHRDVPR\n+>gi|384055711|ref|YP_005485335.1| transposase [Acetobacter pasteurianus IFO 3283-22]\n+MLTSRIHRRKPMGKPMSKATARANAAKSSIRAHVEHVFAHQKNRFNLFIRTIGLARAEAKLTLCNLAYNF\n+NRLIFHERLETAG\n+>gi|384055712|ref|YP_005485336.1| D-mannonate oxidoreductase [Acetobacter pasteurianus IFO 
3283-22]\n+MNLNRNAISHVPDTVYTPRYDPALLRPGIVHLGCGNFHRGHQVVATQAAIDAEGRDGLRWGIVSATMRRP\n+DLATVLQSQDNLYTLLTREPANTVASVMAAITEAVYAGDDNANLAARIADPATAIVTLTVTASGYYLSAD\n+GRLDPTFEAIQADLTAITPRTAPGIIAAGLAQVRQRGGVPPVILCCDNVNSNGATLRQAVIDLAALKGDD\n+LLAAWIETNVQFPDTMVDRIVPTATPDDIADACRLLGGIEDRAPISAEPWFQWVIGEFDGPRPRWVAHPG\n+TKFVSDVGVFERAKLQMLNGTHMLLAYVGALANLNTVSEAASDDALGRIAARFMRNEQTADVSLDTDELD\n+RYTVDLMQRFRNPGIVHEVTRIGRNGSAKMASRIVQPMRSNIEAGRPVDGAVLLIASWIRWFALHEQDEF\n+DIALTDPRAETLRGLCADARDDHKAQAEAFLAMEEVFGAPLPDHGKQVEAIASMLRRLTEESVPELLRTI\n+AH\n+>gi|384055713|ref|YP_005485337.1| phosphatase/phosphohexomutase [Acetobacter pasteurianus IFO 3283-22]\n+MTDTVFPAHLLKHKQEPVHGVVFDMDGLLLDSESLAMEALVFAARDLNYDIPMSFCRTMIGVPADGCRTM\n+VRKTYGQDFPLERFFELQEVHLRNFVDTGKLALKKGVLPLLDLLDTYKIPRAIATSSSRVRTDHHLKLVN\n+LFHRFNAIVTRDDVSKGKPDPEPYLTAAKKIGVNPAHALALEDSHSGARAAHAAGIRVIVVPDLLEATDE\n+IRGKALAIVQDLSIVEAYLKHAITGQA\n+>gi|384055714|ref|YP_005485338.1| hypothetical protein APA22_40090 [Acetobacter pasteurianus IFO 3283-22]\n+MRRDMDLVRQLLLKLEGIEKGPHDVLLIGGNSEEVAVDGRTSDEIYFHLTKIEEAGFLERVGGGAMTAVT\n+FRALSWKGQEFLDTIRDDSIWKKTKEKAGSASFDILAAVAKAVIKDRIKSLTGLDIG\n+>gi|384055715|ref|YP_005485339.1| hypothetical protein APA22_40100 [Acetobacter pasteurianus IFO 3283-22]\n+MRPLGSGLSVRTYGCSEADDQENDGWAKKDTGEIVALYEMSSPVMPSGLVSISRWKIKGCYPKSGLSRAM\n+LCPTKIPQSASNIALLIGSDWSFIEENVFCNHIEWQTCLPVFVMNLDHPA\n+>gi|384055716|ref|YP_005485340.1| DNA helicase superfamily I [Acetobacter pasteurianus IFO 
3283-22]\n+MSSKPSHHSVLSYWHSALLDDAQMKISFSRDNLVALDEEGFEKGKLPPDKTQALRKMHPASRDLAPDDSI\n+IAMAGIRILLGQVSHSTEHSKQPALFCMAMLVNVSPEGTIQPLKDAPPWINRELLEPSDGDVLIGDLATM\n+DTWLQLNPFEGGSLGKTLEWAEKLWNAVTGEDGLPDGYELWERVALQPAEASIGMIATLHQRRFYDTVLA\n+DTGLVTPLLARYIDGGPEPAVVDESQKWAAAGRARGTMTFAYGMSSSQSEAMTAFCSVKDGDILAVNGPP\n+GTGKTTLLQGIVATELVTRALEGGDPAVIVGTSTNNQAVTNIIDAMKKAMASKDSRPWARRWIEGADALG\n+LYFPSGEKEKEALKAGYLIASPGRGLGTMEWKGFPERERDTVDAWASRDAWINGYYGSFYPGVTPPLRKE\n+HLSGHGPQGARHDISLVEDGIAKIRARMKVLVETGRVCAGEARKLNQLYVASGYGTYPDITKAIAQREAL\n+LQERRPREDALKSDLKEKEAAAAVPRARINEENRKTRDLLKQRDDAVHAAGQKVEEVGAHAVALIAALPG\n+GGFFSNLMSGRNWANVERLVAEGRQGSFFRSLMQAQVKSKREWMDAINEMTASAERELATVRESREETRQ\n+ARDTLIQKLEREVAAADLVSKTARAEYDHYVGGSYVLAGRELEKLVTLKHQILQQLQDCCTAIETVLAPS\n+DWAAMFDMPEEKLPWRQSNWTGRLDVIEDFLDR'..b'DEVAPAV\n+RHLISQIQTTIA\n+>gi|384055875|ref|YP_005485499.1| multidrug resistance transporter EmrB/QacA [Acetobacter pasteurianus IFO 3283-22]\n+MGTSMTSSRVTNPLFVLLAASTGCALTVLDTNVVAIILPTIAREFRASFADIEWVISTYVLCFASLLLPA\n+GAIADRYGRRRIYLIGITTFALTSLFCGAAPSATALYLARALQGVSAAFLLAPALAIIGHTFHNPDERNR\n+AWAIWGSIMGLTMVLAPIIGGIIAYALGWRWAFYINIPICVLLAGAVFILVKESRDTDARRLDPVGIIFF\n+AAFMFGLTWGMINGQASGWTSWNALNGFIGGSISLGIFIASERAQSRPMLDLGLFSNPRFLGAVWAMFAY\n+AASAQVMASMLPLFLQNGLGRSALQAGFAMLPFALAMLIFPHIGRLLERHISSSGILAGGLSCVAIGNGI\n+TAWGAYVGSWIIVMAGMVVIGSGGGLLNGETQKAIMSVVPKERSGMASGISTTSRFSGILLGFAMLSGIL\n+ATMVRKWVAAFGCGTGCHHPSDFADAIVAGDLPSAISGLEGSNQEIAIQHAHHAFSYGFAVALLVASIFA\n+LGSSITVFTLMQSKMKQNIT\n+>gi|384055876|ref|YP_005485500.1| transposase, partial [Acetobacter pasteurianus IFO 3283-22]\n+MLAYAVMASVRYQANSLKPKKTQLRTRQSLSAGPFRRSGASS\n+>gi|384055877|ref|YP_005485501.1| transposase [Acetobacter pasteurianus IFO 
3283-22]\n+MQTECSAGAYEFPASCGRRVVARFDGGRMSSDGGVILVKQADDILGLSRRFAACFRDKRHPGFVEYRVED\n+LVRQRIMGLALGYEDLNDHDALRHDLIFGLASGRLSGGRANCAALAGKSTLNRLERSGQQADRYCRIIAD\n+HEALATLFVTLFLDQHEHAPARIVLDVDATDDRIHGHQEGRAFHGYYGHNCYLPLYVFCGDHLLSATLRT\n+ADRDPGKEALADIRRIVEQIRSRWPRVRILVRGDSGFARDSLMTWCEDNHVDFLFGLAGNTRLYDRIASL\n+SAEVRDEAATTGRAARGFASFDWITKDSWTRRRRVVAKAEWRHGNRYHRFIVTTLPQGMSDPRHLYEQIY\n+CARGDMENRIKECQMDLFSDRTSSHTIRANQLRLWFSAAAYVLLTALQRLALGQTSLETATCGTIRARLL\n+KIATRVTLSVRRIVLSMPDMFPCQHEFALAHARLRRLRQAI\n+>gi|384055878|ref|YP_005485502.1| transposase [Acetobacter pasteurianus IFO 3283-22]\n+MQTECSAGAYEFPASCGRRVVARFDGGRMSSDGGVIVVKQADDILGLSRRFAACFRDKRHPGFVEYRVED\n+LVRQRIMGLALGYEDLNDHDALRHDLIFGLASGRLSGGRANCAALAGKSTLNRLERSGHKADRYCRIIAD\n+HEALATLFVTLFLDQHEHAPARIVLDVDATDDRIHGHQEGRAFHGYYGHNCYLPLYVFCGDHLLSATLRT\n+ADRDPGKEALADIRRIVEQIRSRWPRVRILVRGDSGFARDSLMTWCEDNHVDFLFGLAGNTRLYDRIASL\n+SAEVRDEAATTGRAARGFASFDWITKDSWTRRRRVVAKAEWRHGNRYHRFIVTTLPQGMSDPRHLYEQIY\n+CARGDMENRIKECQMDLFSDRTSSHTIRANQLRLWFSAAAYVLLTALQRLALGQTSLETATCGTIRARLL\n+KIATRVTLSVRRIVLSMPDMFPCQHEFALAHARLRRLRQAI\n+>gi|384055879|ref|YP_005485503.1| DNA helicase II UvrD/Rep [Acetobacter pasteurianus IFO 3283-22]\n+MLQFSYMSEEADAIAAEIGRRAASGCAWHDIAVIYRQNRLSRAIEEALIQARVPYEIVGDVGFYQRVAVK\n+DALALLSLAARPDDRQSDEAFRADFSHLRQFRVIL\n+>gi|384055880|ref|YP_005485504.1| DNA helicase RecD/TraA [Acetobacter pasteurianus IFO 
3283-22]\n+MTSAVVGEQCQTEALAGLVERVTFHNAENGFCVLRVKVRGQRDLVTVVGHAAMISAGEFVQMSGRWFNDH\n+THGLQFKAEFLKASPPTTVEGIERYLGSGMIRGIGPVYAKKLVKAFGEAVFDLIEQEPHRLREVTGIGPK\n+RAERIVGGWADQKVIREIMLFLHSNGVGTSRAVRIFKTYGQDAVRLISENPYRLAKDIRGIGFKTADQIA\n+RKMGIAPDAMIRVRAGISYALGEAMDEGHCGLPVGELLTSTAELLEVAAPLIETALALELEAGDVVADSV\n+GETSCIFLAGLYRAEQSIAERLRACAVGRPPWPEIDAEKAMTWVEGKTGLAMAPSQQEAVRLALRSKVLV\n+ITGGPGVGKTTLVNAILKIVTAKGTDVQLCAPTGRAAKRLSESTGLEGKTIHRLLETDPGNGSFKRDDTN\n+PLTCDLLVVDEASMVDVLLMRSLLRALPDSASLLIVGDVDQLPSVGPGQVLADIIGSDAVPVVRLTEVFR\n+QAAQSRIITNAHRINEGKMPELSAEEGSDFYFVEAAEPEVGLRKLLAVVKDRIPARFGLDPVRDVQVLCP\n+MNRGGLGARSLNIELQQALNPAGDVKVERFGWTYGPGDKVMQIANDYDRDVFNGDLGVIDKIDVEEGELT\n+VLFDGREVVYGFGELDELVLAYATTIHKSQGSEYPVVVIPLVTQHYTMLARNLLYTGVTRGRKLVVLVGQ\n+KKALAIAVRNQGGRLRWSKLRDWLVGTSGTGHLSRLKKP\n+>gi|384055881|ref|YP_005485505.1| phage integrase [Acetobacter pasteurianus IFO 3283-22]\n+MVESQVSHIQPEYKFHINLDEYDRRATLSADELKVVRRWKEENLVITKRQAPRLHKPLTDILYRSNLDRA\n+NSHRALKYLLLTVAHQEKPYWGWSEDLWVEIINNSPVLKKTGMVPQLIAVAYLLCGFRSVYKIQRNVATA\n+VVARLVFGAEIVDTECERLFSALTRVGFVCQTVRPLVPSVFAAVALQGENPKLESFDRKILEHTRECYTG\n+NHIAKRIGILSNGLAAMGLTSKVIHFRAYPPRHGTETDNINPEWMTWCRRWLETTTLREGSRRAVYNTLT\n+RIGIWLGREHPEVTGPEQWTVSVCADYLAAVDRLRVGDWGGSTFDYRLIPTVGQPLQAPTKVAYYQVMRR\n+FLSDIQSWEWARLRCNPRYHLSTPKNIAKYLGVNPRTIDDASWLKLTWASLNIEPDDLSPDCFYPFALLQ\n+AIAVVWTHAGLRSNEIARLRVGCTREQSEDVVDQSGNVVPAGQVCWLDVPEGKTSVAYTKPVGHAVHKYI\n+TAWMKKRASPRKHLDRRTGEHVHFLFQLRNRPIAKEVLNQTVIPLLCKKAGIPIEDSKGRITSHRGRASA\n+VSMLASVPQGMTIFDLAKWCGHTSVQSTMSYVRSKPTQLASAFAKADQAARMIEIVIDNEVIAAGATKDG\n+APWKYYDLGDSYCSNAFWSTCPHRMACARCYFNIPKPSAKGVVLAAQQAANRLLEEVWLSPEERDAVSGD\n+VEALEGMLNKLRDKPALDGRTPGEISATCGSQVSSPFTESE\n+>gi|384055882|ref|YP_005485506.1| transposase [Acetobacter pasteurianus IFO 3283-22]\n+MELGITPGQDADITQAEPLLENIEPDAFLADKAYDADRLIDRLIQRGITPVIPPKRNRTTRRVIPP\n' |
b |
diff -r a712b378e090 -r 0773b11fb822 rnaseq/cutadapt/cutadapt_adapters.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rnaseq/cutadapt/cutadapt_adapters.txt Sat Feb 21 16:56:49 2015 +0100 |
b |
@@ -0,0 +1,48 @@ +GATCGGAAGAGCACACGTCTGAACTCCAGTCACACAGTGATCTCGTATGCCGTCTTCTGCTTG +GATCGGAAGAGCACACGTCTGAACTCCAGTCACACTGATATCTCGTATGCCGTCTTCTGCTTG +GATCGGAAGAGCACACGTCTGAACTCCAGTCACACTTGAATCTCGTATGCCGTCTTCTGCTTG +GATCGGAAGAGCACACGTCTGAACTCCAGTCACAGTCAAATCTCGTATGCCGTCTTCTGCTTG +GATCGGAAGAGCACACGTCTGAACTCCAGTCACAGTTCCATCTCGTATGCCGTCTTCTGCTTG +GATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCTCGTATGCCGTCTTCTGCTTG +GATCGGAAGAGCACACGTCTGAACTCCAGTCACATGAGCATCTCGTATGCCGTCTTCTGCTTG +GATCGGAAGAGCACACGTCTGAACTCCAGTCACATGTCAATCTCGTATGCCGTCTTCTGCTTG +GATCGGAAGAGCACACGTCTGAACTCCAGTCACATTCCTATCTCGTATGCCGTCTTCTGCTTG +GATCGGAAGAGCACACGTCTGAACTCCAGTCACCAAAAGATCTCGTATGCCGTCTTCTGCTTG +GATCGGAAGAGCACACGTCTGAACTCCAGTCACCAACTAATCTCGTATGCCGTCTTCTGCTTG +GATCGGAAGAGCACACGTCTGAACTCCAGTCACCACCGGATCTCGTATGCCGTCTTCTGCTTG +GATCGGAAGAGCACACGTCTGAACTCCAGTCACCACGATATCTCGTATGCCGTCTTCTGCTTG +GATCGGAAGAGCACACGTCTGAACTCCAGTCACCACTCAATCTCGTATGCCGTCTTCTGCTTG +GATCGGAAGAGCACACGTCTGAACTCCAGTCACCAGATCATCTCGTATGCCGTCTTCTGCTTG +GATCGGAAGAGCACACGTCTGAACTCCAGTCACCAGGCGATCTCGTATGCCGTCTTCTGCTTG +GATCGGAAGAGCACACGTCTGAACTCCAGTCACCATGGCATCTCGTATGCCGTCTTCTGCTTG +GATCGGAAGAGCACACGTCTGAACTCCAGTCACCATTTTATCTCGTATGCCGTCTTCTGCTTG +GATCGGAAGAGCACACGTCTGAACTCCAGTCACCCAACAATCTCGTATGCCGTCTTCTGCTTG +GATCGGAAGAGCACACGTCTGAACTCCAGTCACCCGTCCATCTCGTATGCCGTCTTCTGCTTG +GATCGGAAGAGCACACGTCTGAACTCCAGTCACCGATGTATCTCGTATGCCGTCTTCTGCTTG +GATCGGAAGAGCACACGTCTGAACTCCAGTCACCGGAATATCTCGTATGCCGTCTTCTGCTTG +GATCGGAAGAGCACACGTCTGAACTCCAGTCACCGTACGATCTCGTATGCCGTCTTCTGCTTG +GATCGGAAGAGCACACGTCTGAACTCCAGTCACCTAGCTATCTCGTATGCCGTCTTCTGCTTG +GATCGGAAGAGCACACGTCTGAACTCCAGTCACCTATACATCTCGTATGCCGTCTTCTGCTTG +GATCGGAAGAGCACACGTCTGAACTCCAGTCACCTCAGAATCTCGTATGCCGTCTTCTGCTTG +GATCGGAAGAGCACACGTCTGAACTCCAGTCACCTTGTAATCTCGTATGCCGTCTTCTGCTTG +GATCGGAAGAGCACACGTCTGAACTCCAGTCACGACGACATCTCGTATGCCGTCTTCTGCTTG +GATCGGAAGAGCACACGTCTGAACTCCAGTCACGAGTGGATCTCGTATGCCGTCTTCTGCTTG +GATCGGAAGAGCACACGTCTGAACTCCAGTCACGATCAGATCTCGTATGCCGTCTTCTGCTTG 
+GATCGGAAGAGCACACGTCTGAACTCCAGTCACGCCAATATCTCGTATGCCGTCTTCTGCTTG +GATCGGAAGAGCACACGTCTGAACTCCAGTCACGGCTACATCTCGTATGCCGTCTTCTGCTTG +GATCGGAAGAGCACACGTCTGAACTCCAGTCACGGTAGCATCTCGTATGCCGTCTTCTGCTTG +GATCGGAAGAGCACACGTCTGAACTCCAGTCACGTAGAGATCTCGTATGCCGTCTTCTGCTTG +GATCGGAAGAGCACACGTCTGAACTCCAGTCACGTCCGCATCTCGTATGCCGTCTTCTGCTTG +GATCGGAAGAGCACACGTCTGAACTCCAGTCACGTGAAAATCTCGTATGCCGTCTTCTGCTTG +GATCGGAAGAGCACACGTCTGAACTCCAGTCACGTGGCCATCTCGTATGCCGTCTTCTGCTTG +GATCGGAAGAGCACACGTCTGAACTCCAGTCACGTTTCGATCTCGTATGCCGTCTTCTGCTTG +GATCGGAAGAGCACACGTCTGAACTCCAGTCACTAATCGATCTCGTATGCCGTCTTCTGCTTG +GATCGGAAGAGCACACGTCTGAACTCCAGTCACTACAGCATCTCGTATGCCGTCTTCTGCTTG +GATCGGAAGAGCACACGTCTGAACTCCAGTCACTAGCTTATCTCGTATGCCGTCTTCTGCTTG +GATCGGAAGAGCACACGTCTGAACTCCAGTCACTATAATATCTCGTATGCCGTCTTCTGCTTG +GATCGGAAGAGCACACGTCTGAACTCCAGTCACTCATTCATCTCGTATGCCGTCTTCTGCTTG +GATCGGAAGAGCACACGTCTGAACTCCAGTCACTCCCGAATCTCGTATGCCGTCTTCTGCTTG +GATCGGAAGAGCACACGTCTGAACTCCAGTCACTCGAAGATCTCGTATGCCGTCTTCTGCTTG +GATCGGAAGAGCACACGTCTGAACTCCAGTCACTCGGCAATCTCGTATGCCGTCTTCTGCTTG +GATCGGAAGAGCACACGTCTGAACTCCAGTCACTGACCAATCTCGTATGCCGTCTTCTGCTTG +GATCGGAAGAGCACACGTCTGAACTCCAGTCACTTAGGCATCTCGTATGCCGTCTTCTGCTTG |