# HG changeset patch # User bgruening # Date 1424786129 18000 # Node ID d734579c3307c4c887fe493eb0dcfbecffc15eb4 # Parent db68266f7364995bb6b30460d2efc3773134fcef Uploaded diff -r db68266f7364 -r d734579c3307 readme.md --- a/readme.md Tue Feb 24 04:48:44 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,84 +0,0 @@ -Galaxy workflow for the identification of candidate genes clusters ------------------------------------------------------------------- - -This approach screens two proteins against all nucleotide sequence from the -NCBI nt database within hours on our cluster, leading to all organisms with an inter- -esting gene structure for further investigation. As usual in Galaxy workflows every -parameter, including the proximity distance, can be changed and additional steps -can be easily added. For example additional filtering to refine the initial BLAST -hits, or inclusion of a third query sequence. - -![Workflow Image](https://raw.githubusercontent.com/bgruening/galaxytools/master/workflows/ncbi_blast_plus/find_genes_located_nearby/find_genes_located_nearby.png) - - -Sample Data -=========== - -As an example, we will use two protein sequences from *Streptomyces aurantiacus* -that are part of a gene cluster, responsible for metabolite producion. - -You can upload both sequences directly into Galaxy using the "Upload File" tool -with either of these URLs - Galaxy should recognise this is FASTA files. 
- - * https://raw.githubusercontent.com/bgruening/galaxytools/master/workflows/ncbi_blast_plus/find_genes_located_nearby/WP_037658548.fasta - * https://raw.githubusercontent.com/bgruening/galaxytools/master/workflows/ncbi_blast_plus/find_genes_located_nearby/WP_037658557.fasta - -In addition you can find both sequences at the NCBI server: - * http://www.ncbi.nlm.nih.gov/protein/739806622 (cytochrome P450) - -```text ->gi|739806622|ref|WP_037658557.1| cytochrome P450 [Streptomyces aurantiacus] -MQRTCPFSVPPVYTKFREESPITQVVLPDGGKAWLVTKYDDVRAVMANPKLSSDRRAPDFPVVVPGQNAA -LAKHAPFMIILDGAEHAAARRPVISEFSVRRVAAMKPRIQEIVDGFIDDMLKMPKPVDLNQVFSLPVPSL -VVSEILGMPYEGHEYFMELAEILLRRTTDEQGRIAVSVELRKYMDKLVEEKIENPGDDLLSRQIELQRQQ -GGIDRPQLASLCLLVLLAGHETTANMINLGVFSMLTKPELLAEIKADPSKTPKAVDELLRFYTIPDFGAH -RLALDDVEIGGVLIRKGEAVIASTFAANRDPAVFDDPEELDFGRDARHHVAFGYGPHQCLGQNLGRLELQ -VVFDTLFRRLPELRLAVPEEELSFKSDALVYGLYELPVTW -``` - - * http://www.ncbi.nlm.nih.gov/protein/739806613 (beta-ACP synthase) - -``` ->gi|739806613|ref|WP_037658548.1| beta-ACP synthase [Streptomyces aurantiacus] -MSGRRVVVTGMEVLAPGGVGTDNFWSLLSEGRTATRGITFFDPAQFRSRVAAEIDFDPYAHGLTPQEVRR -MDRAAQFAVVAARGAVADSGLDTDTLDPYRIGVTIGSAVGATMSLDEDYRVVSDAGRLDLVDHTYADPFF -YNYFVPSSFATEVARLVGAQGPSSVVSAGCTSGLDSVGYAVELIREGTADVMVAGATDAPISPITMACFD -AIKATTPRHDDPEHASRPFDDTRNGFVLGEGTAVFVLEELESARRRGARIYAEIAGYATRSNAYHMTGLR -PDGAEMAEAITVALDEARMNPTAIDYINAHGSGTKQNDRHETAAFKRSLGEHAYRTPVSSIKSMVGHSLG -AIGSIEIAASILAIQHDVVPPTANLHTPDPQCDLDYVPLNAREQIVDAVLTVGSGFGGFQSAMVLAQPER -NAA -``` - - -Citation -======== - -If you use this workflow directly, or a derivative of it, or the associated -NCBI BLAST wrappers for Galaxy, in work leading to a scientific publication, -please cite: - -Peter J. A. Cock, John M. Chilton, Björn Grüning, James E. 
Johnson, Nicola Soranzo -NCBI BLAST+ integrated into Galaxy - -http://biorxiv.org/content/early/2015/01/21/014043 -http://dx.doi.org/10.1101/014043 - - -Availability -============ - -This workflow is available on the main Galaxy Tool Shed: - -http://toolshed.g2.bx.psu.edu/view/bgruening/find_genes_located_nearby_workflow - -Development is being done on github: - -https://github.com/bgruening/galaxytools/workflows/ncbi_blast_plus/ - - -Dependencies -============ - -These dependencies should be resolved automatically via the Galaxy Tool Shed: - -* http://toolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus diff -r db68266f7364 -r d734579c3307 readme.rst --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/readme.rst Tue Feb 24 08:55:29 2015 -0500 @@ -0,0 +1,83 @@ +Galaxy workflow for the identification of candidate gene clusters +------------------------------------------------------------------ + +This approach screens two proteins against all nucleotide sequences from the +NCBI nt database within hours on our cluster, leading to all organisms with an +interesting gene structure for further investigation. As usual in Galaxy workflows every +parameter, including the proximity distance, can be changed and additional steps +can be easily added. For example, additional filtering to refine the initial BLAST +hits, or inclusion of a third query sequence. + +.. image:: https://raw.githubusercontent.com/bgruening/galaxytools/master/workflows/ncbi_blast_plus/find_genes_located_nearby/find_genes_located_nearby.png + + +Sample Data +=========== + +As an example, we will use two protein sequences from *Streptomyces aurantiacus* +that are part of a gene cluster responsible for metabolite production. + +You can upload both sequences directly into Galaxy using the "Upload File" tool +with either of these URLs - Galaxy should recognise these as FASTA files. 
+ +* `WP_037658548.fasta <https://raw.githubusercontent.com/bgruening/galaxytools/master/workflows/ncbi_blast_plus/find_genes_located_nearby/WP_037658548.fasta>`_ +* `WP_037658557.fasta <https://raw.githubusercontent.com/bgruening/galaxytools/master/workflows/ncbi_blast_plus/find_genes_located_nearby/WP_037658557.fasta>`_ + +In addition you can find both sequences at the NCBI server: + * http://www.ncbi.nlm.nih.gov/protein/739806622 (cytochrome P450) + :: + + >gi|739806622|ref|WP_037658557.1| cytochrome P450 [Streptomyces aurantiacus] + MQRTCPFSVPPVYTKFREESPITQVVLPDGGKAWLVTKYDDVRAVMANPKLSSDRRAPDFPVVVPGQNAA + LAKHAPFMIILDGAEHAAARRPVISEFSVRRVAAMKPRIQEIVDGFIDDMLKMPKPVDLNQVFSLPVPSL + VVSEILGMPYEGHEYFMELAEILLRRTTDEQGRIAVSVELRKYMDKLVEEKIENPGDDLLSRQIELQRQQ + GGIDRPQLASLCLLVLLAGHETTANMINLGVFSMLTKPELLAEIKADPSKTPKAVDELLRFYTIPDFGAH + RLALDDVEIGGVLIRKGEAVIASTFAANRDPAVFDDPEELDFGRDARHHVAFGYGPHQCLGQNLGRLELQ + VVFDTLFRRLPELRLAVPEEELSFKSDALVYGLYELPVTW + + + * http://www.ncbi.nlm.nih.gov/protein/739806613 (beta-ACP synthase) + :: + + >gi|739806613|ref|WP_037658548.1| beta-ACP synthase [Streptomyces aurantiacus] + MSGRRVVVTGMEVLAPGGVGTDNFWSLLSEGRTATRGITFFDPAQFRSRVAAEIDFDPYAHGLTPQEVRR + MDRAAQFAVVAARGAVADSGLDTDTLDPYRIGVTIGSAVGATMSLDEDYRVVSDAGRLDLVDHTYADPFF + YNYFVPSSFATEVARLVGAQGPSSVVSAGCTSGLDSVGYAVELIREGTADVMVAGATDAPISPITMACFD + AIKATTPRHDDPEHASRPFDDTRNGFVLGEGTAVFVLEELESARRRGARIYAEIAGYATRSNAYHMTGLR + PDGAEMAEAITVALDEARMNPTAIDYINAHGSGTKQNDRHETAAFKRSLGEHAYRTPVSSIKSMVGHSLG + AIGSIEIAASILAIQHDVVPPTANLHTPDPQCDLDYVPLNAREQIVDAVLTVGSGFGGFQSAMVLAQPER + NAA + + +Citation +======== + +If you use this workflow directly, or a derivative of it, or the associated +NCBI BLAST wrappers for Galaxy, in work leading to a scientific publication, +please cite: + +Peter J. A. Cock, John M. Chilton, Björn Grüning, James E. 
Johnson, Nicola Soranzo +NCBI BLAST+ integrated into Galaxy + +* http://biorxiv.org/content/early/2015/01/21/014043 +* http://dx.doi.org/10.1101/014043 + + +Availability +============ + +This workflow is available on the main Galaxy Tool Shed: + +http://toolshed.g2.bx.psu.edu/view/bgruening/find_genes_located_nearby_workflow + +Development is being done on github: + +https://github.com/bgruening/galaxytools/tree/master/workflows/ncbi_blast_plus/find_genes_located_nearby + + +Dependencies +============ + +These dependencies should be resolved automatically via the Galaxy Tool Shed: + +* http://toolshed.g2.bx.psu.edu/view/devteam/ncbi_blast_plus