Mercurial > repos > sarahinraauzeville > star
changeset 6:b7ed4ee33fb6 draft default tip
Deleted selected files
author | sarahinraauzeville |
---|---|
date | Mon, 09 Dec 2019 10:55:33 -0500 |
parents | c4fc8ff6e280 |
children | |
files | GalaxyPath.pm README_STARV2 STAR_indexes.loc.sample sm_STAR2_V2.pl sm_STAR2_V2.xml tool_data_table_conf.xml.sample |
diffstat | 6 files changed, 0 insertions(+), 659 deletions(-) [+] |
line wrap: on
line diff
# GalaxyPath.pm -- shown here as the 19-line file removed by this changeset
# (a/GalaxyPath.pm -> /dev/null).
package GalaxyPath;

use strict;
use warnings;

# All INI parsing (new, val, ...) is inherited from Config::IniFiles.
use parent 'Config::IniFiles';

# _convert_path($path)
#
# Returns $path unchanged when it is absolute (starts with "/"); otherwise
# returns it prefixed with "$ENV{MY_GALAXY_DIR}/galaxy/".
# Private helper: my_path() checks MY_GALAXY_DIR before calling it.
sub _convert_path {
    my ($path) = @_;

    return $path if $path =~ m{\A/};
    return "$ENV{'MY_GALAXY_DIR'}/galaxy/$path";
}

# $cfg->my_path($section, $item)
#
# Looks up $item in [$section] of the loaded configuration file, treats the
# value as a comma-separated list of paths, converts every relative entry
# with _convert_path() and returns the entries joined with commas again.
#
# Dies when:
#   - the MY_GALAXY_DIR environment variable is not defined,
#   - the directory it names does not exist,
#   - the requested section/item is absent from the configuration
#     (previously this silently produced an empty path).
sub my_path {
    my ( $self, $section, $item ) = @_;

    defined $ENV{'MY_GALAXY_DIR'}
        or die 'no MY_GALAXY_DIR environment variable defined';
    -e $ENV{'MY_GALAXY_DIR'}
        or die "$ENV{'MY_GALAXY_DIR'} path not found";

    # Scalar context: a single string, or undef when the entry is missing.
    my $value = $self->val( $section, $item );
    defined $value
        or die "no '$item' entry found in section [$section] of the configuration file";

    return join ',', map { _convert_path($_) } split /,/, $value;
}

1;
--- a/README_STARV2 Tue Dec 12 10:16:23 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,82 +0,0 @@ -************************************************************************************ -****** Add perl and xml wrapper files in tools/ directory ****** -************************************************************************************ - -Download files with hg clone in tools/ directory, then you have these files available (wrappers and scripts): -sm_STAR2_V2.pl sm_STAR2_V2.xml -GalaxyPath.pm -STAR_indexes.loc.sample -STAR_indexes.loc.sample - -Add GalaxyPath.pm file in /path/to/galaxy_sources/ directory - -************************************************************************************ -****** loc file ****** -************************************************************************************ - -mv STAR_indexes.loc.sample STAR_indexes.loc -then -Add STAR_indexes.loc in your tool-data/ repository - -************************************************************************************ -****** tool_conf.xml file to update ****** -************************************************************************************ - -Add a tag <tool> in your section "RNAseq" (for instance) in your config/tool_conf.xml file: - <tool file="/path/to/sm_STAR2_V2.xml" /> - -************************************************************************************ -****** job_conf.xml file to update ****** -************************************************************************************ -Add a new section in config/job_conf.xml file, in <destinations> tag, add these lines: - - <destination id="sm_star_single_V2_job" runner="drmaa"> - <param id="galaxy_external_runjob_script">scripts/drmaa_external_runner.py</param> - <param id="galaxy_external_killjob_script">scripts/drmaa_external_killer.py</param> - <param id="galaxy_external_chown_script">scripts/external_chown_script.py</param> - <param id="nativeSpecification">-clear -V -q galaxy.q -l mem=12G -l h_vmem=16G -pe parallel_smp
2</param> - <env file="/galaxydata/galaxy-prod/my_config/local_env.sh"/> - </destination> - -and in <tools> tag: - <tool id="sm_star_single_V2" destination="sm_star_single_V2_job"/> - -************************************************************************************ -****** tool_data_table_conf.xml file to update ****** -************************************************************************************ - -Add these lines in your tool_data_table_conf.xml file: - - <!-- location of loc file for STAR V2 tool --> - <table name="STAR_indexes" comment_char="#"> - <columns>value, dbkey, name, path</columns> - <file path="my_tool-data/STAR_indexes.loc" /> - </table> - -************************************************************************************ -****** Add paths in galaxy.ini configuration file ****** -************************************************************************************ - -Add a my_bin/ directory in galaxy repository with symbolic links to binaries: -$ ls -ltrah /path/to/galaxy/my_bin/STAR* -lrwxrwxrwx 1 galaxy-prod wbioinfo 11 3 juin 2016 /galaxydata/galaxy-prod/my_bin/STAR -> STAR_2.4.0i - - -Add paths to binaries and a section [workPath], at the end of galaxy.ini file: -In geany ../../config/galaxy.ini & -[workPath] -STAR_PATH=/path/to/galaxy/my_bin/STAR - - -Add a my_workspace/ directory in galaxy repository to run this module in debug mode: -cd /path/to/galaxy/; -mkdir my_workspace/; -chmod 777 my_workspace/; - -Add paths to binaries and a section [workPath], at the end of galaxy.ini file: -In geany ../../config/galaxy.ini & -[workPath] -MYWORKSPACE=/path/to/galaxy/my_workspace/ - - -
--- a/STAR_indexes.loc.sample Tue Dec 12 10:16:23 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,96 +0,0 @@ -#This is a sample file distributed with Galaxy that enables tools -#to use a directory of Bowtie indexed sequences data files. You will -#need to create these data files and then create a bowtie_indices.loc -#file similar to this one (store it in this directory) that points to -#the directories in which those files are stored. The bowtie_indices.loc -#file has this format (longer white space characters are TAB characters): -# -#<unique_build_id> <dbkey> <display_name> <file_base_path> -# -#So, for example, if you had hg18 indexed stored in -#/depot/data2/galaxy/bowtie/hg18/, -#then the bowtie_indices.loc entry would look like this: -# -#hg18 hg18 hg18 /depot/data2/galaxy/bowtie/hg18/hg18 -# -#and your /depot/data2/galaxy/bowtie/hg18/ directory -#would contain hg18.*.ebwt files: -# -#-rw-r--r-- 1 james universe 830134 2005-09-13 10:12 hg18.1.ebwt -#-rw-r--r-- 1 james universe 527388 2005-09-13 10:12 hg18.2.ebwt -#-rw-r--r-- 1 james universe 269808 2005-09-13 10:12 hg18.3.ebwt -#...etc... -# -#Your bowtie_indices.loc file should include an entry per line for each -#index set you have stored. The "file" in the path does not actually -#exist, but it is the prefix for the actual index files. For example: -# -#hg18canon hg18 hg18 Canonical /depot/data2/galaxy/bowtie/hg18/hg18canon -#hg18full hg18 hg18 Full /depot/data2/galaxy/bowtie/hg18/hg18full -#/orig/path/hg19 hg19 hg19 /depot/data2/galaxy/bowtie/hg19/hg19 -#...etc... 
-# - -#Banques temporaires mises en place au debut de l installation de Galaxy -#AthalianaAllChr AthalianaAllChr AthalianaAllChr /bank/bowtiedb/AthalianaAllChr -#Drosophila_melanogaster_AllChr Drosophila_melanogaster_AllChr Drosophila melanogaster All Chr /bank/bowtiedb/Drosophila_melanogaster_AllChr -#Homo_sapiens_AllChr Homo_sapiens_AllChr Homo_sapiens_AllChr /bank/bowtiedb/Homo_sapiens_AllChr -#yeast.nt yeast.nt yeast nt /bank/bowtiedb/yeast.nt - -#Banques definitives transmises par l administrateur system sur /bank/bowtiedb -STAR-Arabidopsis_thaliana STAR-Arabidopsis_thaliana STAR-Arabidopsis_thaliana /bank/STARdb/STAR-Arabidopsis_thaliana -STAR-ensembl_gallus_gallus_genome_masked STAR-ensembl_gallus_gallus_genome_masked STAR-ensembl_gallus_gallus_genome_masked /bank/STARdb/STAR-ensembl_gallus_gallus_genome_soft_masked -STAR-Gorilla_gorilla STAR-Gorilla_gorilla STAR-Gorilla_gorilla /bank/STARdb/STAR-Gorilla_gorilla -STAR-hg38 hg38 STAR-ensembl_homo_sapiens_genome (GRCh38/hg38) /galaxydata/galaxy_bank/homo_sapiens/hg38/ensembl-89 -STAR-Homo_sapiens_AllChr STAR-Homo_sapiens_AllChr STAR-Homo_sapiens_AllChr /bank/STARdb/STAR-Homo_sapiens_AllChr -STAR-ensembl_bos_taurus_genome STAR-ensembl_bos_taurus_genome STAR-ensembl_bos_taurus_genome /bank/STARdb/STAR-ensembl_bos_taurus_genome -STAR-ensembl_homo_sapiens_genome_masked STAR-ensembl_homo_sapiens_genome_masked STAR-ensembl_homo_sapiens_genome_masked /bank/STARdb/STAR-ensembl_homo_sapiens_genome_soft_masked -STAR-Macaca_mulatta STAR-Macaca_mulatta STAR-Macaca_mulatta /bank/STARdb/STAR-Macaca_mulatta -STAR-ensembl_bos_taurus_genome_masked STAR-ensembl_bos_taurus_genome_masked STAR-ensembl_bos_taurus_genome_masked /bank/STARdb/STAR-ensembl_bos_taurus_genome_soft_masked -STAR-ensembl_rattus_norvegicus_genome STAR-ensembl_rattus_norvegicus_genome STAR-ensembl_rattus_norvegicus_genome /bank/STARdb/STAR-ensembl_rattus_norvegicus_genome -STAR-Nomascus_leucogenys STAR-Nomascus_leucogenys STAR-Nomascus_leucogenys 
/bank/STARdb/STAR-Nomascus_leucogenys -STAR-ensembl_equus_caballus_genome STAR-ensembl_equus_caballus_genome STAR-ensembl_equus_caballus_genome /bank/STARdb/STAR-ensembl_equus_caballus_genome -STAR-ensembl_rattus_norvegicus_genome_masked STAR-ensembl_rattus_norvegicus_genome_masked STAR-ensembl_rattus_norvegicus_genome_masked /bank/STARdb/STAR-ensembl_rattus_norvegicus_genome_soft_masked -STAR-Pan_troglodytes STAR-Pan_troglodytes STAR-Pan_troglodytes /bank/STARdb/STAR-Pan_troglodytes -STAR-ensembl_equus_caballus_genome_masked STAR-ensembl_equus_caballus_genome_masked STAR-ensembl_equus_caballus_genome_masked /bank/STARdb/STAR-ensembl_equus_caballus_genome_soft_masked -# STAR-ensembl_sus_scrofa_genome susScr4 ensembl sus_scrofa genome /bank/STARdb/STAR-ensembl_sus_scrofa_genome -# STAR-ensembl_sus_scrofa_genome_masked STAR-ensembl_sus_scrofa_genome_masked STAR-ensembl_sus_scrofa_genome_masked /bank/STARdb/STAR-ensembl_sus_scrofa_genome_soft_masked -# -STAR-ensembl_sus_scrofa_V10.2 susScr3 Ensembl sus_scrofa genome V10.2 /galaxydata/galaxy_bank/sus_scrofa/Sscrofa10.2/ensembl-89/STAR-ensembl_sus_scrofa_genome -STAR-ensembl_sus_scrofa_V11.1 susScr11 Ensembl sus_scrofa genome V11.1 /galaxydata/galaxy_bank/sus_scrofa/Sscrofa11.1/ensembl-90/STAR-ensembl_sus_scrofa_genome -# -STAR-Papio_anubis STAR-Papio_anubis STAR-Papio_anubis /bank/STARdb/STAR-Papio_anubis -STAR-ensembl_gallus_gallus_genome STAR-ensembl_gallus_gallus_genome STAR-ensembl_gallus_gallus_genome /bank/STARdb/STAR-ensembl_gallus_gallus_genome -STAR-Pongo_abelii STAR-Pongo_abelii STAR-Pongo_abelii /bank/STARdb/STAR-Pongo_abelii -STAR-ensembl_danio_rerio_genome STAR-ensembl_danio_rerio_genome STAR-ensembl_danio_rerio_genome /bank/STARdb/STAR-ensembl_danio_rerio_genome -STAR-ensembl_danio_rerio_genome_masked STAR-ensembl_danio_rerio_genome_masked STAR-ensembl_danio_rerio_genome_masked /bank/STARdb/STAR-ensembl_danio_rerio_genome_soft_masked -STAR-ensembl_oreochromis_niloticus_genome 
STAR-ensembl_oreochromis_niloticus_genome STAR-ensembl_oreochromis_niloticus_genome /bank/STARdb/STAR-ensembl_oreochromis_niloticus_genome -STAR-ensembl_oreochromis_niloticus_genome_nonchomosomal STAR-ensembl_oreochromis_niloticus_genome_nonchomosomal STAR-ensembl_oreochromis_niloticus_genome_nonchomosomal /bank/STARdb/STAR-ensembl_oreochromis_niloticus_genome_non_chromosomal -STAR_oryCun2_ucsc STAR_oryCun2_ucsc STAR oryCun2 ucsc /bank/STARdb/STAR-oryCun2_ucsc -STAR-Capra_hircus STAR-Capra_hircus STAR-Capra_hircus /bank/STARdb/STAR-Capra_hircus -STAR-ensembl_ovis_aries_genome STAR-ensembl_ovis_aries_genome STAR-ensembl_ovis_aries_genome /bank/STARdb/STAR-ensembl_ovis_aries_genome -STAR-Mycoplasma_agalactiae_PG2_uid61619 STAR-Mycoplasma_agalactiae_PG2_uid61619 STAR-Mycoplasma_agalactiae_PG2_uid61619 /bank/STARdb/STAR-Mycoplasma_agalactiae_PG2_uid61619_test -Mycoplasma_agalactiae_uid46679 Mycoplasma_agalactiae_uid46679 Mycoplasma_agalactiae_uid46679 /bank/STARdb/STAR-Mycoplasma_agalactiae_uid46679_test -Mycoplasma_bovis_HB0801_uid168665 Mycoplasma_bovis_HB0801_uid168665 Mycoplasma_bovis_HB0801_uid168665 /bank/STARdb/STAR-Mycoplasma_bovis_HB0801_uid168665_test -Mycoplasma_bovis_Hubei_1_uid68691 Mycoplasma_bovis_Hubei_1_uid68691 Mycoplasma_bovis_Hubei_1_uid68691 /bank/STARdb/STAR-Mycoplasma_bovis_Hubei_1_uid68691_test -Mycoplasma_bovis_PG45_uid60859 Mycoplasma_bovis_PG45_uid60859 Mycoplasma_bovis_PG45_uid60859 /bank/STARdb/STAR-Mycoplasma_bovis_PG45_uid60859_test -Mycoplasma_capricolum_ATCC_27343_uid58525 Mycoplasma_capricolum_ATCC_27343_uid58525 Mycoplasma_capricolum_ATCC_27343_uid58525 /bank/STARdb/STAR-Mycoplasma_capricolum_ATCC_27343_uid58525_test -Mycoplasma_hominis_ATCC_23114_uid41875 Mycoplasma_hominis_ATCC_23114_uid41875 Mycoplasma_hominis_ATCC_23114_uid41875 /bank/STARdb/STAR-Mycoplasma_hominis_ATCC_23114_uid41875_test -Mycoplasma_mycoides_capri_LC_95010_uid66189 Mycoplasma_mycoides_capri_LC_95010_uid66189 Mycoplasma_mycoides_capri_LC_95010_uid66189 
/bank/STARdb/STAR-Mycoplasma_mycoides_capri_LC_95010_uid66189_test -Mycoplasma_mycoides_SC_Gladysdale_uid197153 Mycoplasma_mycoides_SC_Gladysdale_uid197153 Mycoplasma_mycoides_SC_Gladysdale_uid197153 /bank/STARdb/STAR-Mycoplasma_mycoides_SC_Gladysdale_uid197153_test -Mycoplasma_mycoides_SC_PG1_uid58031 Mycoplasma_mycoides_SC_PG1_uid58031 Mycoplasma_mycoides_SC_PG1_uid58031 /bank/STARdb/STAR-Mycoplasma_mycoides_SC_PG1_uid58031_test -Mycoplasma_pneumoniae_309_uid85495 Mycoplasma_pneumoniae_309_uid85495 Mycoplasma_pneumoniae_309_uid85495 /bank/STARdb/STAR-Mycoplasma_pneumoniae_309_uid85495_test -Mycoplasma_pneumoniae_FH_uid162027 Mycoplasma_pneumoniae_FH_uid162027 Mycoplasma_pneumoniae_FH_uid162027 /bank/STARdb/STAR-Mycoplasma_pneumoniae_FH_uid162027_test -Mycoplasma_pneumoniae_M129_B7_uid185759 Mycoplasma_pneumoniae_M129_B7_uid185759 Mycoplasma_pneumoniae_M129_B7_uid185759 /bank/STARdb/STAR-Mycoplasma_pneumoniae_M129_B7_uid185759_test -Mycoplasma_pneumoniae_M129_uid57709 Mycoplasma_pneumoniae_M129_uid57709 Mycoplasma_pneumoniae_M129_uid57709 /bank/STARdb/STAR-Mycoplasma_pneumoniae_M129_uid57709_test -Mycoplasma_agalactiae_GCF_000089865 Mycoplasma_agalactiae_GCF_000089865 Mycoplasma_agalactiae_GCF_000089865 /bank/STARdb/STAR-Mycoplasma_agalactiae_GCF_000089865 -Assembly_Bacteria_2014-06-24-Mycoplasma_agalactiae_GCF_000089865 Assembly_Bacteria_2014-06-24-Mycoplasma_agalactiae_GCF_000089865 Assembly_Bacteria_2014-06-24-Mycoplasma_agalactiae_GCF_000089865 /bank/ncbi/genomes/Assembly_Bacteria/Assembly_Bacteria_2014-06-24/STAR/STAR-Mycoplasma_agalactiae_GCF_000089865 -STAR-ensembl_canis_familiaris_genome_soft_masked STAR-ensembl_canis_familiaris_genome_soft_masked STAR-ensembl_canis_familiaris_genome_soft_masked /bank/STARdb/STAR-ensembl_canis_familiaris_genome_soft_masked -STAR-ensembl_canis_familiaris_genome STAR-ensembl_canis_familiaris_genome STAR-ensembl_canis_familiaris_genome /bank/STARdb/STAR-ensembl_canis_familiaris_genome -STAR-ensembl_mus_musculus_primary_genome 
STAR-ensembl_mus_musculus_primary_genome STAR-ensembl_mus_musculus_primary_genome /bank/STARdb/STAR-ensembl_mus_musculus_primary_genome -STAR-Ovis_aries_2015-12-19_NCBI STAR-Ovis_aries_2015-12-19_NCBI STAR-Ovis_aries_2015-12-19_NCBI /bank/ncbi/genomes/Ovis_aries/current/STAR/STAR-Ovis_aries -#Note that for backwards compatibility with workflows, the unique ID of -#an entry must be the path that was in the original loc file, because that -#is the value stored in the workflow for that parameter. That is why the -#hg19 entry above looks odd. New genomes can be better-looking. -#
#!/usr/bin/perl

# sm_STAR2_V2.pl -- shown here as the 236-line file removed by this changeset
# (a/sm_STAR2_V2.pl -> /dev/null).
#
# Galaxy wrapper around the STAR aligner (RNA-seq read mapping).
# 10/02/2014 - Sarah Maman
#
# Usage (every option is supplied by the sm_STAR2_V2.xml tool definition):
#   perl sm_STAR2_V2.pl
#        --runThreadN N
#        --refselector genotoul|ownfasta
#        [--genomeDir DIR | --refownfastaref FASTA --refowngtf GTF]
#        --readsselector single|paired
#        [--readsinputread FASTQ | --readFilesIn1 FASTQ --readFilesIn2 FASTQ]
#        --compress compress|notcompress
#        --alignIntronMin N --alignIntronMax N --outFilterMismatchNmax N
#        --orientation Yes|No --cufflinks cuff|nocuff
#        --outputfile BAM --outputfileT BAM
#        --outputlogSJ FILE --outputlogfinal FILE
#
# Copyright (C) 2014 INRA
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
use strict;
use warnings;
use File::Basename;
use Getopt::Long;
use lib "$ENV{'MY_GALAXY_DIR'}";
use GalaxyPath;

# Site configuration: work area and STAR binary are read from the Galaxy
# configuration file named by the GALAXY_CONFIG_FILE environment variable.
my $cfg = GalaxyPath->new( -file => $ENV{"GALAXY_CONFIG_FILE"} )
    or die "Cannot load the Galaxy configuration file named by GALAXY_CONFIG_FILE\n";
my $PATH = $cfg->my_path( 'workPath',  'MYWORKSPACE' );
my $STAR = $cfg->my_path( 'toolsPath', 'STAR_PATH' );

my $Nthreads;
my $genome_path;
my $reads_selector;
my $input_read;
my $Read1fastqgz;
my $Read2fastqgz;
my $alignIntronMin;
my $alignIntronMax;
my $outFilterMismatchNmax;
my $orientation;
my $refownfastaref;
my $refselector;
my $refowngtf;
my $compress;
my $cufflinks;
my $outputfile;
my $outputfileT;
my $outputlogSJ;
my $outputlogfinal;

Getopt::Long::Configure( 'no_ignorecase', 'bundling' );
GetOptions(
    'runThreadN=i'            => \$Nthreads,
    'genomeDir=s'             => \$genome_path,
    'refselector=s'           => \$refselector,
    'refownfastaref=s'        => \$refownfastaref,
    'refowngtf=s'             => \$refowngtf,
    'compress=s'              => \$compress,
    'cufflinks=s'             => \$cufflinks,
    'readsselector=s'         => \$reads_selector,
    'readFilesIn1=s'          => \$Read1fastqgz,
    'readFilesIn2=s'          => \$Read2fastqgz,
    'readsinputread=s'        => \$input_read,
    'alignIntronMin=i'        => \$alignIntronMin,
    'alignIntronMax=i'        => \$alignIntronMax,
    'outFilterMismatchNmax=i' => \$outFilterMismatchNmax,
    'orientation=s'           => \$orientation,
    'outputfile=s'            => \$outputfile,
    'outputfileT=s'           => \$outputfileT,
    'outputlogfinal=s'        => \$outputlogfinal,
    'outputlogSJ=s'           => \$outputlogSJ
) or die "Usage: Error in command line arguments\n";

# Fail early, with the option names, instead of building a command line out
# of undefined values.
my %required = (
    runThreadN            => $Nthreads,
    refselector           => $refselector,
    readsselector         => $reads_selector,
    compress              => $compress,
    cufflinks             => $cufflinks,
    orientation           => $orientation,
    alignIntronMin        => $alignIntronMin,
    alignIntronMax        => $alignIntronMax,
    outFilterMismatchNmax => $outFilterMismatchNmax,
    outputfile            => $outputfile,
    outputfileT           => $outputfileT,
    outputlogSJ           => $outputlogSJ,
    outputlogfinal        => $outputlogfinal,
);
if ( $required{refselector} && $refselector eq 'ownfasta' ) {
    $required{refownfastaref} = $refownfastaref;
    $required{refowngtf}      = $refowngtf;
}
else {
    $required{genomeDir} = $genome_path;
}
if ( $required{readsselector} && $reads_selector eq 'single' ) {
    $required{readsinputread} = $input_read;
}
else {
    $required{readFilesIn1} = $Read1fastqgz;
    $required{readFilesIn2} = $Read2fastqgz;
}
my @missing = grep { !defined $required{$_} } sort keys %required;
die 'Usage: missing option(s): ' . join( ', ', map { "--$_" } @missing ) . "\n"
    if @missing;

# Reference command lines.
#
# Index generation:
#   STAR --runThreadN 4 --runMode genomeGenerate --genomeDir /work/smaman/TP_RNAseq/INDEX/
#        --genomeFastaFiles ITAG2.3_genomic_Ch6.fasta --sjdbGTFfile ITAG_pre2.3_gene_models_Ch6.gtf
#        --sjdbOverhang 100
#   Files found in INDEX/ afterwards: genomeParameters.txt exonGeTrInfo.tab
#   geneInfo.tab transcriptInfo.tab exonInfo.tab sjdbList.fromGTF.out.tab
#   sjdbInfo.txt sjdbList.out.tab chrName.txt chrLength.txt chrStart.txt
#   chrNameLength.txt Genome SA SAindex
#
# Alignment:
#   STAR --readFilesIn WTr1.fastq WTr2.fastq --genomeDir /work/smaman/TP_RNAseq/INDEX/
#        --sjdbGTFfile ITAG_pre2.3_gene_models_Ch6.gtf --outSAMtype BAM SortedByCoordinate
#        --alignIntronMin 20 --alignIntronMax 1000000 --outFilterMismatchNmax 10
#        --runThreadN 4 --outFileNamePrefix galaxyName --outSAMstrandField intronMotif
#        --outFilterIntronMotifs RemoveNoncanonical --outFilterType BySJout
#        --quantMode TranscriptomeSAM
#   Files produced (the first four are the ones handed back to Galaxy):
#   galaxyNameAligned.sortedByCoord.out.bam galaxyNameAligned.toTranscriptome.out.bam
#   galaxyNameSJ.out.tab galaxyNameLog.final.out galaxyNameLog.progress.out
#   galaxyNameLog.out Log.out

# Work area selection. With $debug == 0 the job runs below the MYWORKSPACE
# directory read from the configuration; any other value moves the work area
# next to the output dataset.
my $debug = 0;
if ( $debug == 0 ) {
    print STDOUT "Debug mode OK \n";
}
else {
    $PATH = dirname($outputfile);
    print STDOUT "No debug \n";
}

# The numeric id of the output dataset names the per-job working directory
# and is also used as the STAR output file prefix.
my ($nb) = ( $outputfile =~ /dataset_(\d+)\.\S+$/ );
defined $nb
    or die "Cannot extract a dataset number from output file name '$outputfile'\n";

# Create the working directory and make it world-writable.
my $dirresults = "$PATH/" . $nb;
if ( !-d $dirresults ) {
    mkdir $dirresults
        or die "Cannot create working directory $dirresults: $!\n";
}
system( 'chmod', '-R', '777', $dirresults ) == 0
    or warn "Could not change permissions of $dirresults\n";

print STDOUT "Job working directory : $dirresults \n";

# Own reference: build a STAR index inside the working directory first.
if ( $refselector eq "ownfasta" ) {
    my $index_dir = "$dirresults/INDEX";
    if ( !-d $index_dir ) {
        mkdir $index_dir or die "Cannot create index directory $index_dir: $!\n";
    }
    chmod( 0777, $index_dir ) or warn "Could not change permissions of $index_dir: $!\n";

    my ( $cmdSTARindex, $index_status ) = run_logged(
        $dirresults, './out_Starindex.log',
        $STAR,
        '--runThreadN',        $Nthreads,
        '--runMode',           'genomeGenerate',
        '--genomeDir',         $index_dir,
        '--genomeFastaFiles',  $refownfastaref,
        '--sjdbGTFfile',       $refowngtf,
        '--sjdbOverhang',      100,
    );

    # Information for the biologists.
    print STDOUT "STAR Genome Generate : \n\n $cmdSTARindex \n\n ";
    die_on_failure( 'STAR genome generation', $index_status, './out_Starindex.log' );
    $genome_path = "$dirresults/INDEX/";
}

# Extra options wanted for Cufflinks-compatible output.
my @cufflinks_opts = $cufflinks eq "cuff"
    ? qw(--outSAMstrandField intronMotif
         --outFilterIntronMotifs RemoveNoncanonical
         --outFilterType BySJout
         --quantMode TranscriptomeSAM)
    : ();

# Compressed input is read through zcat; the inputs are linked into the
# working directory under a name carrying the matching extension.
my $is_compressed   = $compress eq "compress";
my @decompress_opts = $is_compressed ? ( '--readFilesCommand', 'zcat' ) : ();
my $extension       = $is_compressed ? 'fastq.gz' : 'fastq';

my @reads;
if ( $reads_selector eq "single" ) {
    @reads = ( link_input( $input_read, "$dirresults/input_read.$extension" ) );
}
else {
    @reads = (
        link_input( $Read1fastqgz, "$dirresults/Read1.$extension" ),
        link_input( $Read2fastqgz, "$dirresults/Read2.$extension" ),
    );
}

# The STAR command line is the same for oriented and non-oriented reads;
# only the message shown to the user differs.
my ( $star_cmd, $star_status ) = run_logged(
    $dirresults, './out_Star.log',
    $STAR,
    '--runThreadN',            $Nthreads,
    '--genomeDir',             $genome_path,
    '--readFilesIn',           @reads,
    '--outSAMtype',            'BAM', 'SortedByCoordinate',
    '--alignIntronMin',        $alignIntronMin,
    '--alignIntronMax',        $alignIntronMax,
    '--outFilterMismatchNmax', $outFilterMismatchNmax,
    @decompress_opts,
    '--outFileNamePrefix',     $nb,
    @cufflinks_opts,
);

# Information for the biologists.
if ( $orientation eq "No" ) {
    print STDOUT "STAR command run on cluster without oriented reads : \n\n $star_cmd \n\n ";
}
else {
    print STDOUT "STAR command run on cluster with oriented reads : \n\n $star_cmd \n\n\n"
        . " Instead, you need to run Cufflinks with the library option --library-type options. For example, cufflinks <…> -library-type fr-firststrand should be used for the “standard” dUTP protocol. This option has to be used only for Cufflinks runs and not for STAR runs.\n\n";
}
die_on_failure( 'STAR alignment', $star_status, './out_Star.log' );

# Hand the result files back to Galaxy.
collect_output( "$dirresults/*$nb*Aligned.sortedByCoord.out.bam",
    $outputfile, 'Aligned.sortedByCoord.out.bam file' );

# The transcriptome BAM only exists when --quantMode TranscriptomeSAM was
# requested, i.e. in "cuff" mode; do not report it as missing otherwise.
if (@cufflinks_opts) {
    collect_output( "$dirresults/*$nb*Aligned.toTranscriptome.out.bam",
        $outputfileT, 'Aligned.toTranscriptome.out.bam file' );
}
collect_output( "$dirresults/$nb*SJ.out.tab", $outputlogSJ, 'SJ.out.tab log file' );
collect_output( "$dirresults/$nb*Log.final.out", $outputlogfinal,
    'Log.final.out log file' );

# shell_quote($arg)
# Returns $arg unchanged when it only contains characters that are safe in a
# /bin/sh command line, otherwise wraps it in single quotes.
sub shell_quote {
    my ($arg) = @_;

    return $arg if $arg =~ m{\A[A-Za-z0-9_./:=+,-]+\z};
    ( my $quoted = $arg ) =~ s/'/'\\''/g;
    return "'$quoted'";
}

# run_logged($workdir, $logfile, @argv)
# Runs @argv from inside $workdir with stdout and stderr sent to $logfile
# ($logfile is resolved against the caller's current directory, not
# $workdir). Returns the shell command that was run and the raw status
# returned by system().
sub run_logged {
    my ( $workdir, $logfile, @argv ) = @_;

    my $command = sprintf '(cd %s && %s) > %s 2>&1',
        shell_quote($workdir),
        join( ' ', map { shell_quote($_) } @argv ),
        shell_quote($logfile);
    my $status = system $command;
    return ( $command, $status );
}

# die_on_failure($what, $status, $logfile)
# Dies with a message pointing at $logfile unless $status (as returned by
# system) is 0.
sub die_on_failure {
    my ( $what, $status, $logfile ) = @_;

    return if $status == 0;
    die "$what could not be started\n" if $status == -1;
    die sprintf "%s failed (exit code %d, wait status %d), see %s\n",
        $what, $status >> 8, $status, $logfile;
}

# link_input($source, $link)
# Creates (or replaces) the symbolic link $link pointing at $source and
# returns $link. Dies when the link cannot be created.
sub link_input {
    my ( $source, $link ) = @_;

    unlink $link if -l $link;
    symlink( $source, $link )
        or die "Cannot link $source to $link: $!\n";
    return $link;
}

# collect_output($pattern, $destination, $label)
# Copies the first file matching the glob $pattern to $destination with
# "cp -a". Reports "$label not found." on STDERR when nothing matches.
# Returns 1 on success, 0 otherwise.
sub collect_output {
    my ( $pattern, $destination, $label ) = @_;

    my ($found) = glob $pattern;
    if ( !defined $found || !-e $found ) {
        print STDERR "$label not found. \n";
        return 0;
    }
    if ( system( 'cp', '-a', $found, $destination ) != 0 ) {
        print STDERR "Could not copy $found to $destination \n";
        return 0;
    }
    return 1;
}
--- a/sm_STAR2_V2.xml Tue Dec 12 10:16:23 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,219 +0,0 @@ -<!--# Copyright (C) 2014 INRA -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see http://www.gnu.org/licenses/. -#--> -<tool id="sm_star_single_V2" name="Map with STAR 2.4.0i"> - <description> with GTF and reference</description> - <command interpreter="perl">sm_STAR2_V2.pl - --runThreadN $Nthreads - --refselector $ref.ref_selector - #if $ref.ref_selector =="genotoul": - --genomeDir $ref.reffile.fields.path - #else: - --refownfastaref $ref.ownfastaref - --refowngtf $ref.owngtf - #end if - #if $reads.reads_selector =="paired": - --readsselector $reads.reads_selector - --readFilesIn1 $Read1fastqgz - --readFilesIn2 $Read2fastqgz - #else: - --readsselector $reads.reads_selector - --readsinputread $reads.inputread - #end if - --compress $compress - --alignIntronMin $alignIntronMin - --alignIntronMax $alignIntronMax - --outFilterMismatchNmax $outFilterMismatchNmax - --orientation $orientation - --cufflinks $cufflinks - --outputfile $outputfile - --outputfileT $outputfileT - --outputlogSJ $outputlogSJ - --outputlogfinal $outputlogfinal - </command> - <inputs> - - <conditional name="reads"> - <param name="reads_selector" type="select" label="Paired or single reads"> - <option value="paired">Paired reads</option> - <option value="single">Single reads</option> - </param> - <when value="paired"> - <param 
format="fastq.gz" name="Read1fastqgz" type="data" label="First input fastq gzipped file (read1.fastq.gz)"/> - <param format="fastq.gz" name="Read2fastqgz" type="data" label="Second input fastq gzipped file (read2.fastq.gz)"/> - </when> - <when value="single"> - <param format="fastq, fastqsanger, fastqillumina" name="inputread" type="data" label="Your single read RNA-Seq FASTQ file"/> - </when> - </conditional> - - <param name="compress" type="select" help="fastq files are compressed or not" label="compressed fastq file"> - <option value="compress">Yes, compressed</option> - <option value="notcompress">Not compressed</option> - </param> - - - <param name="Nthreads" size="30" type="text" value="8" label="Threads number"/> - <param name="alignIntronMin" size="30" type="text" value="20" label="alignIntronMin"/> - <param name="alignIntronMax" size="30" type="text" value="1000000" label="alignIntronMax"/> - <param name="outFilterMismatchNmax" size="30" type="text" value="10" label="outFilterMismatchNmax"/> - <param name="orientation" type="select" help="Instead, you need to run Cufflinks with the library option --library-type options. For example, cufflinks -library-type fr-firststrand should be used for the “standard” dUTP protocol. This option has to be used only for Cufflinks runs and not for STAR runs." 
label="RNAseq oriented (default : oriented for STAR - Option to be set only for cufflinks runs)"> - <option value="Yes">Yes</option> - <option value="No">No</option> - </param> - - - - - <conditional name="ref"> - <param name="ref_selector" type="select" label="Genotoul reference genome or your own fasta file"> - <option value="genotoul">Genotoul reference genome</option> - <option value="ownfasta">Your own fasta file</option> - </param> - <when value="ownfasta"> - <param format="fasta, fa" name="ownfastaref" type="data" label="Your own reference genome"/> - <param format="gtf" name="owngtf" type="data" label="Your own GTF file"/> - </when> - <when value="genotoul"> - <param name="reffile" type="select" label="Using reference genome" help="Select genome from the list"> - <options from_data_table="STAR_indexes"> - <filter type="sort_by" column="2" /> - <validator type="no_options" message="No indexes are available" /> - </options> - <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/> - </param> - </when> - </conditional> - - <param name="cufflinks" type="select" label="For Cufflinks-like strand field flag and types of quantification requested"> - <option value="cuff">Yes</option> - <option value="nocuff">No</option> - </param> - - - - </inputs> - <outputs> - <data format="bam" name="outputfile" label ="Aligned.sortedByCoord.out.bam"/> <!-- choisir un label le plus court possible --> - <data format="bam" name="outputfileT" label ="Aligned.toTranscriptome.out.bam"/> <!-- choisir un label le plus court possible --> - <data format="txt" name="outputlogSJ" label ="SJ.out.tab"/> - <data format="txt" name="outputlogfinal" label ="Log.final.out"/> - </outputs> - <help> - -.. class:: infomark - - What it does : This program STAR allows you to aligns RNA-seq reads to a reference genome using uncompressed suffix arrays. - -.. 
class:: warningmark - -**Command line change if RNAseq reads are oriented or not.** - - -Command line : - -STAR --readFilesIn R1.fastq R2.fastq --genomeDir /path/to/STARindex/ --sjdbGTFfile ref.gtf --alignIntronMin 20 --alignIntronMax 1000000 --outFilterMismatchNmax 10 --outSAMtype BAM SortedByCoordinate --runThreadN 4 --outFileNamePrefix galaxyName - -If fastq input files are compressed, add this option: - ---readFilesCommand zcat for fastq.gz files. - -For Cufflinks-like strand field flag and types of quantification requested, add these options: - ---outSAMstrandField intronMotif --outFilterIntronMotifs RemoveNoncanonical --outFilterType BySJout --quantMode TranscriptomeSAM - - - -**Parameters** - ---readFilesIn : name(s) (with path) of the files containing the sequences to be mapped (e.g. RNA-seq FASTQ files). If using Illumina paired-end reads, the read1 and read2 files have to be supplied. STAR can process both FASTA and FASTQ files. Multi-line (i.e. sequence split in multiple lines) FASTA file are supported. - ---sjdbGTFfile : species the path to the file with annotated transcripts in the standard GTF format. STAR will extract splice junctions from this file and use them to greatly improve accuracy of the mapping. While this is optional, and STAR can be run without annotations, using annotations is highly recommended whenever they are available. - - -**If fastq input files are compressed :** - ---readFilesCommand : UncompressionCommand option, where UncompressionCommand is the un-compression command that takes the file name as input parameter, and sends the uncompressed output to stdout. For example, for gzipped files (*.gz) use --readFilesCommand zcat OR --readFilesCommand gzip -c. 
For bzip2-compressed files, use --readFilesCommand bzip2 -c - - -**Other parameters** - ---alignIntronMin (default: 21) : minimum intron size: genomic gap is considered intron if its length>=alignIntronMin, otherwise it is considered Deletion - ---alignIntronMax (default: 0) : maximum intron size, if 0, max intron size will be determined by (2ˆwinBinNbits)*winAnchorDistNbins - ---outFilterMismatchNmax (default: 10) alignment will be output only if it has fewer mismatches than this value. - ---outSAMtype BAM SortedByCoordinate : output sorted by coordinate Aligned.sortedByCoord.out.bam file, similar to samtools sort command. - ---outFileNamePrefix : output files name prefix (including full or relative path). Can only be defined on the command line. - - -**For Cufflinks-like strand field flag** - ---outSAMstrandField intronMotif : strand derived from the intron motif. Reads with inconsistent and/or non-canonical introns are filtered out. - -In addition, it is recommended to remove the non-canonical junctions for Cufflinks runs using --outFilterIntronMotifs RemoveNoncanonical. - ---outFilterType BySJout : keep only those reads that contain junctions that passed filtering into SJ.out.tab - - -**Types of quantification requested** - ---quantMode TranscriptomeSAM : output SAM/BAM alignments to transcriptome into a separate file - - -**job** - ---runThreadN option defines the number of threads to be used for genome generation, it has to be set to the number of available cores on the server node. - - - ---- - -Version Galaxy Tool : V2.0 - -Versions of bioinformatics tools used : - - A. 
Dobin et al, Bioinformatics 2012; doi: 10.1093/bioinformatics/bts635 - - "STAR: ultrafast universal RNA-seq aligner" - - Important announcements from the author: https://groups.google.com/d/forum/rna-star-announce - - General user mailing list (recommended): https://groups.google.com/d/forum/rna-star - - Email: dobin@cshl.edu - - STAR_2.4.0i - ---- - -Contacts (noms et emails) : sigenae-support@listes.inra.fr - -E-learning available : Not yet. - -Please cite : - - Depending on the help provided you can cite us in acknowledgements, references or both. - - Examples : - Acknowledgements - We wish to thank the SIGENAE group for .... - - References - X. SIGENAE [http://www.sigenae.org/] - - </help> -</tool>
--- a/tool_data_table_conf.xml.sample Tue Dec 12 10:16:23 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,7 +0,0 @@ -<tables> - <!-- location of loc file for STAR V2 tool --> - <table name="STAR_indexes" comment_char="#"> - <columns>value, dbkey, name, path</columns> - <file path="my_tool-data/STAR_indexes.loc" /> - </table> -</tables>