Mercurial > repos > saharlcc > isoem2_isode2
changeset 18:c6d2dbdf0a4d draft
Uploaded
author | saharlcc |
---|---|
date | Fri, 26 May 2017 08:13:06 -0400 |
parents | 40e893e4a37f |
children | 532b1e5feb8f |
files | isoem2_isode2.zip isoem2_isode2/IsoEM.loc isoem2_isode2/IsoEM.loc.sample isoem2_isode2/README.txt isoem2_isode2/isoDE.xml isoem2_isode2/isoem_wrapper.sh isoem2_isode2/isoem_wrapper.xml isoem2_isode2/tool_data_table_conf.xml isoem2_isode2/tool_data_table_conf.xml.sample |
diffstat | 9 files changed, 120 insertions(+), 274 deletions(-) [+] |
line wrap: on
line diff
--- a/isoem2_isode2/IsoEM.loc Fri May 26 07:48:46 2017 -0400 +++ b/isoem2_isode2/IsoEM.loc Fri May 26 08:13:06 2017 -0400 @@ -3,10 +3,10 @@ #NOTE: All entries in this file MUST be tab-delimited #Every entry has the following 5 fields: -#Reference name <GTF> <TMAP_INDEX> <HISAT2_INDEX> <Cluster file> - +#Referencename <GTF> <HISAT2_INDEX> <Cluster file> -mm10-RefSeq77 /import1/GTF/mm10RefSeq77UCSCAug29-2016.gtf /import1/tmap-index/tmap3.4.1/mm10-RefSeq77/mm10RefSeq77UCSCAug29-2016GTF-polyA200.fasta /import1/hisat2-index/mm10-RefSeq77/mm10RefSeq77UCSCAug29-2016GTF-polyA200.fasta /import1/GTF/mm10RefSeq77UCSCAug29-2016_TranscriptIDGeneName.txt -hg38-RefSeq77 /import1/GTF/hg38RefSeq77UCSCAug29-2016.gtf /import1/tmap-index/tmap3.4.1/hg38-RefSeq77/hg38RefSeq77UCSCAug29-2016GTF-polyA200.fasta /import1/hisat2-index/hg38-RefSeq77/hg38RefSeq77UCSCAug29-2016GTF-polyA200.fasta /import1/GTF/hg38RefSeq77UCSCAug29-2016_TranscriptIDGeneName.txt - \ No newline at end of file +hg38_RefSeq /galaxy-prod/galaxy/tools-dependencies/references/GTF/hg38RefSeq77UCSCAug29-2016GTF-polyA200.fasta.GTF /galaxy-prod/galaxy/tools-dependencies/references/hisat2-index/hg38-RefSeq/hg38RefSeq77UCSCAug29-2016GTF-polyA200.fasta /galaxy-prod/galaxy/tools-dependencies/references/GTF/hg38RefSeq77UCSCAug29-2016_TranscriptIDGeneName.txt +mm10_RefSeq /galaxy-prod/galaxy/tools-dependencies/references/GTF/mm10RefSeq77UCSCAug29-2016GTF-polyA200.fasta.GTF /galaxy-prod/galaxy/tools-dependencies/references/hisat2-index/mm10-RefSeq/mm10RefSeq77UCSCAug29-2016GTF-polyA200.fasta /galaxy-prod/galaxy/tools-dependencies/references/GTF/mm10RefSeq77UCSCAug29-2016_TranscriptIDGeneName.txt + +
--- a/isoem2_isode2/IsoEM.loc.sample Fri May 26 07:48:46 2017 -0400 +++ b/isoem2_isode2/IsoEM.loc.sample Fri May 26 08:13:06 2017 -0400 @@ -3,10 +3,10 @@ #NOTE: All entries in this file MUST be tab-delimited #Every entry has the following 5 fields: -#Reference name <GTF> <TMAP_INDEX> <HISAT2_INDEX> <Cluster file> - +#Referencename <GTF> <HISAT2_INDEX> <Cluster file> -mm10-RefSeq77 /import1/GTF/mm10RefSeq77UCSCAug29-2016.gtf /import1/tmap-index/tmap3.4.1/mm10-RefSeq77/mm10RefSeq77UCSCAug29-2016GTF-polyA200.fasta /import1/hisat2-index/mm10-RefSeq77/mm10RefSeq77UCSCAug29-2016GTF-polyA200.fasta /import1/GTF/mm10RefSeq77UCSCAug29-2016_TranscriptIDGeneName.txt -hg38-RefSeq77 /import1/GTF/hg38RefSeq77UCSCAug29-2016.gtf /import1/tmap-index/tmap3.4.1/hg38-RefSeq77/hg38RefSeq77UCSCAug29-2016GTF-polyA200.fasta /import1/hisat2-index/hg38-RefSeq77/hg38RefSeq77UCSCAug29-2016GTF-polyA200.fasta /import1/GTF/hg38RefSeq77UCSCAug29-2016_TranscriptIDGeneName.txt - \ No newline at end of file +hg38_RefSeq /galaxy-prod/galaxy/tools-dependencies/references/GTF/hg38RefSeq77UCSCAug29-2016GTF-polyA200.fasta.GTF /galaxy-prod/galaxy/tools-dependencies/references/hisat2-index/hg38-RefSeq/hg38RefSeq77UCSCAug29-2016GTF-polyA200.fasta /galaxy-prod/galaxy/tools-dependencies/references/GTF/hg38RefSeq77UCSCAug29-2016_TranscriptIDGeneName.txt +mm10_RefSeq /galaxy-prod/galaxy/tools-dependencies/references/GTF/mm10RefSeq77UCSCAug29-2016GTF-polyA200.fasta.GTF /galaxy-prod/galaxy/tools-dependencies/references/hisat2-index/mm10-RefSeq/mm10RefSeq77UCSCAug29-2016GTF-polyA200.fasta /galaxy-prod/galaxy/tools-dependencies/references/GTF/mm10RefSeq77UCSCAug29-2016_TranscriptIDGeneName.txt + +
```diff
--- a/isoem2_isode2/README.txt Fri May 26 07:48:46 2017 -0400
+++ b/isoem2_isode2/README.txt Fri May 26 08:13:06 2017 -0400
@@ -16,9 +16,11 @@
 
 4) Edit isoem_wrapper.sh and IsoDE2.sh. Change tool paths to where they are installed on your system
 
-5) build hisat2 and/or tmap index, depending on the type of your RNA-Seq data, for the reference you will use for mapping
+5) build hisat2 for the reference you will use for mapping
 
 6) Edit the .loc file with paths to your reference files: GTF, Transcript to gene cluster, and mapper indices), and copy it under tool-data directory in Galaxy
 Note: If you need only one of the two mappers (say hisat2), you can replace the other path to the other maaper index with a dummy path
 
 7) copy the entry in the tool_data_table_conf.xml file provided in this repository to the tool_data_table_conf.xml in your Galaxy config directory
+
+
```
--- a/isoem2_isode2/isoDE.xml Fri May 26 07:48:46 2017 -0400 +++ b/isoem2_isode2/isoDE.xml Fri May 26 08:13:06 2017 -0400 @@ -19,7 +19,7 @@ </command> <inputs> - <param name="sampleName1" size="10" type="text" label="Name for Condition 1" value="Condition1"/> + <param name="sampleName1" size="10" type="text" label="Name for Condition 1" value="Condition1" help="Output files label"/> <param name="condition1" type="data" label="Select data for Condition 1" format="gz" help="Condition 1 isoEM2 compressed output file"/> <!-- @@ -29,7 +29,7 @@ <param name="c1Rep" label="Add replicate" type="data" format="gz" data_ref="condtion1" /> </repeat> - <param name="sampleName2" size="10" type="text" label="Name for Condition 2" value="Condition2"/> + <param name="sampleName2" size="10" type="text" label="Name for Condition 2" value="Condition2" help="Output files label"/> <param format="gz" name="condition2" type="data" label="Select data for Condition 2" help="Condition 2 isoEM2 compressed output file"/> <!--
--- a/isoem2_isode2/isoem_wrapper.sh Fri May 26 07:48:46 2017 -0400 +++ b/isoem2_isode2/isoem_wrapper.sh Fri May 26 08:13:06 2017 -0400 @@ -1,6 +1,5 @@ #!/bin/bash - echo $@ echo pwd pwd @@ -9,347 +8,189 @@ tmapPath=${toolpath}/bin prinseqPath=${toolpath}/bin bedtoolsPath=${toolpath}/bin - - -#tmapPath=/usr/local/bin -#bedtoolsPath=/usr/bin -#hisat2Path=/usr/local/bin - tempDir=/galaxy-prod/tmp - - isoem2Path=${toolpath}/bin/isoem2/bin -#exit; arg=($*) i=0 for a in ${arg[*]} do ((i++)) - if [ "$a" == "--input1" ]; then + if [ "$a" == "--input1" ]; then RNAseq_1=${arg[i]} fi - if [ "$a" == "--input2" ]; then + if [ "$a" == "--input2" ]; then RNAseq_2=${arg[i]} fi - - if [ "$a" == "--GTF" ]; then + + if [ "$a" == "--GTF" ]; then GTF_file=${arg[i]} fi - if [ "$a" == "--TMAP_INDEX" ]; then + if [ "$a" == "--TMAP_INDEX" ]; then TMAP_INDEX_file=${arg[i]} fi - if [ "$a" == "--HISAT2_INDEX" ]; then + if [ "$a" == "--HISAT2_INDEX" ]; then HISAT2_INDEX_file=${arg[i]} fi - if [ "$a" == "--Cluster" ]; then + if [ "$a" == "--Cluster" ]; then Cluster_file=${arg[i]} fi - - if [ "$a" == "-m" ]; then + + if [ "$a" == "-m" ]; then M=${arg[i]} fi - - if [ "$a" == "-d" ]; then + + if [ "$a" == "-d" ]; then D=${arg[i]} fi - if [ "$a" == "--out_gene_fpkm" ]; then + if [ "$a" == "--out_gene_fpkm" ]; then out_gene_fpkm=${arg[i]} fi - if [ "$a" == "--out_gene_tpm" ]; then + if [ "$a" == "--out_gene_tpm" ]; then out_gene_tpm=${arg[i]} fi - if [ "$a" == "--out_iso_fpkm" ]; then + if [ "$a" == "--out_iso_fpkm" ]; then out_iso_fpkm=${arg[i]} fi - if [ "$a" == "--out_iso_tpm" ]; then + if [ "$a" == "--out_iso_tpm" ]; then out_iso_tpm=${arg[i]} fi - if [ "$a" == "--out_bootstrap" ]; then + if [ "$a" == "--out_bootstrap" ]; then out_bootstrap=${arg[i]} fi - if [ "$a" == "--RNA_type" ]; then + if [ "$a" == "--RNA_type" ]; then RNAseqType=${arg[i]} fi - if [ "$a" == "--fastaFile" ]; then + if [ "$a" == "--fastaFile" ]; then FastaFile=${arg[i]} + fasta=yes fi - - if [ "$a" == "--MinReadLength" ]; 
then + + if [ "$a" == "--MinReadLength" ]; then MinReadLengthNum=${arg[i]} fi done - - -if [ "${RNAseqType}" == "Ion-Torrent-Proton" ] -then - echo ${TMAP_INDEX_file} - echo Align the RNAseq_sample fastq to transcriptome using TMAP - - f=$(basename ${RNAseq_1}) -# file_type=`echo $f | tail -c 9` - -# if [ "$file_type" == "fastq.gz" ]; then - -# echo "Unzip fastq files" - -# gunzip -c ${RNAseq_1} > RNAseq_1.fastq -# ${tmapPath}/tmap map4 -a 2 -g 3 -n 8 -f ${TMAP_INDEX_file} -r RNAseq_1.fastq -s RNAseq_transcriptome.sam -# fi - - file_type=`echo $f | tail -c 6` - echo file type $file_type - - if [ "$file_type" == "fastq" ]; then - - #${tmapPath}/tmap map4 -a 2 -g 3 -n 8 -f ${TMAP_INDEX_file} -r ${RNAseq_1} -s RNAseq_transcriptome.sam - cp ${RNAseq_1} RNAseq_1.fastq - fi - - file_type=`echo $f | tail -c 4` - - if [ "$file_type" == "bam" ]; then - - echo "Convert BAM to fastq" - - ${bedtoolsPath}/bedtools bamtofastq -i ${RNAseq_1} -fq RNAseq_1.fastq - +#Sahar 5/20/2017 adding support for user uploaded reference transcriptome +if [ "${fasta}" == "yes" ] +then + echo building reference index and GTF + ln -s ${FastaFile} reference.fasta + ${hisat2Path}/hisat2-build reference.fasta reference.fasta + HISAT2_INDEX_file=reference.fasta + ${isoem2Path}/fastaToGTF reference.fasta + ls -ltrh + GTF_file=reference.fasta.GTF + awk '{print substr($12,2,length($12)-3)}' reference.fasta.GTF | sort | uniq | awk '{print $1 "\t" $1}' > cluster.txt + Cluster_file=cluster.txt + ls ${Cluster_file} -ltr + ls ${GTF_file} -ltr + ls ${HISAT2_INDEX_file} -ltr +fi -# ${tmapPath}/tmap map4 -a 2 -g 3 -n 8 -f ${TMAP_INDEX_file} -r RNAseq_1.fastq -s RNAseq_transcriptome.sam - fi - - echo @@@@@@ Number of raw reads in Proton RNA tumor @@@@@@@ - python /galaxy-prod/galaxy/tools/EpiSeq_Human/EpiSeq_CP_RNA_PE/v1_hg38/calculate_stat_fastq.py RNAseq_1.fastq - - echo filter the RNA fastq QC less than 20 and duplicates - perl ${prinseqPath}/prinseq-lite.pl -fastq RNAseq_1.fastq -out_good RNA_QC_dup -out_bad 
null -min_len ${MinReadLengthNum} -min_qual_mean 20 - -# rm RNA_1.fastq - - echo @@@@@@ Number of cleaned reads in Proton RNA tumor @@@@@@@ - python /galaxy-prod/galaxy/tools/EpiSeq_Human/EpiSeq_CP_RNA_PE/v1_hg38/calculate_stat_fastq.py RNA_QC_dup.fastq - - ${tmapPath}/tmap map4 -a 2 -g 3 -n 8 -f ${TMAP_INDEX_file} -r RNA_QC_dup.fastq -s RNAseq_transcriptome.sam - +if [ "${RNAseqType}" == "Illumina-paired-end" ] +then + f=$(basename ${RNAseq_1}) -elif [ "${RNAseqType}" == "Ion-Torrent-Proton-mm9" ] -then - echo ${TMAP_INDEX_file} - echo Align the RNAseq_sample fastq to transcriptome using TMAP - - f=$(basename ${RNAseq_1}) -# file_type=`echo $f | tail -c 9` - -# if [ "$file_type" == "fastq.gz" ]; then - -# echo "Unzip fastq files" - -# gunzip -c ${RNAseq_1} > RNAseq_1.fastq -# ${tmapPath}/tmap map4 -a 2 -g 3 -n 8 -f ${TMAP_INDEX_file} -r RNAseq_1.fastq -s RNAseq_transcriptome.sam -# fi - file_type=`echo $f | tail -c 6` - echo file type $file_type - if [ "$file_type" == "fastq" ]; then - - #${tmapPath}/tmap map4 -a 2 -g 3 -n 8 -f ${TMAP_INDEX_file} -r ${RNAseq_1} -s RNAseq_transcriptome.sam - cp ${RNAseq_1} RNAseq_1.fastq + ln ${RNAseq_1} RNAseq_1.fastq + ln ${RNAseq_2} RNAseq_2.fastq fi file_type=`echo $f | tail -c 4` - - if [ "$file_type" == "bam" ]; then - + if [ "$file_type" == "bam" ]; then echo "Convert BAM to fastq" - ${bedtoolsPath}/bedtools bamtofastq -i ${RNAseq_1} -fq RNAseq_1.fastq - - -# ${tmapPath}/tmap map4 -a 2 -g 3 -n 8 -f ${TMAP_INDEX_file} -r RNAseq_1.fastq -s RNAseq_transcriptome.sam + ${bedtoolsPath}/bedtools bamtofastq -i ${RNAseq_2} -fq RNAseq_2.fastq fi - echo @@@@@@ Number of raw reads in Proton RNA tumor @@@@@@@ - python /galaxy-prod/galaxy/tools/EpiSeq_Human/EpiSeq_CP_RNA_PE/v1_hg38/calculate_stat_fastq.py RNAseq_1.fastq + echo @@@@@@ Number of raw Ilumina RNA-Seq reads @@@@@@@ + python /galaxy-prod/galaxy/tools/EpiSeq_Human/EpiSeq_CP_RNA_PE/v1_hg38/calculate_stat_fastq.py RNAseq_1.fastq RNAseq_2.fastq - echo filter the RNA fastq QC less 
than 20 and duplicates - perl ${prinseqPath}/prinseq-lite.pl -fastq RNAseq_1.fastq -out_good RNA_QC_dup -out_bad null -min_qual_mean 20 -min_len ${MinReadLengthNum} - -# rm RNA_1.fastq - - echo @@@@@@ Number of cleaned reads in Proton RNA tumor @@@@@@@ - python /galaxy-prod/galaxy/tools/EpiSeq_Human/EpiSeq_CP_RNA_PE/v1_hg38/calculate_stat_fastq.py RNA_QC_dup.fastq - - ${tmapPath}/tmap map4 -a 2 -g 3 -n 8 -f ${TMAP_INDEX_file} -r RNA_QC_dup.fastq -s RNAseq_transcriptome.sam - + echo filter the RNA fastq QC less than 20 and duplicates + perl ${prinseqPath}/prinseq-lite.pl -fastq RNAseq_1.fastq -fastq2 RNAseq_2.fastq -out_good RNA_QC_dup -out_bad null -min_qual_mean 20 -min_len ${MinReadLengthNum} -elif [ "${RNAseqType}" == "Illumina-paired-end" ] -then - f=$(basename ${RNAseq_1}) -# file_type=`echo $f | tail -c 9` - -# if [ "$file_type" == "fastq.gz" ]; then + echo @@@@@@ Number of cleaned Ilumina RNA-Seq reads @@@@@@@ + python /galaxy-prod/galaxy/tools/EpiSeq_Human/EpiSeq_CP_RNA_PE/v1_hg38/calculate_stat_fastq.py RNA_QC_dup_1.fastq RNA_QC_dup_2.fastq -# echo "Unzip fastq files" + ${hisat2Path}/hisat2 -x ${HISAT2_INDEX_file} -1 RNA_QC_dup_1.fastq -2 RNA_QC_dup_2.fastq \ + --no-discordant --no-mixed --sensitive --no-unal --no-spliced-alignment -p 16 > RNAseq_transcriptome.sam -# gunzip -c ${RNAseq_1} > RNAseq_1.fastq -# gunzip -c ${RNAseq_2} > RNAseq_2.fastq -# /usr/local/bin/hisat2 -x ${HISAT2_INDEX_file} -1 RNAseq_1.fastq -2 RNAseq_2.fastq --no-discordant --no-mixed --sensitive --no-unal -p 8 > RNAseq_transcriptome.sam -# fi + rm RNA*.fastq + +else # [ "${RNAseqType}" == "Illumina-single-end" || "${RNAseqType}" == "Ion-Torrent-Proton" ] Sahar 05/05/2017 hisat2 is used for Proton - exact code as Illumina single + f=$(basename ${RNAseq_1}) file_type=`echo $f | tail -c 6` - if [ "$file_type" == "fastq" ]; then - - -# ${hisat2Path}/hisat2 -x ${HISAT2_INDEX_file} -1 ${RNAseq_1} -2 ${RNAseq_2} --no-discordant --no-mixed --sensitive --no-unal -p 8 > 
RNAseq_transcriptome.sam - cp ${RNAseq_1} RNAseq_1.fastq - cp ${RNAseq_2} RNAseq_2.fastq - - + ln ${RNAseq_1} RNAseq_1.fastq fi file_type=`echo $f | tail -c 4` - - if [ "$file_type" == "bam" ]; then - + if [ "$file_type" == "bam" ]; then echo "Convert BAM to fastq" - ${bedtoolsPath}/bedtools bamtofastq -i ${RNAseq_1} -fq RNAseq_1.fastq - ${bedtoolsPath}/bedtools bamtofastq -i ${RNAseq_2} -fq RNAseq_2.fastq -# ${hisat2Path}/hisat2 -x ${HISAT2_INDEX_file} -1 RNAseq_1.fastq -2 RNAseq_2.fastq --no-discordant --no-mixed --sensitive --no-unal -p 8 > RNAseq_transcriptome.sam fi - - echo @@@@@@ Number of raw reads not paired in Ilumina RNA tumor @@@@@@@ - python /galaxy-prod/galaxy/tools/EpiSeq_Human/EpiSeq_CP_RNA_PE/v1_hg38/calculate_stat_fastq.py RNAseq_1.fastq RNAseq_2.fastq - - - echo filter the RNA fastq QC less than 20 and duplicates - perl ${prinseqPath}/prinseq-lite.pl -fastq RNAseq_1.fastq -fastq2 RNAseq_2.fastq -out_good RNA_QC_dup -out_bad null -min_qual_mean 20 -min_len ${MinReadLengthNum} - - echo @@@@@@ Number of cleaned reads not paired in Ilumina RNA tumor @@@@@@@ - python /galaxy-prod/galaxy/tools/EpiSeq_Human/EpiSeq_CP_RNA_PE/v1_hg38/calculate_stat_fastq.py RNA_QC_dup_1.fastq RNA_QC_dup_2.fastq + echo @@@@@@ Number of raw Ilumina RNA-Seq reads @@@@@@@ + python /galaxy-prod/galaxy/tools/EpiSeq_Human/EpiSeq_CP_RNA_PE/v1_hg38/calculate_stat_fastq.py RNAseq_1.fastq - ${hisat2Path}/hisat2 -x ${HISAT2_INDEX_file} -1 RNA_QC_dup_1.fastq -2 RNA_QC_dup_2.fastq --no-discordant --no-mixed --sensitive --no-unal --no-spliced-alignment -p 8 > RNAseq_transcriptome.sam - -else - f=$(basename ${RNAseq_1}) -# file_type=`echo $f | tail -c 9` - -# if [ "$file_type" == "fastq.gz" ]; then - -# echo "Unzip fastq files" - -# gunzip -c ${RNAseq_1} > RNAseq_1.fastq -# /usr/local/bin/hisat2 -x ${HISAT2_INDEX_file} -U RNAseq_1.fastq --no-discordant --no-mixed --sensitive --no-unal --no-spliced-alignment -p 8 > RNAseq_transcriptome.sam -# fi + echo filter the RNA fastq QC less than 20 
and duplicates + perl ${prinseqPath}/prinseq-lite.pl -fastq RNAseq_1.fastq -out_good RNA_QC_dup -out_bad null -min_qual_mean 20 -min_len ${MinReadLengthNum} - file_type=`echo $f | tail -c 6` - - if [ "$file_type" == "fastq" ]; then - cp ${RNAseq_1} RNAseq_1.fastq - - #${hisat2Path}/hisat2 -x ${HISAT2_INDEX_file} -U ${RNAseq_1} --no-discordant --no-mixed --sensitive --no-unal -p 8 > RNAseq_transcriptome.sam - fi - - if [ "$file_type" == "bam" ]; then - - echo "Convert BAM to fastq" + echo @@@@@@ Number of cleaned Ilumina RNA-Seq reads @@@@@@@ + python /galaxy-prod/galaxy/tools/EpiSeq_Human/EpiSeq_CP_RNA_PE/v1_hg38/calculate_stat_fastq.py RNA_QC_dup.fastq - ${bedtoolsPath}/bedtools bamtofastq -i ${RNAseq_1} -fq RNAseq_1.fastq -# ${hisat2Path}/hisat2 -x ${HISAT2_INDEX_file} -U RNAseq_1.fastq --no-discordant --no-mixed --sensitive --no-unal -p 8 > RNAseq_transcriptome.sam - fi - - echo @@@@@@ Number of raw reads in Proton RNA tumor @@@@@@@ - python /galaxy-prod/galaxy/tools/EpiSeq_Human/EpiSeq_CP_RNA_PE/v1_hg38/calculate_stat_fastq.py RNAseq_1.fastq + ${hisat2Path}/hisat2 -x ${HISAT2_INDEX_file} -U RNA_QC_dup.fastq \ + --no-discordant --no-mixed --sensitive --no-unal --no-spliced-alignment -p 16 > RNAseq_transcriptome.sam - echo filter the RNA fastq QC less than 20 and duplicates - perl ${prinseqPath}/prinseq-lite.pl -fastq RNAseq_1.fastq -out_good RNA_QC_dup -out_bad null -min_qual_mean 20 -min_len ${MinReadLengthNum} - -# rm RNA_1.fastq - - echo @@@@@@ Number of cleaned reads in Proton RNA tumor @@@@@@@ - python /galaxy-prod/galaxy/tools/EpiSeq_Human/EpiSeq_CP_RNA_PE/v1_hg38/calculate_stat_fastq.py RNA_QC_dup.fastq - ${hisat2Path}/hisat2 -x ${HISAT2_INDEX_file} -U RNA_QC_dup.fastq --no-discordant --no-mixed --sensitive --no-unal --no-spliced-alignment -p 8 > RNAseq_transcriptome.sam + rm RNA*.fastq fi - echo Sorting - LANG=C sort -T ${tempDir} -k 1,1 RNAseq_transcriptome.sam > aligned_reads_sorted.sam if [ "${RNAseqType}" == "Illumina-paired-end" ] -then - echo IsoEM 
for RNAseq mapped to transcriptome - ${isoem2Path}/isoem2 -G ${GTF_file} -c ${Cluster_file} -C 95 -a aligned_reads_sorted.sam +then + echo IsoEM for paired RNAseq mapped to transcriptome + ${isoem2Path}/isoem2 -G ${GTF_file} -c ${Cluster_file} -C 95 --polyA 200 -a aligned_reads_sorted.sam else - echo IsoEM for RNAseq mapped to transcriptome - ${isoem2Path}/isoem2 -G ${GTF_file} -c ${Cluster_file} -C 95 -m ${M} -d ${D} aligned_reads_sorted.sam + echo IsoEM for single-end RNAseq mapped to transcriptome + ${isoem2Path}/isoem2 -G ${GTF_file} -c ${Cluster_file} --polyA 200 -C 95 -m ${M} -d ${D} aligned_reads_sorted.sam fi echo Join estimates files with ci files -echo ls -#ls ./aligned_reads_sorted/ -ltr - join ./aligned_reads_sorted/output/Genes/gene_fpkm_estimates ./aligned_reads_sorted/output/ConfidenceIntervals/gene_fpkm_ci >333 awk '{print $1 "\t" $2 "\t" $3 "\t" $4}' 333 > gene_fpkm join ./aligned_reads_sorted/output/Genes/gene_tpm_estimates ./aligned_reads_sorted/output/ConfidenceIntervals/gene_tpm_ci |awk '{print $1 "\t" $2 "\t" $3 "\t" $4}' > gene_tpm join ./aligned_reads_sorted/output/Isoforms/iso_fpkm_estimates ./aligned_reads_sorted/output/ConfidenceIntervals/iso_fpkm_ci |awk '{print $1 "\t" $2 "\t" $3 "\t" $4}' > iso_fpkm join ./aligned_reads_sorted/output/Isoforms/iso_tpm_estimates ./aligned_reads_sorted/output/ConfidenceIntervals/iso_tpm_ci |awk '{print $1 "\t" $2 "\t" $3 "\t" $4}' > iso_tpm - -#echo Adding output directory to bootstap archive -# -#echo ls -#ls ./aligned_reads_sorted/ -ltr -# -#cd aligned_reads_sorted -#echo ls -#ls -ltrh -#gunzip bootstrap.tar.gz -#tar rf bootstrap.tar output -#gzip bootstrap.tar mv ./aligned_reads_sorted/bootstrap.tar.gz ${out_bootstrap} - -#echo ls after gz -#ls -ltr -# -#cd .. -#pwd - - -#gunzip ./aligned_reads_sorted/bootstrap.tar.gz -#tar -rf ./aligned_reads_sorted/bootstrap.tar ./aligned_reads_sorted/output -#gzip ./aligned_reads_sorted/bootstrap.tar - -echo ls after gz -ls -ltr - #4. 
Copy output files ############################################################# mv gene_fpkm ${out_gene_fpkm} -mv gene_tpm ${out_gene_tpm} +mv gene_tpm ${out_gene_tpm} mv iso_fpkm ${out_iso_fpkm} mv iso_tpm ${out_iso_tpm} @@ -364,4 +205,3 @@ -
--- a/isoem2_isode2/isoem_wrapper.xml Fri May 26 07:48:46 2017 -0400 +++ b/isoem2_isode2/isoem_wrapper.xml Fri May 26 08:13:06 2017 -0400 @@ -16,7 +16,7 @@ --MinReadLength $MinReadLength ## Handle reference file . - #if $referenceSource.CCDSsource == "history": + #if $referenceSource.TranscriptomeSource == "history": --fastaFile $referenceSource.fastaFile #else: --GTF $referenceSource.index.fields.GTF --TMAP_INDEX $referenceSource.index.fields.TMAP_INDEX --HISAT2_INDEX $referenceSource.index.fields.HISAT2_INDEX --Cluster $referenceSource.index.fields.Cluster @@ -43,7 +43,7 @@ <param name="sampleName" size="10" type="text" label="Sample name" value="Sample" help="Output files label"/> <conditional name="referenceSource"> - <param name="CCDSsource" type="select" label="Will you upload a reference transcriptome fasta file from your history or use a built-in reference?" help="Built-ins were indexed using default options"> + <param name="TranscriptomeSource" type="select" label="Will you upload a reference transcriptome fasta file from your history or use a built-in reference?" 
help="Built-ins were indexed using default options"> <option value="indexed">Use a built-in reference</option> <option value="history">Use reference from the history</option> </param> @@ -53,7 +53,7 @@ </param> </when> <when value="history"> - <param name="fastaFile" type="data" format="fasta" metadata_name="dbkey" label="Select CCDS fasta file from your history" /> + <param name="fastaFile" type="data" format="fasta" metadata_name="dbkey" label="Select transcriptome fasta file from your history" /> </when> <!-- history --> </conditional> <!-- referenceSource --> <conditional name="Data"> @@ -69,16 +69,16 @@ <option value="Illumina-single-end">Illumina single-end</option> </param> <!-- RNAseqType --> <when value="Illumina-paired-end"> - <param name="input1" type="data" label="RNA-Seq file1, fastq or bam format" /> - <param name="input2" type="data" label="RNA-Seq file2, fastq or bam format" /> + <param name="input1" type="data" format="fastq" label="RNA-Seq file1, fastq format" /> + <param name="input2" type="data" format="fastq" label="RNA-Seq file2, fastq format" /> </when> <when value="Ion-Torrent-Proton"> - <param name="input1" type="data" label="RNA-Seq file, fastq or bam format" /> + <param name="input1" type="data" format="fastq" label="RNA-Seq file, fastq format" /> <param name="lengthMean" type="text" label="m (RNA-Seq fragment length mean)" /> <param name="lengthSd" type="text" label="d (RNA-Seq fragment length standard deviation)" /> </when> <when value="Illumina-single-end"> - <param name="input1" type="data" label="RNA-Seq file, fastq or bam format" /> + <param name="input1" type="data" format="fastq" label="RNA-Seq file, fastq format" /> <param name="lengthMean" type="text" label="m (RNA-Seq fragment length mean)" /> <param name="lengthSd" type="text" label="d (RNA-Seq fragment length standard deviation)" /> </when> @@ -111,7 +111,7 @@ **Input Format** -* The IsoEM2 tool can process RNA-seq reads generated by both Ion Torrent and Illumina platforms. 
RNA-Seq reads must be provided in fastq, fastq.gz, or bam formats. +* The IsoEM2 tool can process RNA-seq reads generated by both Ion Torrent and Illumina platforms. RNA-Seq reads must be provided in fastq format. **Output Format** @@ -127,6 +127,10 @@ ----- +**BUILT-IN REFERENCES** + +* All reference files used in this pipeline can be found at http://dna.engr.uconn.edu/tmp/galaxy/tool-data/IsoEM.loc +* </help>
```diff
--- a/isoem2_isode2/tool_data_table_conf.xml Fri May 26 07:48:46 2017 -0400
+++ b/isoem2_isode2/tool_data_table_conf.xml Fri May 26 08:13:06 2017 -0400
@@ -1,10 +1,10 @@
-<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changedin revision 4550:535d276c92bc-->
-<tables>
-    <!-- Locations of genome/transcriptome indices, genome file, fastq file, GTF file, etc for Epi-Seq -->
-    <table name="IsoEM" comment_char="#">
-        <columns>value, GTF, TMAP_INDEX, HISAT2_INDEX, Cluster </columns>
-        <file path="tool-data/IsoEM.loc" />
-    </table>
-
-</tables>
-
+<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changedin revision 4550:535d276c92bc-->
+<tables>
+    <!-- Locations of genome/transcriptome indices, genome file, fastq file, GTF file, etc for Epi-Seq -->
+    <table name="IsoEM" comment_char="#">
+        <columns>value, GTF, HISAT2_INDEX, Cluster </columns>
+        <file path="tool-data/IsoEM.loc" />
+    </table>
+
+</tables>
+
```
```diff
--- a/isoem2_isode2/tool_data_table_conf.xml.sample Fri May 26 07:48:46 2017 -0400
+++ b/isoem2_isode2/tool_data_table_conf.xml.sample Fri May 26 08:13:06 2017 -0400
@@ -1,10 +1,10 @@
-<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changedin revision 4550:535d276c92bc-->
-<tables>
-    <!-- Locations of genome/transcriptome indices, genome file, fastq file, GTF file, etc for Epi-Seq -->
-    <table name="IsoEM" comment_char="#">
-        <columns>value, GTF, TMAP_INDEX, HISAT2_INDEX, Cluster </columns>
-        <file path="tool-data/IsoEM.loc" />
-    </table>
-
-</tables>
-
+<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changedin revision 4550:535d276c92bc-->
+<tables>
+    <!-- Locations of genome/transcriptome indices, genome file, fastq file, GTF file, etc for Epi-Seq -->
+    <table name="IsoEM" comment_char="#">
+        <columns>value, GTF, HISAT2_INDEX, Cluster </columns>
+        <file path="tool-data/IsoEM.loc" />
+    </table>
+
+</tables>
+
```