Mercurial > repos > vandelj > giant_limma_analysis
changeset 0:f274c8d45613 draft
"planemo upload for repository https://github.com/juliechevalier/GIANT/tree/master commit cb276a594444c8f32e9819fefde3a21f121d35df"
author | vandelj |
---|---|
date | Fri, 26 Jun 2020 09:43:41 -0400 |
parents | |
children | 9f2ddab68c9e |
files | galaxy/wrappers/DiffExprLimma.xml galaxy/wrappers/tool-data/LimmaTool.loc.sample galaxy/wrappers/tool_data_table_conf.xml.sample src/ExprPlotsScript.R src/General_functions.py src/LIMMA_options.py src/LIMMAscriptV4.R src/VolcanoPlotsScript.R src/getopt.R src/heatMapClustering.R src/utils.R |
diffstat | 11 files changed, 5724 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/galaxy/wrappers/DiffExprLimma.xml Fri Jun 26 09:43:41 2020 -0400 @@ -0,0 +1,616 @@ +<tool name="GIANT-Differential Expression with LIMMA" id="giant_limma_analysis" version="0.3.8"> + <description>Use LIMMA to detect differentially expressed genes</description> + <requirements> + <requirement type="package" version="1.7.1">r-r.methodss3</requirement> + <requirement type="package" version="3.36.5">bioconductor-limma</requirement> + <requirement type="package" version="2.36.1">bioconductor-biomart</requirement> + <requirement type="package" version="3.0.0">r-ggplot2</requirement> + <requirement type="package" version="4.8.0">r-plotly</requirement> + <requirement type="package" version="1.3.1">r-stringr</requirement> + <requirement type="package" version="1.1_2">r-rcolorbrewer</requirement> + <requirement type="package" version="1.4.32">r-statmod</requirement> + </requirements> + <code file="../../src/LIMMA_options.py"/> + <stdio> + <regex match="Execution halted" + source="both" + level="fatal" + description="Execution halted, please contact tool developer or administrators." /> + <regex match="Error in" + source="both" + level="fatal" + description="An error occured during R execution, please contact tool developer." /> + <exit_code range="15" level="fatal" description="Error during formating scripts, see log file for more information." /> + <exit_code range="10" level="fatal" description="Missing file during html report, see log file for more information." /> + <exit_code range="1:9" level="fatal" description="Error in R execution, see log file for more information." 
/> + </stdio> + <command> <![CDATA[ + bash $scriptPrepareTable; + ret_code=\$?; + if [ \$ret_code != 0 ]; then + exit \$ret_code; + fi; + + cp '$__tool_directory__/../../src/LIMMA_options.py' ./LIMMA_options.py; + + #if $blockingSection.blockingConditional.addBlocking == "true": + python -c 'import LIMMA_options;LIMMA_options.replaceNamesBlockInFiles("$inputSection.inputData","./factorTable.csv","./blockingTable.csv","./expressionRenamed.csv","./factorTableRenamed.csv","./blockingTableRenamed.csv","./dictionnaryRenamed.csv")'; + #else: + python -c 'import LIMMA_options;LIMMA_options.replaceNamesInFiles("$inputSection.inputData","./factorTable.csv","./expressionRenamed.csv","./factorTableRenamed.csv","./dictionnaryRenamed.csv")'; + #end if + + + if [ -f ./dictionnaryRenamed.csv ]; then + printf "[INFO]Renaming is done\n" >> $log; + Rscript '$__tool_directory__/../../src/LIMMAscriptV4.R' -i 'expressionRenamed.csv' -l '$log' -o '$outputData' -z '$outputDfData' -f 'pdf' + -a 'factorTableRenamed.csv' -s 'sumSquareFtest' -g 'dictionnaryRenamed.csv' + #if $blockingSection.blockingConditional.addBlocking == "true": + -b 'blockingTableRenamed.csv' + -u $advSection.confoundingPolicy + #end if + -r '${contrastSection.factorSelection}' + #for $i, $s in enumerate( $contrastSection.contrastList ) + -p '${s.groupName}' + -m '${s.firstGroupToCompare}' + -n '${s.secondGroupToCompare}' + #end for + #if $contrastSection.interactionSelection.interactionContrast == "true": + -c '$contrastSection.interactionSelection.controlSelection' + #end if + -t $plotSection.cutoffTh + -d $plotSection.FCthreshold + #if $plotSection.histogramToPlot: + -h 'Histograms' + #end if + #if $plotSection.volcanoToPlot: + -v 'Volcanos' + #end if + #if $plotSection.geneInformation.addGeneInfo: + -x '$plotSection.geneInformation.organismID' + -y '$plotSection.geneInformation.infoInRowType' + #end if + ; + ret_code=\$?; + if [ \$ret_code != 0 ]; then + exit \$ret_code; + else + bash $scriptTransfer; + 
ret_code=\$?; + if [ \$ret_code != 0 ]; then + exit \$ret_code; + fi + fi; + else + printf "[ERROR]Error during renaming, factor information file should not contain special characters '*',':',',','|' in factor names and possible values\n" >> $log; + exit 15; + fi; + printf "[INFO]End of tool script" >> $log; + ]]> + </command> + + + + <configfiles> + <configfile name="scriptPrepareTable"> + <![CDATA[ + awk -v fact="$contrastSection.factorSelection" 'BEGIN{OFS="";ORS="";FS="\t";split(fact,tab,",");for(i in tab)dico[tab[i]]=1} FNR==1{for(i=2;i<=NF;i++){if(\$i in dico)colToSelect[i]=1}} {print \$1;for(i in colToSelect)print "\t"\$i ; print "\n"}' $inputSection.conditionInformation > ./factorTable.csv; + + if [ ! -e ./factorTable.csv ]; then + printf "[ERROR]factorTable.csv is missing" >> $log; + exit 15 + fi + + + #if $blockingSection.blockingConditional.addBlocking == "true": + awk -v fact="$blockingSection.blockingConditional.blockingToInclude" ' BEGIN{OFS="";ORS="";FS="\t";split(fact, facto, ",");for(i in facto)factors[facto[i]]=1} FNR==1{for(i=2;i<=NF;i++)if(\$i in factors)dico[i]=1} {print $1;for(factorID in dico)print "\t"\$factorID; print "\n"}' $inputSection.conditionInformation > ./blockingTable.csv; + + if [ ! 
-e ./blockingTable.csv ]; then + printf "[ERROR]blockingTable.csv is missing" >> $log; + exit 15 + fi + + #end if + printf "[INFO]End of scriptPrepareTable\n" >> $log + ]]> + </configfile> + + <configfile name="scriptTableToHtml"> +<![CDATA[ +printf "<!DOCTYPE html> +<html> +<head> +<meta http-equiv=\"Content-type\" content=\"text/html; charset=utf-8\"> +<link rel=\"stylesheet\" type=\"text/css\" href=\"https://cdn.datatables.net/1.10.16/css/jquery.dataTables.min.css\"> +<script type=\"text/javascript\" language=\"javascript\" src=\"https://code.jquery.com/jquery-1.12.4.js\"> +</script> +<script type=\"text/javascript\" language=\"javascript\" src=\"https://cdn.datatables.net/1.10.16/js/jquery.dataTables.min.js\"> +</script> +<script type=\"text/javascript\" class=\"init\"> +\\$(document).ready(function() { + \\$(\'\#example\').DataTable( { + \"columnDefs\": [ { + \"visible\": false, + \"targets\": -1 + } ] + } ); +} ); +</script> +</head> +<body style=\"background-color:white;\"> +<table id=\"example\" class=\"display\" cellspacing=\"0\"> +" > ${html_file.extra_files_path}/outputLIMMA.html + +printf "<colgroup>\n" >> ${html_file.extra_files_path}/outputLIMMA.html +printf "<col span=\"2\" style=\"background-color:rgb(224,235,235)\">\n" >> ${html_file.extra_files_path}/outputLIMMA.html +awk 'BEGIN{odd=1;FS="\t"} NR==1{for(i=3;i<=NF;i=i+5){if(odd==1){odd=0;printf "<col span=\"5\" style=\"background-color:rgb(224,238,255)\">\n"}else{odd=1;printf "<col span=\"5\" style=\"background-color:rgb(255,221,224)\">\n"}}}' $outputData >> ${html_file.extra_files_path}/outputLIMMA.html +printf "</colgroup>\n" >> ${html_file.extra_files_path}/outputLIMMA.html + + +printf "<thead>\n<tr>\n" >> ${html_file.extra_files_path}/outputLIMMA.html +printf "<th rowspan=\"2\">Gene</th>\n" >> ${html_file.extra_files_path}/outputLIMMA.html +printf "<th rowspan=\"2\">Info</th>\n" >> ${html_file.extra_files_path}/outputLIMMA.html +awk 'BEGIN{FS="\t"} NR==1{for(i=3;i<=NF;i=i+5)printf "<th 
colspan=\"5\">"\$i"</th>\n"}' $outputData >> ${html_file.extra_files_path}/outputLIMMA.html +printf "<th></th>\n" >> ${html_file.extra_files_path}/outputLIMMA.html +printf "</tr>\n<tr>\n" >> ${html_file.extra_files_path}/outputLIMMA.html +awk 'BEGIN{FS="\t"} NR==2{for(i=3;i<=NF;i++)printf "<th>"\$i"</th>\n"}' $outputData >> ${html_file.extra_files_path}/outputLIMMA.html +printf "<th></th>\n" >> ${html_file.extra_files_path}/outputLIMMA.html +printf "</tr>\n</thead>\n" >> ${html_file.extra_files_path}/outputLIMMA.html + +printf "<tfoot>\n<tr>\n" >> ${html_file.extra_files_path}/outputLIMMA.html +awk 'BEGIN{FS="\t"} NR==2{for(i=1;i<=NF;i++)printf "<th>"\$i"</th>\n"}' $outputData >> ${html_file.extra_files_path}/outputLIMMA.html +printf "<th></th>\n" >> ${html_file.extra_files_path}/outputLIMMA.html +printf "</tr>\n</tfoot>\n">> ${html_file.extra_files_path}/outputLIMMA.html + +printf "<tbody>\n" >> ${html_file.extra_files_path}/outputLIMMA.html +awk 'BEGIN{FS="\t"} NR>2{printf "<tr>\n";for(i=1;i<=NF;i++){printf "<th>"\$i"</th>\n"};printf "<th></th>\n";printf "</tr>\n"}' $outputData >> ${html_file.extra_files_path}/outputLIMMA.html +printf "</tbody>\n" >> ${html_file.extra_files_path}/outputLIMMA.html + +printf "</table> +</body> +</html>" >> ${html_file.extra_files_path}/outputLIMMA.html + +]]> + </configfile> + + <configfile name="scriptTransfer"> +<![CDATA[ + + +##for LIMMA output table + +mkdir -p $html_file.extra_files_path + +##create HTML file for limma output table +source $scriptTableToHtml + +##check outputLIMMA.html is here +if ! 
[ -e ${html_file.extra_files_path}/outputLIMMA.html ]; then + printf "[ERROR]outputLIMMA.html is missing" >> $log; + exit 10 +fi + +##create header of main HTML file +printf "<!DOCTYPE html>\n<html>\n<body>" > $html_file + +##first add reference of the LIMMA output table +printf "<h3>LIMMA statistics (p.val, FC)</h3>\n" >> $html_file +printf "<a href=\"outputLIMMA.html\">LIMMA results</a>\n" >> $html_file + + + +#if $plotSection.histogramToPlot: + +printf "<h3>P-val histograms</h3>\n" >> $html_file + +##create folders in media +counter=1 +for histogram in \$(ls ./plotLyDir/Histograms_*html) +do +histogramShort=\${histogram%\.*} +histogramShort=\${histogramShort\#\#*/} + +conditionName=\${histogram%\.*} +conditionName=\${conditionName\#\#*Histograms_} + +echo \$conditionName > ./temporaryConditionName +conditionFormatedName=\$(awk 'BEGIN{FS="\t"} ARGIND==1{dico[\$1]=\$2} ARGIND==2{print dico[\$1]}' ./correspondanceFileNames.csv ./temporaryConditionName) + +##modify HTML to point to plotLy folder +sed -i "s/\${histogramShort}_files/PlotLy_Histogram_scripts/g" \$histogram + +##copy HTML files in both folders +cp \$histogram ${html_file.extra_files_path}/Histogram_\$conditionName.html + +##add HTML link +printf "<p>\n<a href=\"Histogram_\$conditionName.html\">Histogram \$conditionFormatedName</a>\n</p>\n" >> $html_file + +if [ \$counter = 1 ]; then +#if $plotSection.imagePlotlyFormat=="svg": +##before copying scripts folder modify them to replace png snapshot with svg (not proud of solution but seems to work) +cd ./plotLyDir/\${histogramShort}_files/plotly-main-*/ +awk '{gsub("\"png\"","\"svg\"",\$0);print \$0}' ./plotly-latest.min.js > ./plotly-latest.minTemp.js +awk '{gsub("Download plot as a png","Download plot as a svg",\$0);print \$0}' ./plotly-latest.minTemp.js > ./plotly-latest.min.js +rm ./plotly-latest.minTemp.js +cd ../../../ +#end if + +##now copy only scripts folder for the first histogram and rename +cp -r ./plotLyDir/\${histogramShort}_files 
$html_file.extra_files_path +mv ${html_file.extra_files_path}/\${histogramShort}_files ${html_file.extra_files_path}/PlotLy_Histogram_scripts +fi + +((counter++)) +done + +if [ \$counter = 1 ]; then + printf "[ERROR]Histograms are missing" >> $log; + exit 10 +fi + +#end if + + +if [ -e ./plotLyDir/sumSquareFtest.html ]; then + +printf "<h3>Source of variation</h3>\n" >> $html_file + +##modify HTML to point to the first script folder +sed -i "s/sumSquareFtest_files/PlotLy_sumSquareFtest_scripts/g" ./plotLyDir/sumSquareFtest.html + +##copy HTML files in both folders +cp ./plotLyDir/sumSquareFtest.html ${html_file.extra_files_path}/ + +##add HTML link +printf "<a href=\"sumSquareFtest.html\">F-ratio barplot</a>\n" >> $html_file + +#if $plotSection.imagePlotlyFormat=="svg": +##before copying scripts folder modify them to replace png snapshot with svg (not proud of solution but seems to work) +cd ./plotLyDir/sumSquareFtest_files/plotly-main-*/ +awk '{gsub("\"png\"","\"svg\"",\$0);print \$0}' ./plotly-latest.min.js > ./plotly-latest.minTemp.js +awk '{gsub("Download plot as a png","Download plot as a svg",\$0);print \$0}' ./plotly-latest.minTemp.js > ./plotly-latest.min.js +rm ./plotly-latest.minTemp.js +cd ../../../ +#end if + +##now copy scripts folder +cp -r ./plotLyDir/sumSquareFtest_files $html_file.extra_files_path +mv ${html_file.extra_files_path}/sumSquareFtest_files ${html_file.extra_files_path}/PlotLy_sumSquareFtest_scripts + +else + printf "[ERROR]sumSquareFtest plot is missing" >> $log; + exit 10 +fi + + + +#if $plotSection.volcanoToPlot: + +printf "<h3>Volcanos</h3>\n" >> $html_file + +##create folders in media +counter=1 +for volcano in \$(ls ./plotLyDir/Volcanos_*html) +do +volcanoShort=\${volcano%\.*} +volcanoShort=\${volcanoShort\#\#*/} + +conditionName=\${volcano%\.*} +conditionName=\${conditionName\#\#*Volcanos_} + +echo \$conditionName > ./temporaryConditionName +conditionFormatedName=\$(awk 'BEGIN{FS="\t"} ARGIND==1{dico[\$1]=\$2} ARGIND==2{print 
dico[\$1]}' ./correspondanceFileNames.csv ./temporaryConditionName) + + +##modify HTML to point to plotLy folder +sed -i "s/\${volcanoShort}_files/PlotLy_Volcano_scripts/g" \$volcano + +##copy HTML files in both folders +cp \$volcano ${html_file.extra_files_path}/Volcano_\$conditionName.html + +##add HTML link +printf "<p>\n<a href=\"Volcano_\$conditionName.html\">Volcano \$conditionFormatedName</a>\n</p>\n" >> $html_file + +if [ \$counter = 1 ]; then + +#if $plotSection.imagePlotlyFormat=="svg": +##before copying scripts folder modify them to replace png snapshot with svg (not proud of solution but seems to work) +cd ./plotLyDir/\${volcanoShort}_files/plotly-main-*/ +awk '{gsub("\"png\"","\"svg\"",\$0);print \$0}' ./plotly-latest.min.js > ./plotly-latest.minTemp.js +awk '{gsub("Download plot as a png","Download plot as a svg",\$0);print \$0}' ./plotly-latest.minTemp.js > ./plotly-latest.min.js +rm ./plotly-latest.minTemp.js +cd ../../../ +#end if + +##now copy only scripts folder for the first volcano and rename +cp -r ./plotLyDir/\${volcanoShort}_files $html_file.extra_files_path +mv ${html_file.extra_files_path}/\${volcanoShort}_files ${html_file.extra_files_path}/PlotLy_Volcano_scripts +fi + +((counter++)) +done + +if [ \$counter = 1 ]; then + printf "[ERROR]Volcano plots are missing" >> $log; + exit 10 +fi + +#end if + + +##create footer of HTML file +printf "</body>\n</html>" >> $html_file +]]> + </configfile> + </configfiles> + + + + <inputs> + <section name="inputSection" title="Input files" expanded="True"> + <param type="text" name="title" value="LIMMA_toPersonalize" label="Title for output"/> + + <param type="data" name="inputData" format="tabular" label="Normalized expression tabular file" optional="false" multiple="false" help="Expression data have to be log2 transformed."/> + + <param type="data" name="conditionInformation" format="tabular" label="Factor information tabular file" optional="false" multiple="false"> + </param> + </section> + + <section 
name="contrastSection" title="Contrast definition" expanded="True"> + + <param name="factorSelection" type="select" label="Select all factors to include in the global model (excepting confounding factors)" refresh_on_change="true" display="checkboxes" optional="false" multiple="true" dynamic_options="get_column_names(inputSection['conditionInformation'].file_name)" help="Confounding factors are selected in the coresponding section below." /> + + <repeat name="contrastList" title="Contrast"> + <param type="text" name="groupName" value="" label="Contrast name"/> + + <param name="firstGroupToCompare" type="select" label="Select factor levels of 1st group" display="checkboxes" multiple="true" refresh_on_change="true" dynamic_options="get_row_names_allInteractions(inputSection['conditionInformation'].file_name,factorSelection)"> + </param> + + <param name="secondGroupToCompare" type="select" label="Select factor levels of 2nd group" display="checkboxes" multiple="true" refresh_on_change="true" dynamic_options="get_row_names_allInteractions(inputSection['conditionInformation'].file_name,factorSelection)"> + </param> + </repeat> + + <conditional name="interactionSelection"> + <param name="interactionContrast" type="boolean" label="Add interaction contrasts" checked="false" help="If you have selected two factors at least."/> + <when value="true"> + <param name="controlSelection" type="select" label="Select one control level for each factor (and only one)" display="checkboxes" multiple="true" refresh_on_change="true" dynamic_options="get_allrow_names(inputSection['conditionInformation'].file_name,factorSelection)"> + </param> + </when> + <when value="false"> + </when> + </conditional> + + </section> + + + + <section name="blockingSection" title="Paired analysis/confounding factor" expanded="True"> + <conditional name="blockingConditional"> + <param name="addBlocking" type="boolean" label="Add confounding factors" checked="false" help="To control factors producing spurious 
association as batch effects or to analyze paired data"/> + <when value="true"> + <param name="blockingToInclude" type="select" label="Select confounding factors" multiple="true" dynamic_options="get_column_names(inputSection['conditionInformation'].file_name)"> + <validator type="empty_field" message="You should select at least one factor"></validator> + </param> + </when> + <when value="false"> + </when> + </conditional> + </section> + + <section name="plotSection" title="Output section" expanded="True"> + <param name="cutoffTh" type="float" value="0.05" label="Output FDR p-val threshold" > + <validator type="in_range" min="0" max="1" message="Threshold should be between 0 and 1"/> + </param> + <param name="FCthreshold" type="float" value="2" label="Output Fold Change threshold (both 'log2(threshold)' and 'log2(1/threshold)' values will be used)" > + <validator type="in_range" min="1" exclude_min="false" message="Threshold should be greater or equal to 1"/> + </param> + <param type="boolean" name="histogramToPlot" checked="true" label="Plot histograms" help="Plot nominal p-val distribution for each comparison."> + </param> + <param type="boolean" name="volcanoToPlot" checked="true" label="Plot volcanos" help="Plot volcano for each comparison."> + </param> + + <conditional name="geneInformation"> + <param name="addGeneInfo" type="boolean" label="Add gene/probe information" checked="false"/> + <when value="true"> + <param name="organismID" label="Organism" type="select"> + <options from_data_table="LimmaTool" > + <column name="name" index="1"/> + <column name="value" index="0"/> + <filter type="unique_value" column="0"/> + </options> + </param> + <param name="infoInRowType" label="Nature of row names" type="select"> + <options from_data_table="LimmaTool" > + <column name="name" index="3"/> + <column name="value" index="2"/> + <filter type="param_value" ref="organismID" column="0"/> + </options> + </param> + </when> + <when value="false"> + </when> + </conditional> 
+ + <param type="select" name="imagePlotlyFormat" display="radio" label="Html snapshot format"> + <option value="png">PNG format</option> + <option value="svg">SVG format</option> + </param> + </section> + + <section name="advSection" title="Advanced parameters" expanded="False"> + <param type="select" name="confoundingPolicy" display="radio" label="Confounding effect policy" help="DO NOT modify this parameter unless you know what you are doing!"> + <option value="additive">As an additive factor (default)</option> + <option value="correlated">As a correlation constraint</option> + </param> + </section> + + </inputs> + + + + <outputs> + <data format="tabular" name="outputData" label="${inputSection.title}_LIMMAstatistics"/> + + <data format="tabular" name="outputDfData" label="${inputSection.title}_LIMMAdetailed"/> + + <data format="html" name="html_file" label="${inputSection.title}_HTML.html"/> + <!-- + <collection name="outputHistogramsList" label="${inputSection.title}_HistogramsList" type="list"> + <discover_datasets pattern="(?P<designation>Histograms[0-9]+)\.(?P<ext>[^\._]+)?" directory="plotDir" visible="false"/> + <filter>plotSection['histogramToPlot']</filter> + </collection> + + <collection name="outputVolcanosList" label="${inputSection.title}_VolcanosList" type="list"> + <discover_datasets pattern="(?P<designation>Volcanos\_.*)\.(?P<ext>[^\._]+)?" 
directory="plotDir" visible="false"/> + <filter>plotSection['volcanoToPlot']</filter> + </collection> + + <data name="fRatioOutput" format="png" label="${inputSection.title}_F-ratio"> + <change_format> + <when input="plotSection['imageFormat']" value="pdf" format="pdf" /> + </change_format> + </data> + --> + <data format="txt" name="log" label="${inputSection.title}_Log" /> + </outputs> + + + + <tests> + <test maxseconds="3600"> + <param name="wfile" value="wiggle.wig" /> + <param name="bfile" value="bedfile.bed" /> + <param name="span" value="3000" /> + <param name="pfres" value="50" /> + <param name="lowersize" value="1000" /> + <param name="middlesize" value="2000" /> + <param name="uppersize" value="3000" /> + <param name="lowerbisize" value="2500" /> + <param name="upperbisize" value="5000" /> + <param name="reldist" value="3000" /> + <param name="genome" value="hg18" /> + <param name="imagetype" value="PDF" /> + <param name="enable" value="no" /> + <output name="outputData" file="ceas_1/ceas_1.pdf" /> + <output name="outputDfData" file="ceas_1/ceas_1.pdf" /> + </test> +</tests> + <help> +<![CDATA[ +**What it does ?** + +This tool computes differential expression for a set of genes. +This tool uses the LIMMA function from the R package limma (only suitable for array data). + +----- + +**Parameters** + +\- **Input Files** + +- **Title** to personalize output file names (please avoid special characters). +- **Normalized expression tabular file** with samples as columns and genes as rows (header contains sample names and first column gene identifiers). WARNING Expression data have to be already log2 transformed. 
+ + :: + + Conditions 157_(HuGene-2_0-st).CEL 156_(HuGene-2_0-st).CEL 155_(HuGene-2_0-st).CEL 154_(HuGene-2_0-st).CEL + DDX11L2 4.500872 4.429759 4.780281 4.996189 + MIR1302-2 3.415065 3.520472 3.471503 3.567988 + OR4F5 3.737956 3.011586 3.424494 3.497545 + VWA1 5.189621 5.129595 4.806793 5.227014 + +- **Factor information tabular file** with factors as columns and samples as rows (header contains factor names and first column sample names). + + :: + + Conditions Sex Treatment Reaction + 138_(HuGene-2_0-st).CEL 1 TreatA Pos + 148_(HuGene-2_0-st).CEL 0 NoTreat Pos + 139_(HuGene-2_0-st).CEL 0 TreatB Neg + 149_(HuGene-2_0-st).CEL 0 NoTreat Neg + +\- **Contrast definition** + +- **Select all factors to include in the global model** : automatically extracted from factor information file. Confounding factors or paired-analysis information (replicates number,dates...) should not be selected here, but in specific section below. + +- **Contrast** + + \- **Contrast Name** displayed in output file + + \- **Select factor levels of 1st group** : the contrast will be computed with the formula "1st group versus 2nd group" so "1st group - 2nd group". + + \- **Select factor levels of 2nd group** : see below for some examples + + :: + + Nb Factors List of Factors Factor levels Expected Contrast Select as 1st gr Select as 2nd gr + 1 Strain WT, KO KO vs WT KO WT + 2 Strain, Treatment WT, KO, NoTreat,TreatA TreatA vs NoTreat in WT mice TreatA*WT NoTreat*WT + 2 Strain, Treatment WT, KO, NoTreat,TreatA Treatment effect on Strain dependent genes See results of interaction contrast + +- **Add interaction contrasts** : to compute automatically each level of interaction + + \- **Control groups** for each factor, select its level used as control. Thus interaction contrasts will be computed for each factor level regarding to this control level. 
+ +\- **Paired analysis / confounding factors** + +- **Add confounding factors** which can define "blocks" in the data different from those selected previously in the global model. Typically confounding factors are linked to batch effect (dates...) or paired-analysis situation (replicates number...). + +\- **Output section** + +- **Output FDR p-val threshold**, only genes with FDR <= this threshold (in at least one of the defined contrasts) are kept in tabular result file and displayed dynamically in volcano plot. + +- **Plot histograms** of unadjusted p-values for each defined contrast. + +- **Plot volcanos** for each defined contrast with specified FDR p-val and FC thresholds. + +- **Output Fold Change threshold** only genes with absolute FC >= this threshold (in at least one of the defined contrasts) are kept in tabular result file and displayed dynamically in volcano plot (both 'log2(threshold)' and 'log2(1/threshold)' values will be used). + +- **Add gene/probe information** : if yes, add description of genes to the result tab. + + \- **Organism** corresponding to experimental data used. + + \- **Nature of row names** corresponding to experimental data used in input. + +- **Html snapshot format** : for interactive plotly plots. + +\- **Advanced Parameters** + +- **Confounding effect policy** : DO NOT modify this parameter unless you know what you are doing! See Limma documentation for more information. + +----- + +**Outputs** + +- **LIMMA statistic tabular** is the main result file for LIMMA, represented as tab delimited matrix. First and second columns contain respectively gene names and information grabbed from biomaRt R package. Then, the following columns contain differential expression statistics (p.val, FDR, FC, log2FC and t-statistic) for defined contrasts, for each gene (in rows). 
+ +- **LIMMA detailed tabular** contains specific statistics required for additional analysis tools (e.g. SMAGEXP tool), represented as tab delimited matrix where each column contains specific statistics (residual, eBayes prior and total degree of freedom) between groups for each gene (in rows). + +- **HTML file** to access interactive version of histograms, F-Ratio barplots and volcanos through PlotLy html pages and tabulated differential results. + +- **LOG file** for job log. If you see errors, please attach it to the bug report. + +]]> </help> + + + <citations> + <citation type="bibtex">@misc{vandel_jimmy_2018_1477870, author = {Vandel, J. and Gheeraert, C. and Eeckhoute, J. and Staels, B. and Lefebvre, P. and Dubois-Chevalier, J.}, title = {GIANT: Galaxy-based Interactive tools for ANalaysis of Transcriptomic data}, month = nov, year = 2018, doi = {10.5281/zenodo.1477870}, url = {https://doi.org/10.5281/zenodo.1477870} + }</citation> + + <citation type="bibtex">@article{doi:10.1093/nar/gkv007, + author = {Ritchie, Matthew E. and Phipson, Belinda and Wu, Di and Hu, Yifang and Law, Charity W. and Shi, Wei and Smyth, Gordon K.}, + title = {limma powers differential expression analyses for RNA-sequencing and microarray studies}, + journal = {Nucleic Acids Research}, + volume = {43}, + number = {7}, + pages = {e47}, + year = {2015}, + doi = {10.1093/nar/gkv007}, + URL = {http://dx.doi.org/10.1093/nar/gkv007}, + eprint = {/oup/backfile/content_public/journal/nar/43/7/10.1093_nar_gkv007/2/gkv007.pdf} + }</citation> + + <citation type="bibtex">@online{plotly, author = {Plotly Technologies Inc.}, title = {Collaborative data science}, publisher = {Plotly Technologies Inc.}, address = {Montreal, QC}, year = {2015}, url = {https://plot.ly} + }</citation> + </citations> + +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/galaxy/wrappers/tool-data/LimmaTool.loc.sample Fri Jun 26 09:43:41 2020 -0400 @@ -0,0 +1,854 @@ +#This file lists the locations of all files required by apt tool +#under the "apt" directory (a directory that contains a directory +#for each chip). This file has the TAB delimited format : +# +#<organismID> <organismName> <rowTypeID> <rowTypeName> +# +# +csavignyi_gene_ensembl C.savignyi genes (CSAV 2.0) ensembl_gene_id Gene stable ID +csavignyi_gene_ensembl C.savignyi genes (CSAV 2.0) external_gene_name Gene name +csavignyi_gene_ensembl C.savignyi genes (CSAV 2.0) entrezgene NCBI gene ID +xtropicalis_gene_ensembl Xenopus genes (JGI 4.2) affy_x_tropicalis AFFY X tropicalis probe +xtropicalis_gene_ensembl Xenopus genes (JGI 4.2) ensembl_gene_id Gene stable ID +xtropicalis_gene_ensembl Xenopus genes (JGI 4.2) external_gene_name Gene name +xtropicalis_gene_ensembl Xenopus genes (JGI 4.2) entrezgene NCBI gene ID +panubis_gene_ensembl Olive baboon genes (Panu_3.0) affy_cyngene_1_0_st_v1 AFFY CynGene 1 0 st v1 probe +panubis_gene_ensembl Olive baboon genes (Panu_3.0) affy_cyrgene_1_0_st_v1 AFFY CyRGene 1 0 st v1 probe +panubis_gene_ensembl Olive baboon genes (Panu_3.0) affy_hc_g110 AFFY HC G110 probe +panubis_gene_ensembl Olive baboon genes (Panu_3.0) affy_hg_focus AFFY HG Focus probe +panubis_gene_ensembl Olive baboon genes (Panu_3.0) affy_hg_u133a AFFY HG U133A probe +panubis_gene_ensembl Olive baboon genes (Panu_3.0) affy_hg_u133a_2 AFFY HG U133A 2 probe +panubis_gene_ensembl Olive baboon genes (Panu_3.0) affy_hg_u133b AFFY HG U133B probe +panubis_gene_ensembl Olive baboon genes (Panu_3.0) affy_hg_u133_plus_2 AFFY HG U133 Plus 2 probe +panubis_gene_ensembl Olive baboon genes (Panu_3.0) affy_hg_u95a AFFY HG U95A probe +panubis_gene_ensembl Olive baboon genes (Panu_3.0) affy_hg_u95av2 AFFY HG U95Av2 probe +panubis_gene_ensembl Olive baboon genes (Panu_3.0) affy_hg_u95b AFFY HG U95B probe +panubis_gene_ensembl Olive baboon 
genes (Panu_3.0) affy_hg_u95c AFFY HG U95C probe +panubis_gene_ensembl Olive baboon genes (Panu_3.0) affy_hg_u95d AFFY HG U95D probe +panubis_gene_ensembl Olive baboon genes (Panu_3.0) affy_hg_u95e AFFY HG U95E probe +panubis_gene_ensembl Olive baboon genes (Panu_3.0) affy_hta_2_0 AFFY HTA 2 0 probe +panubis_gene_ensembl Olive baboon genes (Panu_3.0) affy_huex_1_0_st_v2 AFFY HuEx 1 0 st v2 probe +panubis_gene_ensembl Olive baboon genes (Panu_3.0) affy_hugenefl AFFY HuGeneFL probe +panubis_gene_ensembl Olive baboon genes (Panu_3.0) affy_hugene_1_0_st_v1 AFFY HuGene 1 0 st v1 probe +panubis_gene_ensembl Olive baboon genes (Panu_3.0) affy_hugene_2_0_st_v1 AFFY HuGene 2 0 st v1 probe +panubis_gene_ensembl Olive baboon genes (Panu_3.0) affy_primeview AFFY PrimeView probe +panubis_gene_ensembl Olive baboon genes (Panu_3.0) affy_rhegene_1_0_st_v1 AFFY RheGene 1 0 st v1 probe +panubis_gene_ensembl Olive baboon genes (Panu_3.0) affy_rhegene_1_1_st_v1 AFFY RheGene 1 1 st v1 probe +panubis_gene_ensembl Olive baboon genes (Panu_3.0) affy_rhesus AFFY Rhesus probe +panubis_gene_ensembl Olive baboon genes (Panu_3.0) affy_u133_x3p AFFY U133 X3P probe +panubis_gene_ensembl Olive baboon genes (Panu_3.0) ensembl_gene_id Gene stable ID +panubis_gene_ensembl Olive baboon genes (Panu_3.0) external_gene_name Gene name +panubis_gene_ensembl Olive baboon genes (Panu_3.0) entrezgene NCBI gene ID +mnemestrina_gene_ensembl Pig-tailed macaque genes (Mnem_1.0) affy_cyngene_1_0_st_v1 AFFY CynGene 1 0 st v1 probe +mnemestrina_gene_ensembl Pig-tailed macaque genes (Mnem_1.0) affy_cyrgene_1_0_st_v1 AFFY CyRGene 1 0 st v1 probe +mnemestrina_gene_ensembl Pig-tailed macaque genes (Mnem_1.0) affy_hc_g110 AFFY HC G110 probe +mnemestrina_gene_ensembl Pig-tailed macaque genes (Mnem_1.0) affy_hg_focus AFFY HG Focus probe +mnemestrina_gene_ensembl Pig-tailed macaque genes (Mnem_1.0) affy_hg_u133a AFFY HG U133A probe +mnemestrina_gene_ensembl Pig-tailed macaque genes (Mnem_1.0) affy_hg_u133a_2 AFFY HG U133A 
2 probe +mnemestrina_gene_ensembl Pig-tailed macaque genes (Mnem_1.0) affy_hg_u133b AFFY HG U133B probe +mnemestrina_gene_ensembl Pig-tailed macaque genes (Mnem_1.0) affy_hg_u133_plus_2 AFFY HG U133 Plus 2 probe +mnemestrina_gene_ensembl Pig-tailed macaque genes (Mnem_1.0) affy_hg_u95a AFFY HG U95A probe +mnemestrina_gene_ensembl Pig-tailed macaque genes (Mnem_1.0) affy_hg_u95av2 AFFY HG U95Av2 probe +mnemestrina_gene_ensembl Pig-tailed macaque genes (Mnem_1.0) affy_hg_u95b AFFY HG U95B probe +mnemestrina_gene_ensembl Pig-tailed macaque genes (Mnem_1.0) affy_hg_u95c AFFY HG U95C probe +mnemestrina_gene_ensembl Pig-tailed macaque genes (Mnem_1.0) affy_hg_u95d AFFY HG U95D probe +mnemestrina_gene_ensembl Pig-tailed macaque genes (Mnem_1.0) affy_hg_u95e AFFY HG U95E probe +mnemestrina_gene_ensembl Pig-tailed macaque genes (Mnem_1.0) affy_hta_2_0 AFFY HTA 2 0 probe +mnemestrina_gene_ensembl Pig-tailed macaque genes (Mnem_1.0) affy_huex_1_0_st_v2 AFFY HuEx 1 0 st v2 probe +mnemestrina_gene_ensembl Pig-tailed macaque genes (Mnem_1.0) affy_hugenefl AFFY HuGeneFL probe +mnemestrina_gene_ensembl Pig-tailed macaque genes (Mnem_1.0) affy_hugene_1_0_st_v1 AFFY HuGene 1 0 st v1 probe +mnemestrina_gene_ensembl Pig-tailed macaque genes (Mnem_1.0) affy_hugene_2_0_st_v1 AFFY HuGene 2 0 st v1 probe +mnemestrina_gene_ensembl Pig-tailed macaque genes (Mnem_1.0) affy_primeview AFFY PrimeView probe +mnemestrina_gene_ensembl Pig-tailed macaque genes (Mnem_1.0) affy_rhegene_1_0_st_v1 AFFY RheGene 1 0 st v1 probe +mnemestrina_gene_ensembl Pig-tailed macaque genes (Mnem_1.0) affy_rhegene_1_1_st_v1 AFFY RheGene 1 1 st v1 probe +mnemestrina_gene_ensembl Pig-tailed macaque genes (Mnem_1.0) affy_rhesus AFFY Rhesus probe +mnemestrina_gene_ensembl Pig-tailed macaque genes (Mnem_1.0) affy_u133_x3p AFFY U133 X3P probe +mnemestrina_gene_ensembl Pig-tailed macaque genes (Mnem_1.0) ensembl_gene_id Gene stable ID +mnemestrina_gene_ensembl Pig-tailed macaque genes (Mnem_1.0) external_gene_name Gene name 
+mnemestrina_gene_ensembl Pig-tailed macaque genes (Mnem_1.0) entrezgene NCBI gene ID +sscrofa_gene_ensembl Pig genes (Sscrofa11.1) affy_porcine AFFY Porcine probe +sscrofa_gene_ensembl Pig genes (Sscrofa11.1) ensembl_gene_id Gene stable ID +sscrofa_gene_ensembl Pig genes (Sscrofa11.1) external_gene_name Gene name +sscrofa_gene_ensembl Pig genes (Sscrofa11.1) entrezgene NCBI gene ID +ogarnettii_gene_ensembl Bushbaby genes (OtoGar3) ensembl_gene_id Gene stable ID +ogarnettii_gene_ensembl Bushbaby genes (OtoGar3) external_gene_name Gene name +ogarnettii_gene_ensembl Bushbaby genes (OtoGar3) entrezgene NCBI gene ID +mmulatta_gene_ensembl Macaque genes (Mmul_8.0.1) affy_cyngene_1_0_st_v1 AFFY CynGene 1 0 st v1 probe +mmulatta_gene_ensembl Macaque genes (Mmul_8.0.1) affy_cyrgene_1_0_st_v1 AFFY CyRGene 1 0 st v1 probe +mmulatta_gene_ensembl Macaque genes (Mmul_8.0.1) affy_hc_g110 AFFY HC G110 probe +mmulatta_gene_ensembl Macaque genes (Mmul_8.0.1) affy_hg_focus AFFY HG Focus probe +mmulatta_gene_ensembl Macaque genes (Mmul_8.0.1) affy_hg_u133a AFFY HG U133A probe +mmulatta_gene_ensembl Macaque genes (Mmul_8.0.1) affy_hg_u133a_2 AFFY HG U133A 2 probe +mmulatta_gene_ensembl Macaque genes (Mmul_8.0.1) affy_hg_u133b AFFY HG U133B probe +mmulatta_gene_ensembl Macaque genes (Mmul_8.0.1) affy_hg_u133_plus_2 AFFY HG U133 Plus 2 probe +mmulatta_gene_ensembl Macaque genes (Mmul_8.0.1) affy_hg_u95a AFFY HG U95A probe +mmulatta_gene_ensembl Macaque genes (Mmul_8.0.1) affy_hg_u95av2 AFFY HG U95Av2 probe +mmulatta_gene_ensembl Macaque genes (Mmul_8.0.1) affy_hg_u95b AFFY HG U95B probe +mmulatta_gene_ensembl Macaque genes (Mmul_8.0.1) affy_hg_u95c AFFY HG U95C probe +mmulatta_gene_ensembl Macaque genes (Mmul_8.0.1) affy_hg_u95d AFFY HG U95D probe +mmulatta_gene_ensembl Macaque genes (Mmul_8.0.1) affy_hg_u95e AFFY HG U95E probe +mmulatta_gene_ensembl Macaque genes (Mmul_8.0.1) affy_hta_2_0 AFFY HTA 2 0 probe +mmulatta_gene_ensembl Macaque genes (Mmul_8.0.1) affy_huex_1_0_st_v2 AFFY HuEx 
1 0 st v2 probe +mmulatta_gene_ensembl Macaque genes (Mmul_8.0.1) affy_hugenefl AFFY HuGeneFL probe +mmulatta_gene_ensembl Macaque genes (Mmul_8.0.1) affy_hugene_1_0_st_v1 AFFY HuGene 1 0 st v1 probe +mmulatta_gene_ensembl Macaque genes (Mmul_8.0.1) affy_hugene_2_0_st_v1 AFFY HuGene 2 0 st v1 probe +mmulatta_gene_ensembl Macaque genes (Mmul_8.0.1) affy_primeview AFFY PrimeView probe +mmulatta_gene_ensembl Macaque genes (Mmul_8.0.1) affy_rhegene_1_0_st_v1 AFFY RheGene 1 0 st v1 probe +mmulatta_gene_ensembl Macaque genes (Mmul_8.0.1) affy_rhegene_1_1_st_v1 AFFY RheGene 1 1 st v1 probe +mmulatta_gene_ensembl Macaque genes (Mmul_8.0.1) affy_rhesus AFFY Rhesus probe +mmulatta_gene_ensembl Macaque genes (Mmul_8.0.1) affy_u133_x3p AFFY U133 X3P probe +mmulatta_gene_ensembl Macaque genes (Mmul_8.0.1) ensembl_gene_id Gene stable ID +mmulatta_gene_ensembl Macaque genes (Mmul_8.0.1) external_gene_name Gene name +mmulatta_gene_ensembl Macaque genes (Mmul_8.0.1) entrezgene NCBI gene ID +acarolinensis_gene_ensembl Anole lizard genes (AnoCar2.0) ensembl_gene_id Gene stable ID +acarolinensis_gene_ensembl Anole lizard genes (AnoCar2.0) external_gene_name Gene name +acarolinensis_gene_ensembl Anole lizard genes (AnoCar2.0) entrezgene NCBI gene ID +cchok1gshd_gene_ensembl Chinese hamster CHOK1GS genes (CHOK1GS_HDv1) ensembl_gene_id Gene stable ID +cchok1gshd_gene_ensembl Chinese hamster CHOK1GS genes (CHOK1GS_HDv1) external_gene_name Gene name +cchok1gshd_gene_ensembl Chinese hamster CHOK1GS genes (CHOK1GS_HDv1) entrezgene NCBI gene ID +catys_gene_ensembl Sooty mangabey genes (Caty_1.0) affy_cyngene_1_0_st_v1 AFFY CynGene 1 0 st v1 probe +catys_gene_ensembl Sooty mangabey genes (Caty_1.0) affy_cyrgene_1_0_st_v1 AFFY CyRGene 1 0 st v1 probe +catys_gene_ensembl Sooty mangabey genes (Caty_1.0) affy_hc_g110 AFFY HC G110 probe +catys_gene_ensembl Sooty mangabey genes (Caty_1.0) affy_hg_focus AFFY HG Focus probe +catys_gene_ensembl Sooty mangabey genes (Caty_1.0) affy_hg_u133a AFFY HG 
U133A probe +catys_gene_ensembl Sooty mangabey genes (Caty_1.0) affy_hg_u133a_2 AFFY HG U133A 2 probe +catys_gene_ensembl Sooty mangabey genes (Caty_1.0) affy_hg_u133b AFFY HG U133B probe +catys_gene_ensembl Sooty mangabey genes (Caty_1.0) affy_hg_u133_plus_2 AFFY HG U133 Plus 2 probe +catys_gene_ensembl Sooty mangabey genes (Caty_1.0) affy_hg_u95a AFFY HG U95A probe +catys_gene_ensembl Sooty mangabey genes (Caty_1.0) affy_hg_u95av2 AFFY HG U95Av2 probe +catys_gene_ensembl Sooty mangabey genes (Caty_1.0) affy_hg_u95b AFFY HG U95B probe +catys_gene_ensembl Sooty mangabey genes (Caty_1.0) affy_hg_u95c AFFY HG U95C probe +catys_gene_ensembl Sooty mangabey genes (Caty_1.0) affy_hg_u95d AFFY HG U95D probe +catys_gene_ensembl Sooty mangabey genes (Caty_1.0) affy_hg_u95e AFFY HG U95E probe +catys_gene_ensembl Sooty mangabey genes (Caty_1.0) affy_hta_2_0 AFFY HTA 2 0 probe +catys_gene_ensembl Sooty mangabey genes (Caty_1.0) affy_huex_1_0_st_v2 AFFY HuEx 1 0 st v2 probe +catys_gene_ensembl Sooty mangabey genes (Caty_1.0) affy_hugenefl AFFY HuGeneFL probe +catys_gene_ensembl Sooty mangabey genes (Caty_1.0) affy_hugene_1_0_st_v1 AFFY HuGene 1 0 st v1 probe +catys_gene_ensembl Sooty mangabey genes (Caty_1.0) affy_hugene_2_0_st_v1 AFFY HuGene 2 0 st v1 probe +catys_gene_ensembl Sooty mangabey genes (Caty_1.0) affy_primeview AFFY PrimeView probe +catys_gene_ensembl Sooty mangabey genes (Caty_1.0) affy_rhegene_1_0_st_v1 AFFY RheGene 1 0 st v1 probe +catys_gene_ensembl Sooty mangabey genes (Caty_1.0) affy_rhegene_1_1_st_v1 AFFY RheGene 1 1 st v1 probe +catys_gene_ensembl Sooty mangabey genes (Caty_1.0) affy_rhesus AFFY Rhesus probe +catys_gene_ensembl Sooty mangabey genes (Caty_1.0) affy_u133_x3p AFFY U133 X3P probe +catys_gene_ensembl Sooty mangabey genes (Caty_1.0) ensembl_gene_id Gene stable ID +catys_gene_ensembl Sooty mangabey genes (Caty_1.0) external_gene_name Gene name +catys_gene_ensembl Sooty mangabey genes (Caty_1.0) entrezgene NCBI gene ID +mpahari_gene_ensembl Shrew 
mouse genes (PAHARI_EIJ_v1.1) ensembl_gene_id Gene stable ID +mpahari_gene_ensembl Shrew mouse genes (PAHARI_EIJ_v1.1) external_gene_name Gene name +mpahari_gene_ensembl Shrew mouse genes (PAHARI_EIJ_v1.1) entrezgene NCBI gene ID +neugenii_gene_ensembl Wallaby genes (Meug_1.0) ensembl_gene_id Gene stable ID +neugenii_gene_ensembl Wallaby genes (Meug_1.0) external_gene_name Gene name +rbieti_gene_ensembl Black snub-nosed monkey genes (ASM169854v1) affy_cyngene_1_0_st_v1 AFFY CynGene 1 0 st v1 probe +rbieti_gene_ensembl Black snub-nosed monkey genes (ASM169854v1) affy_cyrgene_1_0_st_v1 AFFY CyRGene 1 0 st v1 probe +rbieti_gene_ensembl Black snub-nosed monkey genes (ASM169854v1) affy_hc_g110 AFFY HC G110 probe +rbieti_gene_ensembl Black snub-nosed monkey genes (ASM169854v1) affy_hg_focus AFFY HG Focus probe +rbieti_gene_ensembl Black snub-nosed monkey genes (ASM169854v1) affy_hg_u133a AFFY HG U133A probe +rbieti_gene_ensembl Black snub-nosed monkey genes (ASM169854v1) affy_hg_u133a_2 AFFY HG U133A 2 probe +rbieti_gene_ensembl Black snub-nosed monkey genes (ASM169854v1) affy_hg_u133b AFFY HG U133B probe +rbieti_gene_ensembl Black snub-nosed monkey genes (ASM169854v1) affy_hg_u133_plus_2 AFFY HG U133 Plus 2 probe +rbieti_gene_ensembl Black snub-nosed monkey genes (ASM169854v1) affy_hg_u95a AFFY HG U95A probe +rbieti_gene_ensembl Black snub-nosed monkey genes (ASM169854v1) affy_hg_u95av2 AFFY HG U95Av2 probe +rbieti_gene_ensembl Black snub-nosed monkey genes (ASM169854v1) affy_hg_u95b AFFY HG U95B probe +rbieti_gene_ensembl Black snub-nosed monkey genes (ASM169854v1) affy_hg_u95c AFFY HG U95C probe +rbieti_gene_ensembl Black snub-nosed monkey genes (ASM169854v1) affy_hg_u95d AFFY HG U95D probe +rbieti_gene_ensembl Black snub-nosed monkey genes (ASM169854v1) affy_hg_u95e AFFY HG U95E probe +rbieti_gene_ensembl Black snub-nosed monkey genes (ASM169854v1) affy_hta_2_0 AFFY HTA 2 0 probe +rbieti_gene_ensembl Black snub-nosed monkey genes (ASM169854v1) affy_huex_1_0_st_v2 
AFFY HuEx 1 0 st v2 probe +rbieti_gene_ensembl Black snub-nosed monkey genes (ASM169854v1) affy_hugenefl AFFY HuGeneFL probe +rbieti_gene_ensembl Black snub-nosed monkey genes (ASM169854v1) affy_hugene_1_0_st_v1 AFFY HuGene 1 0 st v1 probe +rbieti_gene_ensembl Black snub-nosed monkey genes (ASM169854v1) affy_hugene_2_0_st_v1 AFFY HuGene 2 0 st v1 probe +rbieti_gene_ensembl Black snub-nosed monkey genes (ASM169854v1) affy_primeview AFFY PrimeView probe +rbieti_gene_ensembl Black snub-nosed monkey genes (ASM169854v1) affy_rhegene_1_0_st_v1 AFFY RheGene 1 0 st v1 probe +rbieti_gene_ensembl Black snub-nosed monkey genes (ASM169854v1) affy_rhegene_1_1_st_v1 AFFY RheGene 1 1 st v1 probe +rbieti_gene_ensembl Black snub-nosed monkey genes (ASM169854v1) affy_rhesus AFFY Rhesus probe +rbieti_gene_ensembl Black snub-nosed monkey genes (ASM169854v1) affy_u133_x3p AFFY U133 X3P probe +rbieti_gene_ensembl Black snub-nosed monkey genes (ASM169854v1) ensembl_gene_id Gene stable ID +rbieti_gene_ensembl Black snub-nosed monkey genes (ASM169854v1) external_gene_name Gene name +rbieti_gene_ensembl Black snub-nosed monkey genes (ASM169854v1) entrezgene NCBI gene ID +lafricana_gene_ensembl Elephant genes (Loxafr3.0) ensembl_gene_id Gene stable ID +lafricana_gene_ensembl Elephant genes (Loxafr3.0) external_gene_name Gene name +lafricana_gene_ensembl Elephant genes (Loxafr3.0) entrezgene NCBI gene ID +mdomestica_gene_ensembl Opossum genes (monDom5) ensembl_gene_id Gene stable ID +mdomestica_gene_ensembl Opossum genes (monDom5) external_gene_name Gene name +mdomestica_gene_ensembl Opossum genes (monDom5) entrezgene NCBI gene ID +mauratus_gene_ensembl Golden Hamster genes (MesAur1.0) ensembl_gene_id Gene stable ID +mauratus_gene_ensembl Golden Hamster genes (MesAur1.0) external_gene_name Gene name +mauratus_gene_ensembl Golden Hamster genes (MesAur1.0) entrezgene NCBI gene ID +etelfairi_gene_ensembl Lesser hedgehog tenrec genes (TENREC) ensembl_gene_id Gene stable ID +etelfairi_gene_ensembl 
Lesser hedgehog tenrec genes (TENREC) external_gene_name Gene name +etelfairi_gene_ensembl Lesser hedgehog tenrec genes (TENREC) entrezgene NCBI gene ID +tguttata_gene_ensembl Zebra Finch genes (taeGut3.2.4) ensembl_gene_id Gene stable ID +tguttata_gene_ensembl Zebra Finch genes (taeGut3.2.4) external_gene_name Gene name +tguttata_gene_ensembl Zebra Finch genes (taeGut3.2.4) entrezgene NCBI gene ID +celegans_gene_ensembl Caenorhabditis elegans genes (WBcel235) affy_c_elegans AFFY C elegans probe +celegans_gene_ensembl Caenorhabditis elegans genes (WBcel235) affy_gpl19230 AFFY GPL19230 probe +celegans_gene_ensembl Caenorhabditis elegans genes (WBcel235) ensembl_gene_id Gene stable ID +celegans_gene_ensembl Caenorhabditis elegans genes (WBcel235) external_gene_name Gene name +celegans_gene_ensembl Caenorhabditis elegans genes (WBcel235) entrezgene NCBI gene ID +ngalili_gene_ensembl Upper Galilee mountains blind mole rat genes (S.galili_v1.0) ensembl_gene_id Gene stable ID +ngalili_gene_ensembl Upper Galilee mountains blind mole rat genes (S.galili_v1.0) external_gene_name Gene name +ngalili_gene_ensembl Upper Galilee mountains blind mole rat genes (S.galili_v1.0) entrezgene NCBI gene ID +mmusculus_gene_ensembl Mouse genes (GRCm38.p6) affy_mg_u74a AFFY MG U74A probe +mmusculus_gene_ensembl Mouse genes (GRCm38.p6) affy_mg_u74av2 AFFY MG U74Av2 probe +mmusculus_gene_ensembl Mouse genes (GRCm38.p6) affy_mg_u74b AFFY MG U74B probe +mmusculus_gene_ensembl Mouse genes (GRCm38.p6) affy_mg_u74bv2 AFFY MG U74Bv2 probe +mmusculus_gene_ensembl Mouse genes (GRCm38.p6) affy_mg_u74c AFFY MG U74C probe +mmusculus_gene_ensembl Mouse genes (GRCm38.p6) affy_mg_u74cv2 AFFY MG U74Cv2 probe +mmusculus_gene_ensembl Mouse genes (GRCm38.p6) affy_moe430a AFFY MOE430A probe +mmusculus_gene_ensembl Mouse genes (GRCm38.p6) affy_moe430b AFFY MOE430B probe +mmusculus_gene_ensembl Mouse genes (GRCm38.p6) affy_moex_1_0_st_v1 AFFY MoEx 1 0 st v1 probe +mmusculus_gene_ensembl Mouse genes (GRCm38.p6) 
affy_mogene_1_0_st_v1 AFFY MoGene 1 0 st v1 probe +mmusculus_gene_ensembl Mouse genes (GRCm38.p6) affy_mogene_2_1_st_v1 AFFY MoGene 2 1 st v1 probe +mmusculus_gene_ensembl Mouse genes (GRCm38.p6) affy_mouse430a_2 AFFY Mouse430A 2 probe +mmusculus_gene_ensembl Mouse genes (GRCm38.p6) affy_mouse430_2 AFFY Mouse430 2 probe +mmusculus_gene_ensembl Mouse genes (GRCm38.p6) affy_mu11ksuba AFFY Mu11KsubA probe +mmusculus_gene_ensembl Mouse genes (GRCm38.p6) affy_mu11ksubb AFFY Mu11KsubB probe +mmusculus_gene_ensembl Mouse genes (GRCm38.p6) ensembl_gene_id Gene stable ID +mmusculus_gene_ensembl Mouse genes (GRCm38.p6) external_gene_name Gene name +mmusculus_gene_ensembl Mouse genes (GRCm38.p6) entrezgene NCBI gene ID +hfemale_gene_ensembl Naked mole-rat female genes (HetGla_female_1.0) ensembl_gene_id Gene stable ID +hfemale_gene_ensembl Naked mole-rat female genes (HetGla_female_1.0) external_gene_name Gene name +hfemale_gene_ensembl Naked mole-rat female genes (HetGla_female_1.0) entrezgene NCBI gene ID +pcoquereli_gene_ensembl Coquerel's sifaka genes (Pcoq_1.0) affy_cyngene_1_0_st_v1 AFFY CynGene 1 0 st v1 probe +pcoquereli_gene_ensembl Coquerel's sifaka genes (Pcoq_1.0) affy_cyrgene_1_0_st_v1 AFFY CyRGene 1 0 st v1 probe +pcoquereli_gene_ensembl Coquerel's sifaka genes (Pcoq_1.0) affy_hc_g110 AFFY HC G110 probe +pcoquereli_gene_ensembl Coquerel's sifaka genes (Pcoq_1.0) affy_hg_focus AFFY HG Focus probe +pcoquereli_gene_ensembl Coquerel's sifaka genes (Pcoq_1.0) affy_hg_u133a AFFY HG U133A probe +pcoquereli_gene_ensembl Coquerel's sifaka genes (Pcoq_1.0) affy_hg_u133a_2 AFFY HG U133A 2 probe +pcoquereli_gene_ensembl Coquerel's sifaka genes (Pcoq_1.0) affy_hg_u133b AFFY HG U133B probe +pcoquereli_gene_ensembl Coquerel's sifaka genes (Pcoq_1.0) affy_hg_u133_plus_2 AFFY HG U133 Plus 2 probe +pcoquereli_gene_ensembl Coquerel's sifaka genes (Pcoq_1.0) affy_hg_u95a AFFY HG U95A probe +pcoquereli_gene_ensembl Coquerel's sifaka genes (Pcoq_1.0) affy_hg_u95av2 AFFY HG U95Av2 
probe +pcoquereli_gene_ensembl Coquerel's sifaka genes (Pcoq_1.0) affy_hg_u95b AFFY HG U95B probe +pcoquereli_gene_ensembl Coquerel's sifaka genes (Pcoq_1.0) affy_hg_u95c AFFY HG U95C probe +pcoquereli_gene_ensembl Coquerel's sifaka genes (Pcoq_1.0) affy_hg_u95d AFFY HG U95D probe +pcoquereli_gene_ensembl Coquerel's sifaka genes (Pcoq_1.0) affy_hg_u95e AFFY HG U95E probe +pcoquereli_gene_ensembl Coquerel's sifaka genes (Pcoq_1.0) affy_hta_2_0 AFFY HTA 2 0 probe +pcoquereli_gene_ensembl Coquerel's sifaka genes (Pcoq_1.0) affy_huex_1_0_st_v2 AFFY HuEx 1 0 st v2 probe +pcoquereli_gene_ensembl Coquerel's sifaka genes (Pcoq_1.0) affy_hugenefl AFFY HuGeneFL probe +pcoquereli_gene_ensembl Coquerel's sifaka genes (Pcoq_1.0) affy_hugene_1_0_st_v1 AFFY HuGene 1 0 st v1 probe +pcoquereli_gene_ensembl Coquerel's sifaka genes (Pcoq_1.0) affy_hugene_2_0_st_v1 AFFY HuGene 2 0 st v1 probe +pcoquereli_gene_ensembl Coquerel's sifaka genes (Pcoq_1.0) affy_primeview AFFY PrimeView probe +pcoquereli_gene_ensembl Coquerel's sifaka genes (Pcoq_1.0) affy_rhegene_1_0_st_v1 AFFY RheGene 1 0 st v1 probe +pcoquereli_gene_ensembl Coquerel's sifaka genes (Pcoq_1.0) affy_rhegene_1_1_st_v1 AFFY RheGene 1 1 st v1 probe +pcoquereli_gene_ensembl Coquerel's sifaka genes (Pcoq_1.0) affy_rhesus AFFY Rhesus probe +pcoquereli_gene_ensembl Coquerel's sifaka genes (Pcoq_1.0) affy_u133_x3p AFFY U133 X3P probe +pcoquereli_gene_ensembl Coquerel's sifaka genes (Pcoq_1.0) ensembl_gene_id Gene stable ID +pcoquereli_gene_ensembl Coquerel's sifaka genes (Pcoq_1.0) external_gene_name Gene name +pcoquereli_gene_ensembl Coquerel's sifaka genes (Pcoq_1.0) entrezgene NCBI gene ID +odegus_gene_ensembl Degu genes (OctDeg1.0) ensembl_gene_id Gene stable ID +odegus_gene_ensembl Degu genes (OctDeg1.0) external_gene_name Gene name +odegus_gene_ensembl Degu genes (OctDeg1.0) entrezgene NCBI gene ID +cintestinalis_gene_ensembl C.intestinalis genes (KH) affy_cint06a520380f AFFY CINT06a520380F probe +cintestinalis_gene_ensembl 
C.intestinalis genes (KH) ensembl_gene_id Gene stable ID +cintestinalis_gene_ensembl C.intestinalis genes (KH) external_gene_name Gene name +cintestinalis_gene_ensembl C.intestinalis genes (KH) entrezgene NCBI gene ID +cporcellus_gene_ensembl Guinea Pig genes (Cavpor3.0) ensembl_gene_id Gene stable ID +cporcellus_gene_ensembl Guinea Pig genes (Cavpor3.0) external_gene_name Gene name +cporcellus_gene_ensembl Guinea Pig genes (Cavpor3.0) entrezgene NCBI gene ID +amelanoleuca_gene_ensembl Panda genes (ailMel1) ensembl_gene_id Gene stable ID +amelanoleuca_gene_ensembl Panda genes (ailMel1) external_gene_name Gene name +amelanoleuca_gene_ensembl Panda genes (ailMel1) entrezgene NCBI gene ID +mcaroli_gene_ensembl Ryukyu mouse genes (CAROLI_EIJ_v1.1) ensembl_gene_id Gene stable ID +mcaroli_gene_ensembl Ryukyu mouse genes (CAROLI_EIJ_v1.1) external_gene_name Gene name +mcaroli_gene_ensembl Ryukyu mouse genes (CAROLI_EIJ_v1.1) entrezgene NCBI gene ID +gaculeatus_gene_ensembl Stickleback genes (BROAD S1) ensembl_gene_id Gene stable ID +gaculeatus_gene_ensembl Stickleback genes (BROAD S1) external_gene_name Gene name +gaculeatus_gene_ensembl Stickleback genes (BROAD S1) entrezgene NCBI gene ID +rroxellana_gene_ensembl Golden snub-nosed monkey genes (Rrox_v1) affy_cyngene_1_0_st_v1 AFFY CynGene 1 0 st v1 probe +rroxellana_gene_ensembl Golden snub-nosed monkey genes (Rrox_v1) affy_cyrgene_1_0_st_v1 AFFY CyRGene 1 0 st v1 probe +rroxellana_gene_ensembl Golden snub-nosed monkey genes (Rrox_v1) affy_hc_g110 AFFY HC G110 probe +rroxellana_gene_ensembl Golden snub-nosed monkey genes (Rrox_v1) affy_hg_focus AFFY HG Focus probe +rroxellana_gene_ensembl Golden snub-nosed monkey genes (Rrox_v1) affy_hg_u133a AFFY HG U133A probe +rroxellana_gene_ensembl Golden snub-nosed monkey genes (Rrox_v1) affy_hg_u133a_2 AFFY HG U133A 2 probe +rroxellana_gene_ensembl Golden snub-nosed monkey genes (Rrox_v1) affy_hg_u133b AFFY HG U133B probe +rroxellana_gene_ensembl Golden snub-nosed monkey genes 
(Rrox_v1) affy_hg_u133_plus_2 AFFY HG U133 Plus 2 probe +rroxellana_gene_ensembl Golden snub-nosed monkey genes (Rrox_v1) affy_hg_u95a AFFY HG U95A probe +rroxellana_gene_ensembl Golden snub-nosed monkey genes (Rrox_v1) affy_hg_u95av2 AFFY HG U95Av2 probe +rroxellana_gene_ensembl Golden snub-nosed monkey genes (Rrox_v1) affy_hg_u95b AFFY HG U95B probe +rroxellana_gene_ensembl Golden snub-nosed monkey genes (Rrox_v1) affy_hg_u95c AFFY HG U95C probe +rroxellana_gene_ensembl Golden snub-nosed monkey genes (Rrox_v1) affy_hg_u95d AFFY HG U95D probe +rroxellana_gene_ensembl Golden snub-nosed monkey genes (Rrox_v1) affy_hg_u95e AFFY HG U95E probe +rroxellana_gene_ensembl Golden snub-nosed monkey genes (Rrox_v1) affy_hta_2_0 AFFY HTA 2 0 probe +rroxellana_gene_ensembl Golden snub-nosed monkey genes (Rrox_v1) affy_huex_1_0_st_v2 AFFY HuEx 1 0 st v2 probe +rroxellana_gene_ensembl Golden snub-nosed monkey genes (Rrox_v1) affy_hugenefl AFFY HuGeneFL probe +rroxellana_gene_ensembl Golden snub-nosed monkey genes (Rrox_v1) affy_hugene_1_0_st_v1 AFFY HuGene 1 0 st v1 probe +rroxellana_gene_ensembl Golden snub-nosed monkey genes (Rrox_v1) affy_hugene_2_0_st_v1 AFFY HuGene 2 0 st v1 probe +rroxellana_gene_ensembl Golden snub-nosed monkey genes (Rrox_v1) affy_primeview AFFY PrimeView probe +rroxellana_gene_ensembl Golden snub-nosed monkey genes (Rrox_v1) affy_rhegene_1_0_st_v1 AFFY RheGene 1 0 st v1 probe +rroxellana_gene_ensembl Golden snub-nosed monkey genes (Rrox_v1) affy_rhegene_1_1_st_v1 AFFY RheGene 1 1 st v1 probe +rroxellana_gene_ensembl Golden snub-nosed monkey genes (Rrox_v1) affy_rhesus AFFY Rhesus probe +rroxellana_gene_ensembl Golden snub-nosed monkey genes (Rrox_v1) affy_u133_x3p AFFY U133 X3P probe +rroxellana_gene_ensembl Golden snub-nosed monkey genes (Rrox_v1) ensembl_gene_id Gene stable ID +rroxellana_gene_ensembl Golden snub-nosed monkey genes (Rrox_v1) external_gene_name Gene name +rroxellana_gene_ensembl Golden snub-nosed monkey genes (Rrox_v1) entrezgene NCBI 
gene ID +csabaeus_gene_ensembl Vervet-AGM genes (ChlSab1.1) ensembl_gene_id Gene stable ID +csabaeus_gene_ensembl Vervet-AGM genes (ChlSab1.1) external_gene_name Gene name +csabaeus_gene_ensembl Vervet-AGM genes (ChlSab1.1) entrezgene NCBI gene ID +eeuropaeus_gene_ensembl Hedgehog genes (eriEur1) ensembl_gene_id Gene stable ID +eeuropaeus_gene_ensembl Hedgehog genes (eriEur1) external_gene_name Gene name +eeuropaeus_gene_ensembl Hedgehog genes (eriEur1) entrezgene NCBI gene ID +mleucophaeus_gene_ensembl Drill genes (Mleu.le_1.0) affy_cyngene_1_0_st_v1 AFFY CynGene 1 0 st v1 probe +mleucophaeus_gene_ensembl Drill genes (Mleu.le_1.0) affy_cyrgene_1_0_st_v1 AFFY CyRGene 1 0 st v1 probe +mleucophaeus_gene_ensembl Drill genes (Mleu.le_1.0) affy_hc_g110 AFFY HC G110 probe +mleucophaeus_gene_ensembl Drill genes (Mleu.le_1.0) affy_hg_focus AFFY HG Focus probe +mleucophaeus_gene_ensembl Drill genes (Mleu.le_1.0) affy_hg_u133a AFFY HG U133A probe +mleucophaeus_gene_ensembl Drill genes (Mleu.le_1.0) affy_hg_u133a_2 AFFY HG U133A 2 probe +mleucophaeus_gene_ensembl Drill genes (Mleu.le_1.0) affy_hg_u133b AFFY HG U133B probe +mleucophaeus_gene_ensembl Drill genes (Mleu.le_1.0) affy_hg_u133_plus_2 AFFY HG U133 Plus 2 probe +mleucophaeus_gene_ensembl Drill genes (Mleu.le_1.0) affy_hg_u95a AFFY HG U95A probe +mleucophaeus_gene_ensembl Drill genes (Mleu.le_1.0) affy_hg_u95av2 AFFY HG U95Av2 probe +mleucophaeus_gene_ensembl Drill genes (Mleu.le_1.0) affy_hg_u95b AFFY HG U95B probe +mleucophaeus_gene_ensembl Drill genes (Mleu.le_1.0) affy_hg_u95c AFFY HG U95C probe +mleucophaeus_gene_ensembl Drill genes (Mleu.le_1.0) affy_hg_u95d AFFY HG U95D probe +mleucophaeus_gene_ensembl Drill genes (Mleu.le_1.0) affy_hg_u95e AFFY HG U95E probe +mleucophaeus_gene_ensembl Drill genes (Mleu.le_1.0) affy_hta_2_0 AFFY HTA 2 0 probe +mleucophaeus_gene_ensembl Drill genes (Mleu.le_1.0) affy_huex_1_0_st_v2 AFFY HuEx 1 0 st v2 probe +mleucophaeus_gene_ensembl Drill genes (Mleu.le_1.0) affy_hugenefl AFFY 
HuGeneFL probe +mleucophaeus_gene_ensembl Drill genes (Mleu.le_1.0) affy_hugene_1_0_st_v1 AFFY HuGene 1 0 st v1 probe +mleucophaeus_gene_ensembl Drill genes (Mleu.le_1.0) affy_hugene_2_0_st_v1 AFFY HuGene 2 0 st v1 probe +mleucophaeus_gene_ensembl Drill genes (Mleu.le_1.0) affy_primeview AFFY PrimeView probe +mleucophaeus_gene_ensembl Drill genes (Mleu.le_1.0) affy_rhegene_1_0_st_v1 AFFY RheGene 1 0 st v1 probe +mleucophaeus_gene_ensembl Drill genes (Mleu.le_1.0) affy_rhegene_1_1_st_v1 AFFY RheGene 1 1 st v1 probe +mleucophaeus_gene_ensembl Drill genes (Mleu.le_1.0) affy_rhesus AFFY Rhesus probe +mleucophaeus_gene_ensembl Drill genes (Mleu.le_1.0) affy_u133_x3p AFFY U133 X3P probe +mleucophaeus_gene_ensembl Drill genes (Mleu.le_1.0) ensembl_gene_id Gene stable ID +mleucophaeus_gene_ensembl Drill genes (Mleu.le_1.0) external_gene_name Gene name +mleucophaeus_gene_ensembl Drill genes (Mleu.le_1.0) entrezgene NCBI gene ID +pformosa_gene_ensembl Amazon molly genes (Poecilia_formosa-5.1.2) ensembl_gene_id Gene stable ID +pformosa_gene_ensembl Amazon molly genes (Poecilia_formosa-5.1.2) external_gene_name Gene name +pformosa_gene_ensembl Amazon molly genes (Poecilia_formosa-5.1.2) entrezgene NCBI gene ID +csyrichta_gene_ensembl Tarsier genes (Tarsius_syrichta-2.0.1) affy_cyngene_1_0_st_v1 AFFY CynGene 1 0 st v1 probe +csyrichta_gene_ensembl Tarsier genes (Tarsius_syrichta-2.0.1) affy_cyrgene_1_0_st_v1 AFFY CyRGene 1 0 st v1 probe +csyrichta_gene_ensembl Tarsier genes (Tarsius_syrichta-2.0.1) affy_hc_g110 AFFY HC G110 probe +csyrichta_gene_ensembl Tarsier genes (Tarsius_syrichta-2.0.1) affy_hg_focus AFFY HG Focus probe +csyrichta_gene_ensembl Tarsier genes (Tarsius_syrichta-2.0.1) affy_hg_u133a AFFY HG U133A probe +csyrichta_gene_ensembl Tarsier genes (Tarsius_syrichta-2.0.1) affy_hg_u133a_2 AFFY HG U133A 2 probe +csyrichta_gene_ensembl Tarsier genes (Tarsius_syrichta-2.0.1) affy_hg_u133b AFFY HG U133B probe +csyrichta_gene_ensembl Tarsier genes (Tarsius_syrichta-2.0.1) 
affy_hg_u133_plus_2 AFFY HG U133 Plus 2 probe +csyrichta_gene_ensembl Tarsier genes (Tarsius_syrichta-2.0.1) affy_hg_u95a AFFY HG U95A probe +csyrichta_gene_ensembl Tarsier genes (Tarsius_syrichta-2.0.1) affy_hg_u95av2 AFFY HG U95Av2 probe +csyrichta_gene_ensembl Tarsier genes (Tarsius_syrichta-2.0.1) affy_hg_u95b AFFY HG U95B probe +csyrichta_gene_ensembl Tarsier genes (Tarsius_syrichta-2.0.1) affy_hg_u95c AFFY HG U95C probe +csyrichta_gene_ensembl Tarsier genes (Tarsius_syrichta-2.0.1) affy_hg_u95d AFFY HG U95D probe +csyrichta_gene_ensembl Tarsier genes (Tarsius_syrichta-2.0.1) affy_hg_u95e AFFY HG U95E probe +csyrichta_gene_ensembl Tarsier genes (Tarsius_syrichta-2.0.1) affy_hta_2_0 AFFY HTA 2 0 probe +csyrichta_gene_ensembl Tarsier genes (Tarsius_syrichta-2.0.1) affy_huex_1_0_st_v2 AFFY HuEx 1 0 st v2 probe +csyrichta_gene_ensembl Tarsier genes (Tarsius_syrichta-2.0.1) affy_hugenefl AFFY HuGeneFL probe +csyrichta_gene_ensembl Tarsier genes (Tarsius_syrichta-2.0.1) affy_hugene_1_0_st_v1 AFFY HuGene 1 0 st v1 probe +csyrichta_gene_ensembl Tarsier genes (Tarsius_syrichta-2.0.1) affy_hugene_2_0_st_v1 AFFY HuGene 2 0 st v1 probe +csyrichta_gene_ensembl Tarsier genes (Tarsius_syrichta-2.0.1) affy_primeview AFFY PrimeView probe +csyrichta_gene_ensembl Tarsier genes (Tarsius_syrichta-2.0.1) affy_rhegene_1_0_st_v1 AFFY RheGene 1 0 st v1 probe +csyrichta_gene_ensembl Tarsier genes (Tarsius_syrichta-2.0.1) affy_rhegene_1_1_st_v1 AFFY RheGene 1 1 st v1 probe +csyrichta_gene_ensembl Tarsier genes (Tarsius_syrichta-2.0.1) affy_rhesus AFFY Rhesus probe +csyrichta_gene_ensembl Tarsier genes (Tarsius_syrichta-2.0.1) affy_u133_x3p AFFY U133 X3P probe +csyrichta_gene_ensembl Tarsier genes (Tarsius_syrichta-2.0.1) ensembl_gene_id Gene stable ID +csyrichta_gene_ensembl Tarsier genes (Tarsius_syrichta-2.0.1) external_gene_name Gene name +csyrichta_gene_ensembl Tarsier genes (Tarsius_syrichta-2.0.1) entrezgene NCBI gene ID +falbicollis_gene_ensembl Flycatcher genes (FicAlb_1.4) 
ensembl_gene_id Gene stable ID +falbicollis_gene_ensembl Flycatcher genes (FicAlb_1.4) external_gene_name Gene name +falbicollis_gene_ensembl Flycatcher genes (FicAlb_1.4) entrezgene NCBI gene ID +ggorilla_gene_ensembl Gorilla genes (gorGor4) affy_cyngene_1_0_st_v1 AFFY CynGene 1 0 st v1 probe +ggorilla_gene_ensembl Gorilla genes (gorGor4) affy_cyrgene_1_0_st_v1 AFFY CyRGene 1 0 st v1 probe +ggorilla_gene_ensembl Gorilla genes (gorGor4) affy_hc_g110 AFFY HC G110 probe +ggorilla_gene_ensembl Gorilla genes (gorGor4) affy_hg_focus AFFY HG Focus probe +ggorilla_gene_ensembl Gorilla genes (gorGor4) affy_hg_u133a AFFY HG U133A probe +ggorilla_gene_ensembl Gorilla genes (gorGor4) affy_hg_u133a_2 AFFY HG U133A 2 probe +ggorilla_gene_ensembl Gorilla genes (gorGor4) affy_hg_u133b AFFY HG U133B probe +ggorilla_gene_ensembl Gorilla genes (gorGor4) affy_hg_u133_plus_2 AFFY HG U133 Plus 2 probe +ggorilla_gene_ensembl Gorilla genes (gorGor4) affy_hg_u95a AFFY HG U95A probe +ggorilla_gene_ensembl Gorilla genes (gorGor4) affy_hg_u95av2 AFFY HG U95Av2 probe +ggorilla_gene_ensembl Gorilla genes (gorGor4) affy_hg_u95b AFFY HG U95B probe +ggorilla_gene_ensembl Gorilla genes (gorGor4) affy_hg_u95c AFFY HG U95C probe +ggorilla_gene_ensembl Gorilla genes (gorGor4) affy_hg_u95d AFFY HG U95D probe +ggorilla_gene_ensembl Gorilla genes (gorGor4) affy_hg_u95e AFFY HG U95E probe +ggorilla_gene_ensembl Gorilla genes (gorGor4) affy_hta_2_0 AFFY HTA 2 0 probe +ggorilla_gene_ensembl Gorilla genes (gorGor4) affy_huex_1_0_st_v2 AFFY HuEx 1 0 st v2 probe +ggorilla_gene_ensembl Gorilla genes (gorGor4) affy_hugenefl AFFY HuGeneFL probe +ggorilla_gene_ensembl Gorilla genes (gorGor4) affy_hugene_1_0_st_v1 AFFY HuGene 1 0 st v1 probe +ggorilla_gene_ensembl Gorilla genes (gorGor4) affy_hugene_2_0_st_v1 AFFY HuGene 2 0 st v1 probe +ggorilla_gene_ensembl Gorilla genes (gorGor4) affy_primeview AFFY PrimeView probe +ggorilla_gene_ensembl Gorilla genes (gorGor4) affy_rhegene_1_0_st_v1 AFFY RheGene 1 0 st v1 
probe +ggorilla_gene_ensembl Gorilla genes (gorGor4) affy_rhegene_1_1_st_v1 AFFY RheGene 1 1 st v1 probe +ggorilla_gene_ensembl Gorilla genes (gorGor4) affy_rhesus AFFY Rhesus probe +ggorilla_gene_ensembl Gorilla genes (gorGor4) affy_u133_x3p AFFY U133 X3P probe +ggorilla_gene_ensembl Gorilla genes (gorGor4) ensembl_gene_id Gene stable ID +ggorilla_gene_ensembl Gorilla genes (gorGor4) external_gene_name Gene name +ggorilla_gene_ensembl Gorilla genes (gorGor4) entrezgene NCBI gene ID +pmarinus_gene_ensembl Lamprey genes (Pmarinus_7.0) ensembl_gene_id Gene stable ID +pmarinus_gene_ensembl Lamprey genes (Pmarinus_7.0) external_gene_name Gene name +pmarinus_gene_ensembl Lamprey genes (Pmarinus_7.0) entrezgene NCBI gene ID +drerio_gene_ensembl Zebrafish genes (GRCz11) affy_zebgene_1_0_st_v1 AFFY ZebGene 1 0 st v1 probe +drerio_gene_ensembl Zebrafish genes (GRCz11) affy_zebgene_1_1_st_v1 AFFY ZebGene 1 1 st v1 probe +drerio_gene_ensembl Zebrafish genes (GRCz11) affy_zebrafish AFFY Zebrafish probe +drerio_gene_ensembl Zebrafish genes (GRCz11) ensembl_gene_id Gene stable ID +drerio_gene_ensembl Zebrafish genes (GRCz11) external_gene_name Gene name +drerio_gene_ensembl Zebrafish genes (GRCz11) entrezgene NCBI gene ID +itridecemlineatus_gene_ensembl Squirrel genes (SpeTri2.0) ensembl_gene_id Gene stable ID +itridecemlineatus_gene_ensembl Squirrel genes (SpeTri2.0) external_gene_name Gene name +itridecemlineatus_gene_ensembl Squirrel genes (SpeTri2.0) entrezgene NCBI gene ID +caperea_gene_ensembl Brazilian guinea pig genes (CavAp1.0) ensembl_gene_id Gene stable ID +caperea_gene_ensembl Brazilian guinea pig genes (CavAp1.0) external_gene_name Gene name +mfascicularis_gene_ensembl Crab-eating macaque genes (Macaca_fascicularis_5.0) affy_cyngene_1_0_st_v1 AFFY CynGene 1 0 st v1 probe +mfascicularis_gene_ensembl Crab-eating macaque genes (Macaca_fascicularis_5.0) affy_cyrgene_1_0_st_v1 AFFY CyRGene 1 0 st v1 probe +mfascicularis_gene_ensembl Crab-eating macaque genes 
(Macaca_fascicularis_5.0) affy_hc_g110 AFFY HC G110 probe +mfascicularis_gene_ensembl Crab-eating macaque genes (Macaca_fascicularis_5.0) affy_hg_focus AFFY HG Focus probe +mfascicularis_gene_ensembl Crab-eating macaque genes (Macaca_fascicularis_5.0) affy_hg_u133a AFFY HG U133A probe +mfascicularis_gene_ensembl Crab-eating macaque genes (Macaca_fascicularis_5.0) affy_hg_u133a_2 AFFY HG U133A 2 probe +mfascicularis_gene_ensembl Crab-eating macaque genes (Macaca_fascicularis_5.0) affy_hg_u133b AFFY HG U133B probe +mfascicularis_gene_ensembl Crab-eating macaque genes (Macaca_fascicularis_5.0) affy_hg_u133_plus_2 AFFY HG U133 Plus 2 probe +mfascicularis_gene_ensembl Crab-eating macaque genes (Macaca_fascicularis_5.0) affy_hg_u95a AFFY HG U95A probe +mfascicularis_gene_ensembl Crab-eating macaque genes (Macaca_fascicularis_5.0) affy_hg_u95av2 AFFY HG U95Av2 probe +mfascicularis_gene_ensembl Crab-eating macaque genes (Macaca_fascicularis_5.0) affy_hg_u95b AFFY HG U95B probe +mfascicularis_gene_ensembl Crab-eating macaque genes (Macaca_fascicularis_5.0) affy_hg_u95c AFFY HG U95C probe +mfascicularis_gene_ensembl Crab-eating macaque genes (Macaca_fascicularis_5.0) affy_hg_u95d AFFY HG U95D probe +mfascicularis_gene_ensembl Crab-eating macaque genes (Macaca_fascicularis_5.0) affy_hg_u95e AFFY HG U95E probe +mfascicularis_gene_ensembl Crab-eating macaque genes (Macaca_fascicularis_5.0) affy_hta_2_0 AFFY HTA 2 0 probe +mfascicularis_gene_ensembl Crab-eating macaque genes (Macaca_fascicularis_5.0) affy_huex_1_0_st_v2 AFFY HuEx 1 0 st v2 probe +mfascicularis_gene_ensembl Crab-eating macaque genes (Macaca_fascicularis_5.0) affy_hugenefl AFFY HuGeneFL probe +mfascicularis_gene_ensembl Crab-eating macaque genes (Macaca_fascicularis_5.0) affy_hugene_1_0_st_v1 AFFY HuGene 1 0 st v1 probe +mfascicularis_gene_ensembl Crab-eating macaque genes (Macaca_fascicularis_5.0) affy_hugene_2_0_st_v1 AFFY HuGene 2 0 st v1 probe +mfascicularis_gene_ensembl Crab-eating macaque genes 
(Macaca_fascicularis_5.0) affy_primeview AFFY PrimeView probe +mfascicularis_gene_ensembl Crab-eating macaque genes (Macaca_fascicularis_5.0) affy_rhegene_1_0_st_v1 AFFY RheGene 1 0 st v1 probe +mfascicularis_gene_ensembl Crab-eating macaque genes (Macaca_fascicularis_5.0) affy_rhegene_1_1_st_v1 AFFY RheGene 1 1 st v1 probe +mfascicularis_gene_ensembl Crab-eating macaque genes (Macaca_fascicularis_5.0) affy_rhesus AFFY Rhesus probe +mfascicularis_gene_ensembl Crab-eating macaque genes (Macaca_fascicularis_5.0) affy_u133_x3p AFFY U133 X3P probe +mfascicularis_gene_ensembl Crab-eating macaque genes (Macaca_fascicularis_5.0) ensembl_gene_id Gene stable ID +mfascicularis_gene_ensembl Crab-eating macaque genes (Macaca_fascicularis_5.0) external_gene_name Gene name +mfascicularis_gene_ensembl Crab-eating macaque genes (Macaca_fascicularis_5.0) entrezgene NCBI gene ID +ppaniscus_gene_ensembl Bonobo genes (panpan1.1) affy_cyngene_1_0_st_v1 AFFY CynGene 1 0 st v1 probe +ppaniscus_gene_ensembl Bonobo genes (panpan1.1) affy_cyrgene_1_0_st_v1 AFFY CyRGene 1 0 st v1 probe +ppaniscus_gene_ensembl Bonobo genes (panpan1.1) affy_hc_g110 AFFY HC G110 probe +ppaniscus_gene_ensembl Bonobo genes (panpan1.1) affy_hg_focus AFFY HG Focus probe +ppaniscus_gene_ensembl Bonobo genes (panpan1.1) affy_hg_u133a AFFY HG U133A probe +ppaniscus_gene_ensembl Bonobo genes (panpan1.1) affy_hg_u133a_2 AFFY HG U133A 2 probe +ppaniscus_gene_ensembl Bonobo genes (panpan1.1) affy_hg_u133b AFFY HG U133B probe +ppaniscus_gene_ensembl Bonobo genes (panpan1.1) affy_hg_u133_plus_2 AFFY HG U133 Plus 2 probe +ppaniscus_gene_ensembl Bonobo genes (panpan1.1) affy_hg_u95a AFFY HG U95A probe +ppaniscus_gene_ensembl Bonobo genes (panpan1.1) affy_hg_u95av2 AFFY HG U95Av2 probe +ppaniscus_gene_ensembl Bonobo genes (panpan1.1) affy_hg_u95b AFFY HG U95B probe +ppaniscus_gene_ensembl Bonobo genes (panpan1.1) affy_hg_u95c AFFY HG U95C probe +ppaniscus_gene_ensembl Bonobo genes (panpan1.1) affy_hg_u95d AFFY HG U95D probe 
+ppaniscus_gene_ensembl Bonobo genes (panpan1.1) affy_hg_u95e AFFY HG U95E probe +ppaniscus_gene_ensembl Bonobo genes (panpan1.1) affy_hta_2_0 AFFY HTA 2 0 probe +ppaniscus_gene_ensembl Bonobo genes (panpan1.1) affy_huex_1_0_st_v2 AFFY HuEx 1 0 st v2 probe +ppaniscus_gene_ensembl Bonobo genes (panpan1.1) affy_hugenefl AFFY HuGeneFL probe +ppaniscus_gene_ensembl Bonobo genes (panpan1.1) affy_hugene_1_0_st_v1 AFFY HuGene 1 0 st v1 probe +ppaniscus_gene_ensembl Bonobo genes (panpan1.1) affy_hugene_2_0_st_v1 AFFY HuGene 2 0 st v1 probe +ppaniscus_gene_ensembl Bonobo genes (panpan1.1) affy_primeview AFFY PrimeView probe +ppaniscus_gene_ensembl Bonobo genes (panpan1.1) affy_rhegene_1_0_st_v1 AFFY RheGene 1 0 st v1 probe +ppaniscus_gene_ensembl Bonobo genes (panpan1.1) affy_rhegene_1_1_st_v1 AFFY RheGene 1 1 st v1 probe +ppaniscus_gene_ensembl Bonobo genes (panpan1.1) affy_rhesus AFFY Rhesus probe +ppaniscus_gene_ensembl Bonobo genes (panpan1.1) affy_u133_x3p AFFY U133 X3P probe +ppaniscus_gene_ensembl Bonobo genes (panpan1.1) ensembl_gene_id Gene stable ID +ppaniscus_gene_ensembl Bonobo genes (panpan1.1) external_gene_name Gene name +ppaniscus_gene_ensembl Bonobo genes (panpan1.1) entrezgene NCBI gene ID +mfuro_gene_ensembl Ferret genes (MusPutFur1.0) ensembl_gene_id Gene stable ID +mfuro_gene_ensembl Ferret genes (MusPutFur1.0) external_gene_name Gene name +mfuro_gene_ensembl Ferret genes (MusPutFur1.0) entrezgene NCBI gene ID +tbelangeri_gene_ensembl Tree Shrew genes (tupBel1) ensembl_gene_id Gene stable ID +tbelangeri_gene_ensembl Tree Shrew genes (tupBel1) external_gene_name Gene name +tbelangeri_gene_ensembl Tree Shrew genes (tupBel1) entrezgene NCBI gene ID +ocuniculus_gene_ensembl Rabbit genes (OryCun2.0) ensembl_gene_id Gene stable ID +ocuniculus_gene_ensembl Rabbit genes (OryCun2.0) external_gene_name Gene name +ocuniculus_gene_ensembl Rabbit genes (OryCun2.0) entrezgene NCBI gene ID +mochrogaster_gene_ensembl Prairie vole genes (MicOch1.0) ensembl_gene_id Gene 
stable ID +mochrogaster_gene_ensembl Prairie vole genes (MicOch1.0) external_gene_name Gene name +mochrogaster_gene_ensembl Prairie vole genes (MicOch1.0) entrezgene NCBI gene ID +cpalliatus_gene_ensembl Angola colobus genes (Cang.pa_1.0) affy_cyngene_1_0_st_v1 AFFY CynGene 1 0 st v1 probe +cpalliatus_gene_ensembl Angola colobus genes (Cang.pa_1.0) affy_cyrgene_1_0_st_v1 AFFY CyRGene 1 0 st v1 probe +cpalliatus_gene_ensembl Angola colobus genes (Cang.pa_1.0) affy_hc_g110 AFFY HC G110 probe +cpalliatus_gene_ensembl Angola colobus genes (Cang.pa_1.0) affy_hg_focus AFFY HG Focus probe +cpalliatus_gene_ensembl Angola colobus genes (Cang.pa_1.0) affy_hg_u133a AFFY HG U133A probe +cpalliatus_gene_ensembl Angola colobus genes (Cang.pa_1.0) affy_hg_u133a_2 AFFY HG U133A 2 probe +cpalliatus_gene_ensembl Angola colobus genes (Cang.pa_1.0) affy_hg_u133b AFFY HG U133B probe +cpalliatus_gene_ensembl Angola colobus genes (Cang.pa_1.0) affy_hg_u133_plus_2 AFFY HG U133 Plus 2 probe +cpalliatus_gene_ensembl Angola colobus genes (Cang.pa_1.0) affy_hg_u95a AFFY HG U95A probe +cpalliatus_gene_ensembl Angola colobus genes (Cang.pa_1.0) affy_hg_u95av2 AFFY HG U95Av2 probe +cpalliatus_gene_ensembl Angola colobus genes (Cang.pa_1.0) affy_hg_u95b AFFY HG U95B probe +cpalliatus_gene_ensembl Angola colobus genes (Cang.pa_1.0) affy_hg_u95c AFFY HG U95C probe +cpalliatus_gene_ensembl Angola colobus genes (Cang.pa_1.0) affy_hg_u95d AFFY HG U95D probe +cpalliatus_gene_ensembl Angola colobus genes (Cang.pa_1.0) affy_hg_u95e AFFY HG U95E probe +cpalliatus_gene_ensembl Angola colobus genes (Cang.pa_1.0) affy_hta_2_0 AFFY HTA 2 0 probe +cpalliatus_gene_ensembl Angola colobus genes (Cang.pa_1.0) affy_huex_1_0_st_v2 AFFY HuEx 1 0 st v2 probe +cpalliatus_gene_ensembl Angola colobus genes (Cang.pa_1.0) affy_hugenefl AFFY HuGeneFL probe +cpalliatus_gene_ensembl Angola colobus genes (Cang.pa_1.0) affy_hugene_1_0_st_v1 AFFY HuGene 1 0 st v1 probe +cpalliatus_gene_ensembl Angola colobus genes (Cang.pa_1.0) 
affy_hugene_2_0_st_v1 AFFY HuGene 2 0 st v1 probe +cpalliatus_gene_ensembl Angola colobus genes (Cang.pa_1.0) affy_primeview AFFY PrimeView probe +cpalliatus_gene_ensembl Angola colobus genes (Cang.pa_1.0) affy_rhegene_1_0_st_v1 AFFY RheGene 1 0 st v1 probe +cpalliatus_gene_ensembl Angola colobus genes (Cang.pa_1.0) affy_rhegene_1_1_st_v1 AFFY RheGene 1 1 st v1 probe +cpalliatus_gene_ensembl Angola colobus genes (Cang.pa_1.0) affy_rhesus AFFY Rhesus probe +cpalliatus_gene_ensembl Angola colobus genes (Cang.pa_1.0) affy_u133_x3p AFFY U133 X3P probe +cpalliatus_gene_ensembl Angola colobus genes (Cang.pa_1.0) ensembl_gene_id Gene stable ID +cpalliatus_gene_ensembl Angola colobus genes (Cang.pa_1.0) external_gene_name Gene name +cpalliatus_gene_ensembl Angola colobus genes (Cang.pa_1.0) entrezgene NCBI gene ID +choffmanni_gene_ensembl Sloth genes (choHof1) ensembl_gene_id Gene stable ID +choffmanni_gene_ensembl Sloth genes (choHof1) external_gene_name Gene name +cfamiliaris_gene_ensembl Dog genes (CanFam3.1) affy_canine_2 AFFY Canine 2 probe +cfamiliaris_gene_ensembl Dog genes (CanFam3.1) ensembl_gene_id Gene stable ID +cfamiliaris_gene_ensembl Dog genes (CanFam3.1) external_gene_name Gene name +cfamiliaris_gene_ensembl Dog genes (CanFam3.1) entrezgene NCBI gene ID +dmelanogaster_gene_ensembl Fruitfly genes (BDGP6) affy_drosgenome1 AFFY DrosGenome1 probe +dmelanogaster_gene_ensembl Fruitfly genes (BDGP6) affy_drosophila_2 AFFY Drosophila 2 probe +dmelanogaster_gene_ensembl Fruitfly genes (BDGP6) ensembl_gene_id Gene stable ID +dmelanogaster_gene_ensembl Fruitfly genes (BDGP6) external_gene_name Gene name +dmelanogaster_gene_ensembl Fruitfly genes (BDGP6) entrezgene NCBI gene ID +lchalumnae_gene_ensembl Coelacanth genes (LatCha1) ensembl_gene_id Gene stable ID +lchalumnae_gene_ensembl Coelacanth genes (LatCha1) external_gene_name Gene name +lchalumnae_gene_ensembl Coelacanth genes (LatCha1) entrezgene NCBI gene ID +hsapiens_gene_ensembl Human genes (GRCh38.p12) 
affy_hc_g110 AFFY HC G110 probe +hsapiens_gene_ensembl Human genes (GRCh38.p12) affy_hg_focus AFFY HG Focus probe +hsapiens_gene_ensembl Human genes (GRCh38.p12) affy_hg_u133a AFFY HG U133A probe +hsapiens_gene_ensembl Human genes (GRCh38.p12) affy_hg_u133a_2 AFFY HG U133A 2 probe +hsapiens_gene_ensembl Human genes (GRCh38.p12) affy_hg_u133b AFFY HG U133B probe +hsapiens_gene_ensembl Human genes (GRCh38.p12) affy_hg_u133_plus_2 AFFY HG U133 Plus 2 probe +hsapiens_gene_ensembl Human genes (GRCh38.p12) affy_hg_u95a AFFY HG U95A probe +hsapiens_gene_ensembl Human genes (GRCh38.p12) affy_hg_u95av2 AFFY HG U95Av2 probe +hsapiens_gene_ensembl Human genes (GRCh38.p12) affy_hg_u95b AFFY HG U95B probe +hsapiens_gene_ensembl Human genes (GRCh38.p12) affy_hg_u95c AFFY HG U95C probe +hsapiens_gene_ensembl Human genes (GRCh38.p12) affy_hg_u95d AFFY HG U95D probe +hsapiens_gene_ensembl Human genes (GRCh38.p12) affy_hg_u95e AFFY HG U95E probe +hsapiens_gene_ensembl Human genes (GRCh38.p12) affy_hta_2_0 AFFY HTA 2 0 probe +hsapiens_gene_ensembl Human genes (GRCh38.p12) affy_huex_1_0_st_v2 AFFY HuEx 1 0 st v2 probe +hsapiens_gene_ensembl Human genes (GRCh38.p12) affy_hugenefl AFFY HuGeneFL probe +hsapiens_gene_ensembl Human genes (GRCh38.p12) affy_hugene_1_0_st_v1 AFFY HuGene 1 0 st v1 probe +hsapiens_gene_ensembl Human genes (GRCh38.p12) affy_hugene_2_0_st_v1 AFFY HuGene 2 0 st v1 probe +hsapiens_gene_ensembl Human genes (GRCh38.p12) affy_primeview AFFY PrimeView probe +hsapiens_gene_ensembl Human genes (GRCh38.p12) affy_u133_x3p AFFY U133 X3P probe +hsapiens_gene_ensembl Human genes (GRCh38.p12) ensembl_gene_id Gene stable ID +hsapiens_gene_ensembl Human genes (GRCh38.p12) external_gene_name Gene name +hsapiens_gene_ensembl Human genes (GRCh38.p12) entrezgene NCBI gene ID +fcatus_gene_ensembl Cat genes (Felis_catus_8.0) affy_felgene_1_0_st_v1 AFFY FelGene 1 0 st v1 probe +fcatus_gene_ensembl Cat genes (Felis_catus_8.0) affy_felgene_1_1_st_v1 AFFY FelGene 1 1 st v1 probe 
+fcatus_gene_ensembl Cat genes (Felis_catus_8.0) ensembl_gene_id Gene stable ID +fcatus_gene_ensembl Cat genes (Felis_catus_8.0) external_gene_name Gene name +fcatus_gene_ensembl Cat genes (Felis_catus_8.0) entrezgene NCBI gene ID +mmurinus_gene_ensembl Mouse Lemur genes (Mmur_3.0) affy_cyngene_1_0_st_v1 AFFY CynGene 1 0 st v1 probe +mmurinus_gene_ensembl Mouse Lemur genes (Mmur_3.0) affy_cyrgene_1_0_st_v1 AFFY CyRGene 1 0 st v1 probe +mmurinus_gene_ensembl Mouse Lemur genes (Mmur_3.0) affy_hc_g110 AFFY HC G110 probe +mmurinus_gene_ensembl Mouse Lemur genes (Mmur_3.0) affy_hg_focus AFFY HG Focus probe +mmurinus_gene_ensembl Mouse Lemur genes (Mmur_3.0) affy_hg_u133a AFFY HG U133A probe +mmurinus_gene_ensembl Mouse Lemur genes (Mmur_3.0) affy_hg_u133a_2 AFFY HG U133A 2 probe +mmurinus_gene_ensembl Mouse Lemur genes (Mmur_3.0) affy_hg_u133b AFFY HG U133B probe +mmurinus_gene_ensembl Mouse Lemur genes (Mmur_3.0) affy_hg_u133_plus_2 AFFY HG U133 Plus 2 probe +mmurinus_gene_ensembl Mouse Lemur genes (Mmur_3.0) affy_hg_u95a AFFY HG U95A probe +mmurinus_gene_ensembl Mouse Lemur genes (Mmur_3.0) affy_hg_u95av2 AFFY HG U95Av2 probe +mmurinus_gene_ensembl Mouse Lemur genes (Mmur_3.0) affy_hg_u95b AFFY HG U95B probe +mmurinus_gene_ensembl Mouse Lemur genes (Mmur_3.0) affy_hg_u95c AFFY HG U95C probe +mmurinus_gene_ensembl Mouse Lemur genes (Mmur_3.0) affy_hg_u95d AFFY HG U95D probe +mmurinus_gene_ensembl Mouse Lemur genes (Mmur_3.0) affy_hg_u95e AFFY HG U95E probe +mmurinus_gene_ensembl Mouse Lemur genes (Mmur_3.0) affy_hta_2_0 AFFY HTA 2 0 probe +mmurinus_gene_ensembl Mouse Lemur genes (Mmur_3.0) affy_huex_1_0_st_v2 AFFY HuEx 1 0 st v2 probe +mmurinus_gene_ensembl Mouse Lemur genes (Mmur_3.0) affy_hugenefl AFFY HuGeneFL probe +mmurinus_gene_ensembl Mouse Lemur genes (Mmur_3.0) affy_hugene_1_0_st_v1 AFFY HuGene 1 0 st v1 probe +mmurinus_gene_ensembl Mouse Lemur genes (Mmur_3.0) affy_hugene_2_0_st_v1 AFFY HuGene 2 0 st v1 probe +mmurinus_gene_ensembl Mouse Lemur genes 
(Mmur_3.0) affy_primeview AFFY PrimeView probe +mmurinus_gene_ensembl Mouse Lemur genes (Mmur_3.0) affy_rhegene_1_0_st_v1 AFFY RheGene 1 0 st v1 probe +mmurinus_gene_ensembl Mouse Lemur genes (Mmur_3.0) affy_rhegene_1_1_st_v1 AFFY RheGene 1 1 st v1 probe +mmurinus_gene_ensembl Mouse Lemur genes (Mmur_3.0) affy_rhesus AFFY Rhesus probe +mmurinus_gene_ensembl Mouse Lemur genes (Mmur_3.0) affy_u133_x3p AFFY U133 X3P probe +mmurinus_gene_ensembl Mouse Lemur genes (Mmur_3.0) ensembl_gene_id Gene stable ID +mmurinus_gene_ensembl Mouse Lemur genes (Mmur_3.0) external_gene_name Gene name +mmurinus_gene_ensembl Mouse Lemur genes (Mmur_3.0) entrezgene NCBI gene ID +amexicanus_gene_ensembl Cave fish genes (AstMex102) ensembl_gene_id Gene stable ID +amexicanus_gene_ensembl Cave fish genes (AstMex102) external_gene_name Gene name +amexicanus_gene_ensembl Cave fish genes (AstMex102) entrezgene NCBI gene ID +pabelii_gene_ensembl Orangutan genes (PPYG2) ensembl_gene_id Gene stable ID +pabelii_gene_ensembl Orangutan genes (PPYG2) external_gene_name Gene name +pabelii_gene_ensembl Orangutan genes (PPYG2) entrezgene NCBI gene ID +cjacchus_gene_ensembl Marmoset genes (ASM275486v1) affy_cyngene_1_0_st_v1 AFFY CynGene 1 0 st v1 probe +cjacchus_gene_ensembl Marmoset genes (ASM275486v1) affy_cyrgene_1_0_st_v1 AFFY CyRGene 1 0 st v1 probe +cjacchus_gene_ensembl Marmoset genes (ASM275486v1) affy_hc_g110 AFFY HC G110 probe +cjacchus_gene_ensembl Marmoset genes (ASM275486v1) affy_hg_focus AFFY HG Focus probe +cjacchus_gene_ensembl Marmoset genes (ASM275486v1) affy_hg_u133a AFFY HG U133A probe +cjacchus_gene_ensembl Marmoset genes (ASM275486v1) affy_hg_u133a_2 AFFY HG U133A 2 probe +cjacchus_gene_ensembl Marmoset genes (ASM275486v1) affy_hg_u133b AFFY HG U133B probe +cjacchus_gene_ensembl Marmoset genes (ASM275486v1) affy_hg_u133_plus_2 AFFY HG U133 Plus 2 probe +cjacchus_gene_ensembl Marmoset genes (ASM275486v1) affy_hg_u95a AFFY HG U95A probe +cjacchus_gene_ensembl Marmoset genes 
(ASM275486v1) affy_hg_u95av2 AFFY HG U95Av2 probe +cjacchus_gene_ensembl Marmoset genes (ASM275486v1) affy_hg_u95b AFFY HG U95B probe +cjacchus_gene_ensembl Marmoset genes (ASM275486v1) affy_hg_u95c AFFY HG U95C probe +cjacchus_gene_ensembl Marmoset genes (ASM275486v1) affy_hg_u95d AFFY HG U95D probe +cjacchus_gene_ensembl Marmoset genes (ASM275486v1) affy_hg_u95e AFFY HG U95E probe +cjacchus_gene_ensembl Marmoset genes (ASM275486v1) affy_hta_2_0 AFFY HTA 2 0 probe +cjacchus_gene_ensembl Marmoset genes (ASM275486v1) affy_huex_1_0_st_v2 AFFY HuEx 1 0 st v2 probe +cjacchus_gene_ensembl Marmoset genes (ASM275486v1) affy_hugenefl AFFY HuGeneFL probe +cjacchus_gene_ensembl Marmoset genes (ASM275486v1) affy_hugene_1_0_st_v1 AFFY HuGene 1 0 st v1 probe +cjacchus_gene_ensembl Marmoset genes (ASM275486v1) affy_hugene_2_0_st_v1 AFFY HuGene 2 0 st v1 probe +cjacchus_gene_ensembl Marmoset genes (ASM275486v1) affy_primeview AFFY PrimeView probe +cjacchus_gene_ensembl Marmoset genes (ASM275486v1) affy_rhegene_1_0_st_v1 AFFY RheGene 1 0 st v1 probe +cjacchus_gene_ensembl Marmoset genes (ASM275486v1) affy_rhegene_1_1_st_v1 AFFY RheGene 1 1 st v1 probe +cjacchus_gene_ensembl Marmoset genes (ASM275486v1) affy_rhesus AFFY Rhesus probe +cjacchus_gene_ensembl Marmoset genes (ASM275486v1) affy_u133_x3p AFFY U133 X3P probe +cjacchus_gene_ensembl Marmoset genes (ASM275486v1) ensembl_gene_id Gene stable ID +cjacchus_gene_ensembl Marmoset genes (ASM275486v1) external_gene_name Gene name +cjacchus_gene_ensembl Marmoset genes (ASM275486v1) entrezgene NCBI gene ID +tnigroviridis_gene_ensembl Tetraodon genes (TETRAODON 8.0) ensembl_gene_id Gene stable ID +tnigroviridis_gene_ensembl Tetraodon genes (TETRAODON 8.0) external_gene_name Gene name +tnigroviridis_gene_ensembl Tetraodon genes (TETRAODON 8.0) entrezgene NCBI gene ID +pvampyrus_gene_ensembl Megabat genes (pteVam1) ensembl_gene_id Gene stable ID +pvampyrus_gene_ensembl Megabat genes (pteVam1) external_gene_name Gene name 
+pvampyrus_gene_ensembl Megabat genes (pteVam1) entrezgene NCBI gene ID +ggallus_gene_ensembl Chicken genes (Gallus_gallus-5.0) affy_chicken AFFY Chicken probe +ggallus_gene_ensembl Chicken genes (Gallus_gallus-5.0) ensembl_gene_id Gene stable ID +ggallus_gene_ensembl Chicken genes (Gallus_gallus-5.0) external_gene_name Gene name +ggallus_gene_ensembl Chicken genes (Gallus_gallus-5.0) entrezgene NCBI gene ID +sboliviensis_gene_ensembl Bolivian squirrel monkey genes (SaiBol1.0) affy_cyngene_1_0_st_v1 AFFY CynGene 1 0 st v1 probe +sboliviensis_gene_ensembl Bolivian squirrel monkey genes (SaiBol1.0) affy_cyrgene_1_0_st_v1 AFFY CyRGene 1 0 st v1 probe +sboliviensis_gene_ensembl Bolivian squirrel monkey genes (SaiBol1.0) affy_hc_g110 AFFY HC G110 probe +sboliviensis_gene_ensembl Bolivian squirrel monkey genes (SaiBol1.0) affy_hg_focus AFFY HG Focus probe +sboliviensis_gene_ensembl Bolivian squirrel monkey genes (SaiBol1.0) affy_hg_u133a AFFY HG U133A probe +sboliviensis_gene_ensembl Bolivian squirrel monkey genes (SaiBol1.0) affy_hg_u133a_2 AFFY HG U133A 2 probe +sboliviensis_gene_ensembl Bolivian squirrel monkey genes (SaiBol1.0) affy_hg_u133b AFFY HG U133B probe +sboliviensis_gene_ensembl Bolivian squirrel monkey genes (SaiBol1.0) affy_hg_u133_plus_2 AFFY HG U133 Plus 2 probe +sboliviensis_gene_ensembl Bolivian squirrel monkey genes (SaiBol1.0) affy_hg_u95a AFFY HG U95A probe +sboliviensis_gene_ensembl Bolivian squirrel monkey genes (SaiBol1.0) affy_hg_u95av2 AFFY HG U95Av2 probe +sboliviensis_gene_ensembl Bolivian squirrel monkey genes (SaiBol1.0) affy_hg_u95b AFFY HG U95B probe +sboliviensis_gene_ensembl Bolivian squirrel monkey genes (SaiBol1.0) affy_hg_u95c AFFY HG U95C probe +sboliviensis_gene_ensembl Bolivian squirrel monkey genes (SaiBol1.0) affy_hg_u95d AFFY HG U95D probe +sboliviensis_gene_ensembl Bolivian squirrel monkey genes (SaiBol1.0) affy_hg_u95e AFFY HG U95E probe +sboliviensis_gene_ensembl Bolivian squirrel monkey genes (SaiBol1.0) affy_hta_2_0 AFFY 
HTA 2 0 probe +sboliviensis_gene_ensembl Bolivian squirrel monkey genes (SaiBol1.0) affy_huex_1_0_st_v2 AFFY HuEx 1 0 st v2 probe +sboliviensis_gene_ensembl Bolivian squirrel monkey genes (SaiBol1.0) affy_hugenefl AFFY HuGeneFL probe +sboliviensis_gene_ensembl Bolivian squirrel monkey genes (SaiBol1.0) affy_hugene_1_0_st_v1 AFFY HuGene 1 0 st v1 probe +sboliviensis_gene_ensembl Bolivian squirrel monkey genes (SaiBol1.0) affy_hugene_2_0_st_v1 AFFY HuGene 2 0 st v1 probe +sboliviensis_gene_ensembl Bolivian squirrel monkey genes (SaiBol1.0) affy_primeview AFFY PrimeView probe +sboliviensis_gene_ensembl Bolivian squirrel monkey genes (SaiBol1.0) affy_rhegene_1_0_st_v1 AFFY RheGene 1 0 st v1 probe +sboliviensis_gene_ensembl Bolivian squirrel monkey genes (SaiBol1.0) affy_rhegene_1_1_st_v1 AFFY RheGene 1 1 st v1 probe +sboliviensis_gene_ensembl Bolivian squirrel monkey genes (SaiBol1.0) affy_rhesus AFFY Rhesus probe +sboliviensis_gene_ensembl Bolivian squirrel monkey genes (SaiBol1.0) affy_u133_x3p AFFY U133 X3P probe +sboliviensis_gene_ensembl Bolivian squirrel monkey genes (SaiBol1.0) ensembl_gene_id Gene stable ID +sboliviensis_gene_ensembl Bolivian squirrel monkey genes (SaiBol1.0) external_gene_name Gene name +sboliviensis_gene_ensembl Bolivian squirrel monkey genes (SaiBol1.0) entrezgene NCBI gene ID +psinensis_gene_ensembl Chinese softshell turtle genes (PelSin_1.0) ensembl_gene_id Gene stable ID +psinensis_gene_ensembl Chinese softshell turtle genes (PelSin_1.0) external_gene_name Gene name +psinensis_gene_ensembl Chinese softshell turtle genes (PelSin_1.0) entrezgene NCBI gene ID +nleucogenys_gene_ensembl Gibbon genes (Nleu_3.0) affy_cyngene_1_0_st_v1 AFFY CynGene 1 0 st v1 probe +nleucogenys_gene_ensembl Gibbon genes (Nleu_3.0) affy_cyrgene_1_0_st_v1 AFFY CyRGene 1 0 st v1 probe +nleucogenys_gene_ensembl Gibbon genes (Nleu_3.0) affy_hc_g110 AFFY HC G110 probe +nleucogenys_gene_ensembl Gibbon genes (Nleu_3.0) affy_hg_focus AFFY HG Focus probe 
+nleucogenys_gene_ensembl Gibbon genes (Nleu_3.0) affy_hg_u133a AFFY HG U133A probe +nleucogenys_gene_ensembl Gibbon genes (Nleu_3.0) affy_hg_u133a_2 AFFY HG U133A 2 probe +nleucogenys_gene_ensembl Gibbon genes (Nleu_3.0) affy_hg_u133b AFFY HG U133B probe +nleucogenys_gene_ensembl Gibbon genes (Nleu_3.0) affy_hg_u133_plus_2 AFFY HG U133 Plus 2 probe +nleucogenys_gene_ensembl Gibbon genes (Nleu_3.0) affy_hg_u95a AFFY HG U95A probe +nleucogenys_gene_ensembl Gibbon genes (Nleu_3.0) affy_hg_u95av2 AFFY HG U95Av2 probe +nleucogenys_gene_ensembl Gibbon genes (Nleu_3.0) affy_hg_u95b AFFY HG U95B probe +nleucogenys_gene_ensembl Gibbon genes (Nleu_3.0) affy_hg_u95c AFFY HG U95C probe +nleucogenys_gene_ensembl Gibbon genes (Nleu_3.0) affy_hg_u95d AFFY HG U95D probe +nleucogenys_gene_ensembl Gibbon genes (Nleu_3.0) affy_hg_u95e AFFY HG U95E probe +nleucogenys_gene_ensembl Gibbon genes (Nleu_3.0) affy_hta_2_0 AFFY HTA 2 0 probe +nleucogenys_gene_ensembl Gibbon genes (Nleu_3.0) affy_huex_1_0_st_v2 AFFY HuEx 1 0 st v2 probe +nleucogenys_gene_ensembl Gibbon genes (Nleu_3.0) affy_hugenefl AFFY HuGeneFL probe +nleucogenys_gene_ensembl Gibbon genes (Nleu_3.0) affy_hugene_1_0_st_v1 AFFY HuGene 1 0 st v1 probe +nleucogenys_gene_ensembl Gibbon genes (Nleu_3.0) affy_hugene_2_0_st_v1 AFFY HuGene 2 0 st v1 probe +nleucogenys_gene_ensembl Gibbon genes (Nleu_3.0) affy_primeview AFFY PrimeView probe +nleucogenys_gene_ensembl Gibbon genes (Nleu_3.0) affy_rhegene_1_0_st_v1 AFFY RheGene 1 0 st v1 probe +nleucogenys_gene_ensembl Gibbon genes (Nleu_3.0) affy_rhegene_1_1_st_v1 AFFY RheGene 1 1 st v1 probe +nleucogenys_gene_ensembl Gibbon genes (Nleu_3.0) affy_rhesus AFFY Rhesus probe +nleucogenys_gene_ensembl Gibbon genes (Nleu_3.0) affy_u133_x3p AFFY U133 X3P probe +nleucogenys_gene_ensembl Gibbon genes (Nleu_3.0) ensembl_gene_id Gene stable ID +nleucogenys_gene_ensembl Gibbon genes (Nleu_3.0) external_gene_name Gene name +nleucogenys_gene_ensembl Gibbon genes (Nleu_3.0) entrezgene NCBI gene ID 
+mgallopavo_gene_ensembl Turkey genes (Turkey_2.01) ensembl_gene_id Gene stable ID +mgallopavo_gene_ensembl Turkey genes (Turkey_2.01) external_gene_name Gene name +mgallopavo_gene_ensembl Turkey genes (Turkey_2.01) entrezgene NCBI gene ID +gmorhua_gene_ensembl Cod genes (gadMor1) ensembl_gene_id Gene stable ID +gmorhua_gene_ensembl Cod genes (gadMor1) external_gene_name Gene name +vpacos_gene_ensembl Alpaca genes (vicPac1) ensembl_gene_id Gene stable ID +vpacos_gene_ensembl Alpaca genes (vicPac1) external_gene_name Gene name +vpacos_gene_ensembl Alpaca genes (vicPac1) entrezgene NCBI gene ID +ccrigri_gene_ensembl Chinese hamster CriGri genes (CriGri_1.0) ensembl_gene_id Gene stable ID +ccrigri_gene_ensembl Chinese hamster CriGri genes (CriGri_1.0) external_gene_name Gene name +ccrigri_gene_ensembl Chinese hamster CriGri genes (CriGri_1.0) entrezgene NCBI gene ID +oanatinus_gene_ensembl Platypus genes (OANA5) affy_platypus_exon AFFY platypus exon probe +oanatinus_gene_ensembl Platypus genes (OANA5) ensembl_gene_id Gene stable ID +oanatinus_gene_ensembl Platypus genes (OANA5) external_gene_name Gene name +oanatinus_gene_ensembl Platypus genes (OANA5) entrezgene NCBI gene ID +dordii_gene_ensembl Kangaroo rat genes (Dord_2.0) ensembl_gene_id Gene stable ID +dordii_gene_ensembl Kangaroo rat genes (Dord_2.0) external_gene_name Gene name +dordii_gene_ensembl Kangaroo rat genes (Dord_2.0) entrezgene NCBI gene ID +ttruncatus_gene_ensembl Dolphin genes (turTru1) ensembl_gene_id Gene stable ID +ttruncatus_gene_ensembl Dolphin genes (turTru1) external_gene_name Gene name +ttruncatus_gene_ensembl Dolphin genes (turTru1) entrezgene NCBI gene ID +ecaballus_gene_ensembl Horse genes (Equ Cab 2) ensembl_gene_id Gene stable ID +ecaballus_gene_ensembl Horse genes (Equ Cab 2) external_gene_name Gene name +ecaballus_gene_ensembl Horse genes (Equ Cab 2) entrezgene NCBI gene ID +oprinceps_gene_ensembl Pika genes (OchPri2.0-Ens) ensembl_gene_id Gene stable ID +oprinceps_gene_ensembl Pika 
genes (OchPri2.0-Ens) external_gene_name Gene name +oprinceps_gene_ensembl Pika genes (OchPri2.0-Ens) entrezgene NCBI gene ID +loculatus_gene_ensembl Spotted gar genes (LepOcu1) ensembl_gene_id Gene stable ID +loculatus_gene_ensembl Spotted gar genes (LepOcu1) external_gene_name Gene name +loculatus_gene_ensembl Spotted gar genes (LepOcu1) entrezgene NCBI gene ID +rnorvegicus_gene_ensembl Rat genes (Rnor_6.0) affy_rae230a AFFY RAE230A probe +rnorvegicus_gene_ensembl Rat genes (Rnor_6.0) affy_rae230b AFFY RAE230B probe +rnorvegicus_gene_ensembl Rat genes (Rnor_6.0) affy_raex_1_0_st_v1 AFFY RaEx 1 0 st v1 probe +rnorvegicus_gene_ensembl Rat genes (Rnor_6.0) affy_ragene_1_0_st_v1 AFFY RaGene 1 0 st v1 probe +rnorvegicus_gene_ensembl Rat genes (Rnor_6.0) affy_ragene_2_1_st_v1 AFFY RaGene 2 1 st v1 probe +rnorvegicus_gene_ensembl Rat genes (Rnor_6.0) affy_rat230_2 AFFY Rat230 2 probe +rnorvegicus_gene_ensembl Rat genes (Rnor_6.0) affy_rg_u34a AFFY RG U34A probe +rnorvegicus_gene_ensembl Rat genes (Rnor_6.0) affy_rg_u34b AFFY RG U34B probe +rnorvegicus_gene_ensembl Rat genes (Rnor_6.0) affy_rg_u34c AFFY RG U34C probe +rnorvegicus_gene_ensembl Rat genes (Rnor_6.0) affy_rn_u34 AFFY RN U34 probe +rnorvegicus_gene_ensembl Rat genes (Rnor_6.0) affy_rt_u34 AFFY RT U34 probe +rnorvegicus_gene_ensembl Rat genes (Rnor_6.0) ensembl_gene_id Gene stable ID +rnorvegicus_gene_ensembl Rat genes (Rnor_6.0) external_gene_name Gene name +rnorvegicus_gene_ensembl Rat genes (Rnor_6.0) entrezgene NCBI gene ID +ptroglodytes_gene_ensembl Chimpanzee genes (Pan_tro_3.0) affy_cyngene_1_0_st_v1 AFFY CynGene 1 0 st v1 probe +ptroglodytes_gene_ensembl Chimpanzee genes (Pan_tro_3.0) affy_cyrgene_1_0_st_v1 AFFY CyRGene 1 0 st v1 probe +ptroglodytes_gene_ensembl Chimpanzee genes (Pan_tro_3.0) affy_hc_g110 AFFY HC G110 probe +ptroglodytes_gene_ensembl Chimpanzee genes (Pan_tro_3.0) affy_hg_focus AFFY HG Focus probe +ptroglodytes_gene_ensembl Chimpanzee genes (Pan_tro_3.0) affy_hg_u133a AFFY HG U133A 
probe +ptroglodytes_gene_ensembl Chimpanzee genes (Pan_tro_3.0) affy_hg_u133a_2 AFFY HG U133A 2 probe +ptroglodytes_gene_ensembl Chimpanzee genes (Pan_tro_3.0) affy_hg_u133b AFFY HG U133B probe +ptroglodytes_gene_ensembl Chimpanzee genes (Pan_tro_3.0) affy_hg_u133_plus_2 AFFY HG U133 Plus 2 probe +ptroglodytes_gene_ensembl Chimpanzee genes (Pan_tro_3.0) affy_hg_u95a AFFY HG U95A probe +ptroglodytes_gene_ensembl Chimpanzee genes (Pan_tro_3.0) affy_hg_u95av2 AFFY HG U95Av2 probe +ptroglodytes_gene_ensembl Chimpanzee genes (Pan_tro_3.0) affy_hg_u95b AFFY HG U95B probe +ptroglodytes_gene_ensembl Chimpanzee genes (Pan_tro_3.0) affy_hg_u95c AFFY HG U95C probe +ptroglodytes_gene_ensembl Chimpanzee genes (Pan_tro_3.0) affy_hg_u95d AFFY HG U95D probe +ptroglodytes_gene_ensembl Chimpanzee genes (Pan_tro_3.0) affy_hg_u95e AFFY HG U95E probe +ptroglodytes_gene_ensembl Chimpanzee genes (Pan_tro_3.0) affy_hta_2_0 AFFY HTA 2 0 probe +ptroglodytes_gene_ensembl Chimpanzee genes (Pan_tro_3.0) affy_huex_1_0_st_v2 AFFY HuEx 1 0 st v2 probe +ptroglodytes_gene_ensembl Chimpanzee genes (Pan_tro_3.0) affy_hugenefl AFFY HuGeneFL probe +ptroglodytes_gene_ensembl Chimpanzee genes (Pan_tro_3.0) affy_hugene_1_0_st_v1 AFFY HuGene 1 0 st v1 probe +ptroglodytes_gene_ensembl Chimpanzee genes (Pan_tro_3.0) affy_hugene_2_0_st_v1 AFFY HuGene 2 0 st v1 probe +ptroglodytes_gene_ensembl Chimpanzee genes (Pan_tro_3.0) affy_primeview AFFY PrimeView probe +ptroglodytes_gene_ensembl Chimpanzee genes (Pan_tro_3.0) affy_rhegene_1_0_st_v1 AFFY RheGene 1 0 st v1 probe +ptroglodytes_gene_ensembl Chimpanzee genes (Pan_tro_3.0) affy_rhegene_1_1_st_v1 AFFY RheGene 1 1 st v1 probe +ptroglodytes_gene_ensembl Chimpanzee genes (Pan_tro_3.0) affy_rhesus AFFY Rhesus probe +ptroglodytes_gene_ensembl Chimpanzee genes (Pan_tro_3.0) affy_u133_x3p AFFY U133 X3P probe +ptroglodytes_gene_ensembl Chimpanzee genes (Pan_tro_3.0) ensembl_gene_id Gene stable ID +ptroglodytes_gene_ensembl Chimpanzee genes (Pan_tro_3.0) 
external_gene_name Gene name +ptroglodytes_gene_ensembl Chimpanzee genes (Pan_tro_3.0) entrezgene NCBI gene ID +oaries_gene_ensembl Sheep genes (Oar_v3.1) ensembl_gene_id Gene stable ID +oaries_gene_ensembl Sheep genes (Oar_v3.1) external_gene_name Gene name +oaries_gene_ensembl Sheep genes (Oar_v3.1) entrezgene NCBI gene ID +jjaculus_gene_ensembl Lesser Egyptian jerboa genes (JacJac1.0) ensembl_gene_id Gene stable ID +jjaculus_gene_ensembl Lesser Egyptian jerboa genes (JacJac1.0) external_gene_name Gene name +jjaculus_gene_ensembl Lesser Egyptian jerboa genes (JacJac1.0) entrezgene NCBI gene ID +scerevisiae_gene_ensembl Saccharomyces cerevisiae genes (R64-1-1) affy_yeast_2 AFFY Yeast 2 probe +scerevisiae_gene_ensembl Saccharomyces cerevisiae genes (R64-1-1) affy_yg_s98 AFFY YG S98 probe +scerevisiae_gene_ensembl Saccharomyces cerevisiae genes (R64-1-1) ensembl_gene_id Gene stable ID +scerevisiae_gene_ensembl Saccharomyces cerevisiae genes (R64-1-1) external_gene_name Gene name +scerevisiae_gene_ensembl Saccharomyces cerevisiae genes (R64-1-1) entrezgene NCBI gene ID +chircus_gene_ensembl Goat genes (ARS1) ensembl_gene_id Gene stable ID +chircus_gene_ensembl Goat genes (ARS1) external_gene_name Gene name +chircus_gene_ensembl Goat genes (ARS1) entrezgene NCBI gene ID +hmale_gene_ensembl Naked mole-rat male genes (HetGla_1.0) ensembl_gene_id Gene stable ID +hmale_gene_ensembl Naked mole-rat male genes (HetGla_1.0) external_gene_name Gene name +hmale_gene_ensembl Naked mole-rat male genes (HetGla_1.0) entrezgene NCBI gene ID +saraneus_gene_ensembl Shrew genes (sorAra1) ensembl_gene_id Gene stable ID +saraneus_gene_ensembl Shrew genes (sorAra1) external_gene_name Gene name +saraneus_gene_ensembl Shrew genes (sorAra1) entrezgene NCBI gene ID +btaurus_gene_ensembl Cow genes (UMD3.1) affy_bovine AFFY Bovine probe +btaurus_gene_ensembl Cow genes (UMD3.1) ensembl_gene_id Gene stable ID +btaurus_gene_ensembl Cow genes (UMD3.1) external_gene_name Gene name 
+btaurus_gene_ensembl Cow genes (UMD3.1) entrezgene NCBI gene ID +dnovemcinctus_gene_ensembl Armadillo genes (Dasnov3.0) ensembl_gene_id Gene stable ID +dnovemcinctus_gene_ensembl Armadillo genes (Dasnov3.0) external_gene_name Gene name +dnovemcinctus_gene_ensembl Armadillo genes (Dasnov3.0) entrezgene NCBI gene ID +oniloticus_gene_ensembl Tilapia genes (Orenil1.0) ensembl_gene_id Gene stable ID +oniloticus_gene_ensembl Tilapia genes (Orenil1.0) external_gene_name Gene name +oniloticus_gene_ensembl Tilapia genes (Orenil1.0) entrezgene NCBI gene ID +mspretus_gene_ensembl Algerian mouse genes (SPRET_EiJ_v1) affy_mg_u74a AFFY MG U74A probe +mspretus_gene_ensembl Algerian mouse genes (SPRET_EiJ_v1) affy_mg_u74av2 AFFY MG U74Av2 probe +mspretus_gene_ensembl Algerian mouse genes (SPRET_EiJ_v1) affy_mg_u74b AFFY MG U74B probe +mspretus_gene_ensembl Algerian mouse genes (SPRET_EiJ_v1) affy_mg_u74bv2 AFFY MG U74Bv2 probe +mspretus_gene_ensembl Algerian mouse genes (SPRET_EiJ_v1) affy_mg_u74c AFFY MG U74C probe +mspretus_gene_ensembl Algerian mouse genes (SPRET_EiJ_v1) affy_mg_u74cv2 AFFY MG U74Cv2 probe +mspretus_gene_ensembl Algerian mouse genes (SPRET_EiJ_v1) affy_moe430a AFFY MOE430A probe +mspretus_gene_ensembl Algerian mouse genes (SPRET_EiJ_v1) affy_moe430b AFFY MOE430B probe +mspretus_gene_ensembl Algerian mouse genes (SPRET_EiJ_v1) affy_moex_1_0_st_v1 AFFY MoEx 1 0 st v1 probe +mspretus_gene_ensembl Algerian mouse genes (SPRET_EiJ_v1) affy_mogene_1_0_st_v1 AFFY MoGene 1 0 st v1 probe +mspretus_gene_ensembl Algerian mouse genes (SPRET_EiJ_v1) affy_mogene_2_1_st_v1 AFFY MoGene 2 1 st v1 probe +mspretus_gene_ensembl Algerian mouse genes (SPRET_EiJ_v1) affy_mouse430a_2 AFFY Mouse430A 2 probe +mspretus_gene_ensembl Algerian mouse genes (SPRET_EiJ_v1) affy_mouse430_2 AFFY Mouse430 2 probe +mspretus_gene_ensembl Algerian mouse genes (SPRET_EiJ_v1) affy_mu11ksuba AFFY Mu11KsubA probe +mspretus_gene_ensembl Algerian mouse genes (SPRET_EiJ_v1) affy_mu11ksubb AFFY Mu11KsubB 
probe +mspretus_gene_ensembl Algerian mouse genes (SPRET_EiJ_v1) ensembl_gene_id Gene stable ID +mspretus_gene_ensembl Algerian mouse genes (SPRET_EiJ_v1) external_gene_name Gene name +olatipes_gene_ensembl Medaka genes (HdrR) ensembl_gene_id Gene stable ID +olatipes_gene_ensembl Medaka genes (HdrR) external_gene_name Gene name +olatipes_gene_ensembl Medaka genes (HdrR) entrezgene NCBI gene ID +pbairdii_gene_ensembl Northern American deer mouse genes (Pman_1.0) ensembl_gene_id Gene stable ID +pbairdii_gene_ensembl Northern American deer mouse genes (Pman_1.0) external_gene_name Gene name +pbairdii_gene_ensembl Northern American deer mouse genes (Pman_1.0) entrezgene NCBI gene ID +sharrisii_gene_ensembl Tasmanian devil genes (Devil_ref v7.0) ensembl_gene_id Gene stable ID +sharrisii_gene_ensembl Tasmanian devil genes (Devil_ref v7.0) external_gene_name Gene name +sharrisii_gene_ensembl Tasmanian devil genes (Devil_ref v7.0) entrezgene NCBI gene ID +xmaculatus_gene_ensembl Platyfish genes (Xipmac4.4.2) ensembl_gene_id Gene stable ID +xmaculatus_gene_ensembl Platyfish genes (Xipmac4.4.2) external_gene_name Gene name +xmaculatus_gene_ensembl Platyfish genes (Xipmac4.4.2) entrezgene NCBI gene ID +trubripes_gene_ensembl Fugu genes (FUGU 4.0) ensembl_gene_id Gene stable ID +trubripes_gene_ensembl Fugu genes (FUGU 4.0) external_gene_name Gene name +trubripes_gene_ensembl Fugu genes (FUGU 4.0) entrezgene NCBI gene ID +mlucifugus_gene_ensembl Microbat genes (Myoluc2.0) ensembl_gene_id Gene stable ID +mlucifugus_gene_ensembl Microbat genes (Myoluc2.0) external_gene_name Gene name +mlucifugus_gene_ensembl Microbat genes (Myoluc2.0) entrezgene NCBI gene ID +aplatyrhynchos_gene_ensembl Duck genes (BGI_duck_1.0) ensembl_gene_id Gene stable ID +aplatyrhynchos_gene_ensembl Duck genes (BGI_duck_1.0) external_gene_name Gene name +aplatyrhynchos_gene_ensembl Duck genes (BGI_duck_1.0) entrezgene NCBI gene ID +anancymaae_gene_ensembl Ma's night monkey genes (Anan_2.0) 
affy_cyngene_1_0_st_v1 AFFY CynGene 1 0 st v1 probe +anancymaae_gene_ensembl Ma's night monkey genes (Anan_2.0) affy_cyrgene_1_0_st_v1 AFFY CyRGene 1 0 st v1 probe +anancymaae_gene_ensembl Ma's night monkey genes (Anan_2.0) affy_hc_g110 AFFY HC G110 probe +anancymaae_gene_ensembl Ma's night monkey genes (Anan_2.0) affy_hg_focus AFFY HG Focus probe +anancymaae_gene_ensembl Ma's night monkey genes (Anan_2.0) affy_hg_u133a AFFY HG U133A probe +anancymaae_gene_ensembl Ma's night monkey genes (Anan_2.0) affy_hg_u133a_2 AFFY HG U133A 2 probe +anancymaae_gene_ensembl Ma's night monkey genes (Anan_2.0) affy_hg_u133b AFFY HG U133B probe +anancymaae_gene_ensembl Ma's night monkey genes (Anan_2.0) affy_hg_u133_plus_2 AFFY HG U133 Plus 2 probe +anancymaae_gene_ensembl Ma's night monkey genes (Anan_2.0) affy_hg_u95a AFFY HG U95A probe +anancymaae_gene_ensembl Ma's night monkey genes (Anan_2.0) affy_hg_u95av2 AFFY HG U95Av2 probe +anancymaae_gene_ensembl Ma's night monkey genes (Anan_2.0) affy_hg_u95b AFFY HG U95B probe +anancymaae_gene_ensembl Ma's night monkey genes (Anan_2.0) affy_hg_u95c AFFY HG U95C probe +anancymaae_gene_ensembl Ma's night monkey genes (Anan_2.0) affy_hg_u95d AFFY HG U95D probe +anancymaae_gene_ensembl Ma's night monkey genes (Anan_2.0) affy_hg_u95e AFFY HG U95E probe +anancymaae_gene_ensembl Ma's night monkey genes (Anan_2.0) affy_hta_2_0 AFFY HTA 2 0 probe +anancymaae_gene_ensembl Ma's night monkey genes (Anan_2.0) affy_huex_1_0_st_v2 AFFY HuEx 1 0 st v2 probe +anancymaae_gene_ensembl Ma's night monkey genes (Anan_2.0) affy_hugenefl AFFY HuGeneFL probe +anancymaae_gene_ensembl Ma's night monkey genes (Anan_2.0) affy_hugene_1_0_st_v1 AFFY HuGene 1 0 st v1 probe +anancymaae_gene_ensembl Ma's night monkey genes (Anan_2.0) affy_hugene_2_0_st_v1 AFFY HuGene 2 0 st v1 probe +anancymaae_gene_ensembl Ma's night monkey genes (Anan_2.0) affy_primeview AFFY PrimeView probe +anancymaae_gene_ensembl Ma's night monkey genes (Anan_2.0) affy_rhegene_1_0_st_v1 AFFY 
RheGene 1 0 st v1 probe +anancymaae_gene_ensembl Ma's night monkey genes (Anan_2.0) affy_rhegene_1_1_st_v1 AFFY RheGene 1 1 st v1 probe +anancymaae_gene_ensembl Ma's night monkey genes (Anan_2.0) affy_rhesus AFFY Rhesus probe +anancymaae_gene_ensembl Ma's night monkey genes (Anan_2.0) affy_u133_x3p AFFY U133 X3P probe +anancymaae_gene_ensembl Ma's night monkey genes (Anan_2.0) ensembl_gene_id Gene stable ID +anancymaae_gene_ensembl Ma's night monkey genes (Anan_2.0) external_gene_name Gene name +anancymaae_gene_ensembl Ma's night monkey genes (Anan_2.0) entrezgene NCBI gene ID +clanigera_gene_ensembl Long-tailed chinchilla genes (ChiLan1.0) ensembl_gene_id Gene stable ID +clanigera_gene_ensembl Long-tailed chinchilla genes (ChiLan1.0) external_gene_name Gene name +clanigera_gene_ensembl Long-tailed chinchilla genes (ChiLan1.0) entrezgene NCBI gene ID +ccapucinus_gene_ensembl Capuchin genes (Cebus_imitator-1.0) affy_cyngene_1_0_st_v1 AFFY CynGene 1 0 st v1 probe +ccapucinus_gene_ensembl Capuchin genes (Cebus_imitator-1.0) affy_cyrgene_1_0_st_v1 AFFY CyRGene 1 0 st v1 probe +ccapucinus_gene_ensembl Capuchin genes (Cebus_imitator-1.0) affy_hc_g110 AFFY HC G110 probe +ccapucinus_gene_ensembl Capuchin genes (Cebus_imitator-1.0) affy_hg_focus AFFY HG Focus probe +ccapucinus_gene_ensembl Capuchin genes (Cebus_imitator-1.0) affy_hg_u133a AFFY HG U133A probe +ccapucinus_gene_ensembl Capuchin genes (Cebus_imitator-1.0) affy_hg_u133a_2 AFFY HG U133A 2 probe +ccapucinus_gene_ensembl Capuchin genes (Cebus_imitator-1.0) affy_hg_u133b AFFY HG U133B probe +ccapucinus_gene_ensembl Capuchin genes (Cebus_imitator-1.0) affy_hg_u133_plus_2 AFFY HG U133 Plus 2 probe +ccapucinus_gene_ensembl Capuchin genes (Cebus_imitator-1.0) affy_hg_u95a AFFY HG U95A probe +ccapucinus_gene_ensembl Capuchin genes (Cebus_imitator-1.0) affy_hg_u95av2 AFFY HG U95Av2 probe +ccapucinus_gene_ensembl Capuchin genes (Cebus_imitator-1.0) affy_hg_u95b AFFY HG U95B probe +ccapucinus_gene_ensembl Capuchin genes 
(Cebus_imitator-1.0) affy_hg_u95c AFFY HG U95C probe +ccapucinus_gene_ensembl Capuchin genes (Cebus_imitator-1.0) affy_hg_u95d AFFY HG U95D probe +ccapucinus_gene_ensembl Capuchin genes (Cebus_imitator-1.0) affy_hg_u95e AFFY HG U95E probe +ccapucinus_gene_ensembl Capuchin genes (Cebus_imitator-1.0) affy_hta_2_0 AFFY HTA 2 0 probe +ccapucinus_gene_ensembl Capuchin genes (Cebus_imitator-1.0) affy_huex_1_0_st_v2 AFFY HuEx 1 0 st v2 probe +ccapucinus_gene_ensembl Capuchin genes (Cebus_imitator-1.0) affy_hugenefl AFFY HuGeneFL probe +ccapucinus_gene_ensembl Capuchin genes (Cebus_imitator-1.0) affy_hugene_1_0_st_v1 AFFY HuGene 1 0 st v1 probe +ccapucinus_gene_ensembl Capuchin genes (Cebus_imitator-1.0) affy_hugene_2_0_st_v1 AFFY HuGene 2 0 st v1 probe +ccapucinus_gene_ensembl Capuchin genes (Cebus_imitator-1.0) affy_primeview AFFY PrimeView probe +ccapucinus_gene_ensembl Capuchin genes (Cebus_imitator-1.0) affy_rhegene_1_0_st_v1 AFFY RheGene 1 0 st v1 probe +ccapucinus_gene_ensembl Capuchin genes (Cebus_imitator-1.0) affy_rhegene_1_1_st_v1 AFFY RheGene 1 1 st v1 probe +ccapucinus_gene_ensembl Capuchin genes (Cebus_imitator-1.0) affy_rhesus AFFY Rhesus probe +ccapucinus_gene_ensembl Capuchin genes (Cebus_imitator-1.0) affy_u133_x3p AFFY U133 X3P probe +ccapucinus_gene_ensembl Capuchin genes (Cebus_imitator-1.0) ensembl_gene_id Gene stable ID +ccapucinus_gene_ensembl Capuchin genes (Cebus_imitator-1.0) external_gene_name Gene name +ccapucinus_gene_ensembl Capuchin genes (Cebus_imitator-1.0) entrezgene NCBI gene ID +pcapensis_gene_ensembl Hyrax genes (proCap1) ensembl_gene_id Gene stable ID +pcapensis_gene_ensembl Hyrax genes (proCap1) external_gene_name Gene name +pcapensis_gene_ensembl Hyrax genes (proCap1) entrezgene NCBI gene ID +fdamarensis_gene_ensembl Damara mole rat genes (DMR_v1.0) ensembl_gene_id Gene stable ID +fdamarensis_gene_ensembl Damara mole rat genes (DMR_v1.0) external_gene_name Gene name +fdamarensis_gene_ensembl Damara mole rat genes (DMR_v1.0) 
entrezgene NCBI gene ID
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/galaxy/wrappers/tool_data_table_conf.xml.sample Fri Jun 26 09:43:41 2020 -0400 @@ -0,0 +1,13 @@ +<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changed in revision 4550:535d276c92bc--> +<tables> + <!-- Locations of files required for apt tool --> + <table name="aptTool" comment_char="#" allow_duplicate_entries="False"> + <columns>value, name, pathPGF, pathCLF, pathMPS, pathBGP, pathCDF, pathAnnotTrans, pathAnnotProbe, versionInfo</columns> + <file path="${__HERE__}/tool-data/aptTool.loc" /> + </table> + <!-- Locations of files required for LIMMA tool --> + <table name="LimmaTool" comment_char="#" allow_duplicate_entries="False"> + <columns>value, name, rowTypeID, rowTypeName</columns> + <file path="${__HERE__}/tool-data/LimmaTool.loc" /> + </table> +</tables>
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000
# +++ b/src/ExprPlotsScript.R Fri Jun 26 09:43:41 2020 -0400
# @@ -0,0 +1,465 @@
# A command-line interface to basic plots for use with Galaxy
# written by Jimmy Vandel
#
# Draws quality-control plots (intensity densities, MA plots, boxplots,
# microarray images, scree plot and 3D PCA) from either a set of .CEL files or
# a tabular expression file. Static figures go to ./plotDir, interactive plotly
# widgets go to ./plotLyDir, and the mapping between generic file identifiers
# and displayed names is written to ./correspondanceFileNames.csv.
#
# Required arguments: --log, --dataFile, --dataFileFormat, --format and at
# least one of --histo, --maPlot, --boxplot, --microarray, --acp.

initial.options <- commandArgs(trailingOnly = FALSE)
file.arg.name <- "--file="
script.name <- sub(file.arg.name, "", initial.options[grep(file.arg.name, initial.options)])
script.basename <- dirname(script.name)
# addComment(), multiplot() and printSessionInfo() are expected from utils.R,
# getopt() from getopt.R (both live next to this script)
source(file.path(script.basename, "utils.R"))
source(file.path(script.basename, "getopt.R"))

# setup R error handling to go to stderr
options(show.error.messages = FALSE, error = function() {
  cat(geterrmessage(), file = stderr())
  q("no", 1, FALSE)
})

# we need that to not crash galaxy with an UTF8 error on German LC settings.
loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
loc <- Sys.setlocale("LC_NUMERIC", "C")

# get starting time
start.time <- Sys.time()

# get options
# (the option name was misspelled "stringAsfactors", which R silently ignored)
options(stringsAsFactors = FALSE, useFancyQuotes = FALSE)
args <- commandArgs()

# get options, using the spec as defined by the enclosed list.
# we read the options from the default: commandArgs(TRUE).
spec <- matrix(c(
  "dataFile", "i", 1, "character",
  "factorInfo", "t", 1, "character",
  "dataFileFormat", "j", 1, "character",
  "conditionNames", "c", 1, "character",
  "format", "f", 1, "character",
  "quiet", "q", 0, "logical",
  "log", "l", 1, "character",
  "histo", "h", 1, "character",
  "maPlot", "a", 1, "character",
  "boxplot", "b", 1, "character",
  "microarray", "m", 1, "character",
  "acp", "p", 1, "character",
  "screePlot", "s", 1, "character"),
  byrow = TRUE, ncol = 4)
opt <- getopt(spec)

# enforce the following required arguments
if (is.null(opt$log)) {
  addComment("[ERROR]'log file' is required")
  q("no", 1, FALSE)
}
addComment("[INFO]Start of R script", TRUE, opt$log, display = FALSE)
if (is.null(opt$dataFile) || is.null(opt$dataFileFormat)) {
  addComment("[ERROR]'dataFile' and it format are required", TRUE, opt$log)
  q("no", 1, FALSE)
}
if (is.null(opt$format)) {
  addComment("[ERROR]'output format' is required", TRUE, opt$log)
  q("no", 1, FALSE)
}
# scalar conditions: use the short-circuit operator
if (is.null(opt$histo) && is.null(opt$maPlot) && is.null(opt$boxplot) &&
    is.null(opt$microarray) && is.null(opt$acp)) {
  addComment("[ERROR]Select at least one plot to draw", TRUE, opt$log)
  q("no", 1, FALSE)
}

verbose <- is.null(opt$quiet)

addComment("[INFO]Parameters checked!", TRUE, opt$log, display = FALSE)

addComment(c("[INFO]Working directory: ", getwd()), TRUE, opt$log, display = FALSE)
addComment(c("[INFO]Command line: ", args), TRUE, opt$log, display = FALSE)

# directories for static and interactive plots
dir.create(file.path(getwd(), "plotDir"))
dir.create(file.path(getwd(), "plotLyDir"))

# silent package loading
suppressPackageStartupMessages({
  library("oligo")
  library("ff")
  library("ggplot2")
  library("plotly")
})

# Open the static graphics device (pdf or png, depending on --format) for the
# file "./plotDir/<fileStem>.<ext>". `pdfArgs` are extra arguments forwarded to
# pdf() only.
openPlotDevice <- function(fileStem, pdfArgs = list()) {
  if (opt$format == "pdf") {
    do.call(pdf, c(list(paste0("./plotDir/", fileStem, ".pdf")), pdfArgs))
  } else {
    png(paste0("./plotDir/", fileStem, ".png"))
  }
}

# Save a plotly object as a non self-contained HTML widget in ./plotLyDir.
savePlotlyHtml <- function(widget, fileStem) {
  htmlwidgets::saveWidget(
    as_widget(widget),
    paste0(file.path(getwd(), "plotLyDir"), "/", fileStem, ".html"),
    selfcontained = FALSE
  )
}

## ----------------------
## load input files

dataAreFromCel <- FALSE
if (toupper(opt$dataFileFormat) == "CEL") {
  # CEL files (comma-separated list of paths)
  dataAreFromCel <- TRUE
  celData <- read.celfiles(unlist(strsplit(opt$dataFile, ",")))
  # load all expressions
  dataMatrix <- exprs(celData)
  # select "pm" probes
  probeInfo <- getProbeInfo(celData, probeType = c("pm"), target = "probeset")
  # reduce dataMatrix to the log2 expression of a random selection of probes
  pmProbes <- unique(probeInfo[, 1])
  dataMatrix <- log2(dataMatrix[sample(pmProbes, min(100000, length(pmProbes))), ])
  addComment("[INFO]Raw data are log2 transformed", TRUE, opt$log, display = FALSE)
  remove(probeInfo, pmProbes)
} else {
  # already tabulated file; read everything as character and use header=FALSE
  # to avoid the "X" prefix R adds to column names with header=TRUE
  rawTable <- read.csv(file = opt$dataFile, header = FALSE, sep = "\t", colClasses = "character")
  # first row holds the column names, first column holds the row names
  colNamesData <- as.character(unlist(rawTable[1, -1]))
  rawTable <- rawTable[-1, , drop = FALSE]
  rowNamesData <- rawTable[, 1]
  valueTable <- rawTable[, -1, drop = FALSE]
  # Explicit numeric conversion: since R 4.0.0 data.matrix() turns character
  # columns into factor codes, which would silently corrupt the expression
  # values. drop=FALSE above also covers the single-sample case.
  dataMatrix <- matrix(
    as.numeric(as.matrix(valueTable)),
    nrow = nrow(valueTable),
    ncol = ncol(valueTable),
    dimnames = list(rowNamesData, colNamesData)
  )
  if (any(duplicated(rowNamesData))) {
    addComment("[WARNING] several rows share the same probe/gene name, you should merge or rename them to avoid further analysis mistakes", TRUE, opt$log, display = FALSE)
  }
  remove(rawTable, valueTable)
}

addComment("[INFO]Input data loaded", TRUE, opt$log, display = FALSE)
addComment(c("[INFO]Dim of data matrix:", dim(dataMatrix)), TRUE, opt$log, display = FALSE)

# get number of conditions
nbConditions <- ncol(dataMatrix)

# get condition names if they are specified
# NOTE(review): opt$conditionNames is a single string here, so this branch is
# only taken when there is exactly one condition - confirm whether the wrapper
# is expected to pass a separated list that should be split.
if (!is.null(opt$conditionNames) && length(opt$conditionNames) == nbConditions) {
  nameConditions <- opt$conditionNames
  colnames(dataMatrix) <- nameConditions
} else {
  nameConditions <- colnames(dataMatrix)
}

# create a correspondence table between plot file names and names displayed in
# figure legends and html items
correspondanceNameTable <- matrix("", ncol = 2, nrow = nbConditions)
correspondanceNameTable[, 1] <- paste("Condition", seq_len(nbConditions), sep = "")
correspondanceNameTable[, 2] <- nameConditions
rownames(correspondanceNameTable) <- correspondanceNameTable[, 2]

addComment("[INFO]Retreive condition names", TRUE, opt$log, display = FALSE)

if (!is.null(opt$factorInfo)) {
  # load the factor file
  factorInfoMatrix <- read.csv(file = file.path(getwd(), opt$factorInfo), header = FALSE, sep = "\t", colClasses = "character")
  # remove first row to convert it as colnames
  colnames(factorInfoMatrix) <- factorInfoMatrix[1, ]
  factorInfoMatrix <- factorInfoMatrix[-1, ]
  # use first column as rownames but keep it, to avoid conversion to a vector
  # when there is a single factor
  rownames(factorInfoMatrix) <- factorInfoMatrix[, 1]

  if (length(setdiff(colnames(dataMatrix), rownames(factorInfoMatrix))) != 0) {
    addComment("[ERROR]Missing samples in factor file", TRUE, opt$log)
    q("no", 1, FALSE)
  }

  # order samples as in expression matrix and remove spurious samples
  factorInfoMatrix <- factorInfoMatrix[colnames(dataMatrix), ]

  addComment("[INFO]Factors OK", TRUE, opt$log, display = FALSE)
  addComment(c("[INFO]Dim of factorInfo matrix:", dim(factorInfoMatrix)), TRUE, opt$log, display = FALSE)
}

addComment("[INFO]Ready to plot", TRUE, opt$log, display = FALSE)

## ----------------------

### plot histograms ###
histogramPerFigure <- 50
if (!is.null(opt$histo)) {
  for (iToPlot in seq_len(((nbConditions - 1) %/% histogramPerFigure) + 1)) {
    firstPlot <- 1 + histogramPerFigure * (iToPlot - 1)
    lastPlot <- min(nbConditions, histogramPerFigure * iToPlot)
    dataToPlot <- data.frame(
      x = c(dataMatrix[, firstPlot:lastPlot]),
      Experiment = rep(colnames(dataMatrix)[firstPlot:lastPlot], each = nrow(dataMatrix))
    )
    p <- ggplot(data = dataToPlot, aes(x = x, color = Experiment)) +
      stat_density(geom = "line", size = 1, position = "identity") +
      ggtitle("Intensity densities") + theme_bw() + ylab(label = "Density") +
      theme(panel.border = element_blank(), plot.title = element_text(hjust = 0.5))
    if (dataAreFromCel) {
      p <- p + xlab(label = "Log2 intensities")
    } else {
      p <- p + xlab(label = "Intensities")
    }
    openPlotDevice(paste0(opt$histo, iToPlot))
    print(p)
    dev.off()
    # save plotly files
    pp <- ggplotly(p)
    savePlotlyHtml(pp, paste0(opt$histo, iToPlot))
  }
  remove(p, dataToPlot)
  addComment("[INFO]Histograms drawn", TRUE, opt$log, display = FALSE)
}

## ----------------------

### plot MAplots ###
MAplotPerPage <- 4
if (!is.null(opt$maPlot)) {
  iToPlot <- 1
  plotVector <- list()
  # work on at most 200k randomly selected rows
  toTake <- sample(nrow(dataMatrix), min(200000, nrow(dataMatrix)))
  # reference "array" is the per-row median over all conditions
  refMedianColumn <- rowMedians(as.matrix(dataMatrix[toTake, ]))
  if (length(toTake) > 100000) {
    addComment(c("[INFO]high number of input data rows ", length(toTake), "; the generation of MA plot can take a while, please be patient"), TRUE, opt$log, display = FALSE)
  }
  for (iCondition in seq_len(nbConditions)) {
    dataA <- dataMatrix[toTake, iCondition]
    dataB <- refMedianColumn # WARNING: median reference is used by default
    xToPlot <- 0.5 * (dataA + dataB)
    yToPlot <- dataA - dataB
    # running median of M over windows of width 0.1 along A
    tempX <- seq(min(xToPlot), max(xToPlot), 0.1)
    tempY <- unlist(lapply(tempX, function(x) {
      median(yToPlot[intersect(which(xToPlot >= (x - 0.1 / 2)), which(xToPlot < (x + 0.1 / 2)))])
    }))

    dataToPlot <- data.frame(x = xToPlot, y = yToPlot)
    dataMedianToPlot <- data.frame(x = tempX, y = tempY)
    p <- ggplot(data = dataToPlot, aes(x, y)) +
      stat_density2d(aes(fill = ..density..^0.25), geom = "tile", contour = FALSE, n = 100) +
      scale_fill_continuous(low = "white", high = "dodgerblue4") +
      geom_smooth(data = dataMedianToPlot, colour = "red", size = 0.5, se = FALSE) +
      ggtitle(correspondanceNameTable[iCondition, 2]) + theme_bw() +
      xlab(label = "") + ylab(label = "") +
      theme(panel.border = element_blank(), plot.title = element_text(hjust = 0.5), legend.position = "none")
    plotVector[[length(plotVector) + 1]] <- p

    # save plotly files
    pp <- ggplotly(p)
    savePlotlyHtml(pp, paste0(opt$maPlot, "_", correspondanceNameTable[iCondition, 1]))

    if (iCondition == nbConditions || length(plotVector) == MAplotPerPage) {
      # flush the current page of MA plots to a new plotting file
      openPlotDevice(paste0(opt$maPlot, iToPlot))
      multiplot(plotlist = plotVector, cols = 2)
      dev.off()
      if (iCondition < nbConditions) {
        # prepare for a new plotting file if necessary
        plotVector <- list()
        iToPlot <- iToPlot + 1
      }
    }
  }
  remove(p, dataToPlot, dataA, dataB, toTake, xToPlot, yToPlot)
  addComment("[INFO]MAplots drawn", TRUE, opt$log, display = FALSE)
}

## ----------------------

### plot boxplots ###
boxplotPerFigure <- 50
if (!is.null(opt$boxplot)) {
  for (iToPlot in seq_len(((nbConditions - 1) %/% boxplotPerFigure) + 1)) {
    firstPlot <- 1 + boxplotPerFigure * (iToPlot - 1)
    lastPlot <- min(nbConditions, boxplotPerFigure * iToPlot)
    plottedColumns <- firstPlot:lastPlot
    dataToPlot <- data.frame(
      intensities = c(dataMatrix[, plottedColumns]),
      Experiment = rep(colnames(dataMatrix)[plottedColumns], each = nrow(dataMatrix))
    )
    # to make HTML file lighter, sampling will be done amongst outliers
    # get outliers for each boxplot; lapply() always yields a named list,
    # whereas apply() failed on a single column and returned a matrix when all
    # columns had the same number of outliers
    boxplotsOutliers <- lapply(plottedColumns, function(iColumn) boxplot.stats(dataMatrix[, iColumn])$out)
    names(boxplotsOutliers) <- colnames(dataMatrix)[plottedColumns]
    # keep at most 1000 sampled outliers plus the min and max outlier values;
    # index-based sampling avoids sample()'s "1:x" behaviour for a single value
    boxplotsOutliers <- lapply(boxplotsOutliers, function(x) {
      if (length(x) > 0) {
        c(x[sample.int(length(x), min(length(x), 1000))], max(c(x)), min(c(x)))
      }
    })
    dataOutliers <- data.frame(
      yVal = unlist(boxplotsOutliers),
      xVal = unlist(lapply(seq_along(boxplotsOutliers), function(x) {
        rep(names(boxplotsOutliers)[x], length(boxplotsOutliers[[x]]))
      }))
    )
    # plot boxplots without outliers; bottom/left margins grow with the longest
    # experiment name because x labels are rotated by 45 degrees
    maxNameLength <- max(nchar(as.character(dataToPlot$Experiment)))
    p <- ggplot(data = dataToPlot, aes(y = intensities, x = Experiment, color = Experiment)) +
      geom_boxplot(outlier.colour = NA, outlier.shape = NA) +
      ggtitle("Intensities") + theme_bw() + xlab(label = "") +
      theme(
        panel.border = element_blank(),
        plot.title = element_text(hjust = 0.5),
        axis.text.x = element_text(angle = 45, hjust = 1),
        plot.margin = unit(c(10, 10, maxNameLength, 15 + maxNameLength), "mm")
      )
    # add sampled outliers to the plot (nothing to add when there is none)
    if (nrow(dataOutliers) > 0) {
      p <- p + geom_point(data = dataOutliers, aes(x = xVal, y = yVal, color = xVal), inherit.aes = FALSE)
    }
    if (dataAreFromCel) {
      p <- p + ylab(label = "Log2 intensities")
    } else {
      p <- p + ylab(label = "Intensities")
    }
    openPlotDevice(paste0(opt$boxplot, iToPlot))
    print(p)
    dev.off()
    # save plotly files
    pp <- ggplotly(p)

    # modify plotly object to get HTML file not too heavy for loading
    for (iData in seq_along(pp$x$data)) {
      if (pp$x$data[[iData]]$type == "scatter") {
        # scatter traces are the outlier points added through geom_point; they
        # have been sampled already, only the hover text has to be modified
        hoverPrefix <- if (dataAreFromCel) "log2(intensity) " else "intensity "
        pp$x$data[[iData]]$text <- unlist(lapply(seq_along(pp$x$data[[iData]]$y), function(x) {
          paste(c(hoverPrefix, prettyNum(pp$x$data[[iData]]$y[x], digits = 4)), collapse = "")
        }))
      } else {
        # disable marker plotting to keep only box and whiskers (outliers are
        # displayed through the scatter traces)
        pp$x$data[[iData]]$marker$opacity <- 0

        # sample 50000 points amongst all data to get a lighter html file;
        # sampling size should not be too low to avoid modifying box limits
        columnValues <- dataMatrix[, pp$x$data[[iData]]$name]
        pp$x$data[[iData]]$y <- c(
          columnValues[sample.int(length(columnValues), min(length(columnValues), 50000))],
          min(columnValues),
          max(columnValues)
        )
        pp$x$data[[iData]]$x <- rep(pp$x$data[[iData]]$x[1], length(pp$x$data[[iData]]$y))
      }
    }

    savePlotlyHtml(pp, paste0(opt$boxplot, iToPlot))
  }
  remove(p, dataToPlot)
  addComment("[INFO]Boxplots drawn", TRUE, opt$log, display = FALSE)
}

## ----------------------

### plot microarrays (only for .CEL files) ###
if (!is.null(opt$microarray) && dataAreFromCel) {
  for (iCondition in seq_len(nbConditions)) {
    openPlotDevice(
      paste0(opt$microarray, "_", correspondanceNameTable[iCondition, 1]),
      pdfArgs = list(onefile = FALSE, width = 5, height = 5)
    )
    image(celData[, iCondition], main = correspondanceNameTable[iCondition, 2])
    dev.off()
  }
  addComment("[INFO]Microarray drawn", TRUE, opt$log, display = FALSE)
}

## ----------------------

### plot PCA plot ###
if (!is.null(opt$acp)) {
  # to avoid error when nrow is too large, results are quite stable with 200k
  # randomly selected rows
  randomSelection <- sample(nrow(dataMatrix), min(200000, nrow(dataMatrix)))

  # remove constant variables (they cannot be scaled)
  dataFiltered <- dataMatrix[randomSelection, ]
  toRemove <- which(unlist(apply(dataFiltered, 1, var)) == 0)
  if (length(toRemove) > 0) {
    dataFiltered <- dataFiltered[-toRemove, ]
  }
  PACres <- prcomp(t(dataFiltered), scale. = TRUE)

  if (!is.null(opt$screePlot)) {
    # scree plot: percentage of variance explained by each component
    dataToPlot <- data.frame(
      compo = seq(1, length(PACres$sdev)),
      var = (PACres$sdev^2 / sum(PACres$sdev^2)) * 100
    )
    p <- ggplot(data = dataToPlot, aes(x = compo, y = var)) +
      geom_bar(stat = "identity", fill = "steelblue") + geom_line() + geom_point() +
      ggtitle("Scree plot") + theme_bw() +
      theme(panel.border = element_blank(), plot.title = element_text(hjust = 0.5)) +
      xlab(label = "Dimensions") + ylab(label = "% explained variances") +
      scale_x_discrete(limits = dataToPlot$compo)
    pp <- ggplotly(p)

    openPlotDevice(opt$screePlot)
    plot(p)
    dev.off()
    savePlotlyHtml(pp, opt$screePlot)
  }

  # now plot pca plots
  pcaLayout <- function(widget) {
    layout(widget,
      title = "Principal Component Analysis",
      scene = list(
        xaxis = list(title = "Component 1"),
        yaxis = list(title = "Component 2"),
        zaxis = list(title = "Component 3")
      ),
      legend = list(font = list(family = "sans-serif", size = 15, color = "#000"))
    )
  }

  if (!is.null(opt$factorInfo)) {
    fileIdent <- ""
    symbolset <- c("circle", "cross", "square", "diamond", "circle-open", "square-open", "diamond-open", "x")

    # save equivalence between real factor names and generic ones in
    # correspondanceNameTable
    correspondanceNameTable <- rbind(
      correspondanceNameTable,
      matrix(
        c(paste("Factor", 1:(ncol(factorInfoMatrix) - 1), sep = ""), colnames(factorInfoMatrix)[-1]),
        ncol = 2, nrow = ncol(factorInfoMatrix) - 1
      )
    )
    rownames(correspondanceNameTable) <- correspondanceNameTable[, 2]

    # first order factors by decreasing number of groups
    factorNames <- colnames(factorInfoMatrix)[-1]
    groupCounts <- unlist(lapply(factorNames, function(x) length(table(factorInfoMatrix[, x]))))
    orderedFactors <- factorNames[order(groupCounts, decreasing = TRUE)]
    # if even the smallest factor has more groups than available symbols, two
    # factors can never be displayed together
    allFactorsBigger <- length(table(factorInfoMatrix[, orderedFactors[length(orderedFactors)]])) > length(symbolset)
    if (allFactorsBigger) {
      addComment("[WARNING]All factors are composed of too many groups to display two factors at same time, each PCA plot will display only one factor groups", TRUE, opt$log, display = FALSE)
    }
    hoverLabels <- unlist(lapply(rownames(PACres$x), function(x) paste(factorInfoMatrix[x, -1], collapse = ",")))
    for (iFactor in seq_along(orderedFactors)) {
      # if it is the last factor of the list or if all factors are too big
      if (iFactor == length(orderedFactors) || allFactorsBigger) {
        if (length(orderedFactors) == 1 || allFactorsBigger) {
          dataToPlot <- data.frame(
            PC1 = PACres$x[, 1], PC2 = PACres$x[, 2], PC3 = PACres$x[, 3],
            Experiments = rownames(PACres$x),
            Attribute1 = factorInfoMatrix[rownames(PACres$x), orderedFactors[iFactor]],
            hoverLabel = hoverLabels
          )
          # count groups with unique(): levels() is NULL for character columns,
          # which made the palette collapse to two colours
          p <- plot_ly(dataToPlot, x = ~PC1, y = ~PC2, z = ~PC3, type = "scatter3d", mode = "markers",
            color = ~Attribute1, colors = rainbow(length(unique(dataToPlot$Attribute1)) + 2),
            hoverinfo = "text", text = ~paste(Experiments, "\n", hoverLabel), marker = list(size = 5))
          p <- pcaLayout(p)
          fileIdent <- correspondanceNameTable[orderedFactors[iFactor], 1]
          # save the plotly plot
          savePlotlyHtml(p, paste0(opt$acp, "_", fileIdent))
        }
      } else {
        for (iFactorBis in (iFactor + 1):length(orderedFactors)) {
          if (length(table(factorInfoMatrix[, orderedFactors[iFactorBis]])) <= length(symbolset)) {
            dataToPlot <- data.frame(
              PC1 = PACres$x[, 1], PC2 = PACres$x[, 2], PC3 = PACres$x[, 3],
              Experiments = rownames(PACres$x),
              Attribute1 = factorInfoMatrix[rownames(PACres$x), orderedFactors[iFactor]],
              Attribute2 = factorInfoMatrix[rownames(PACres$x), orderedFactors[iFactorBis]],
              hoverLabel = hoverLabels
            )
            p <- plot_ly(dataToPlot, x = ~PC1, y = ~PC2, z = ~PC3, type = "scatter3d", mode = "markers",
              color = ~Attribute1, colors = rainbow(length(unique(dataToPlot$Attribute1)) + 2),
              symbol = ~Attribute2, symbols = symbolset,
              hoverinfo = "text", text = ~paste(Experiments, "\n", hoverLabel), marker = list(size = 5))
            p <- pcaLayout(p)
            fileIdent <- paste(correspondanceNameTable[orderedFactors[c(iFactor, iFactorBis)], 1], collapse = "_AND_")
            # save the plotly plot
            savePlotlyHtml(p, paste0(opt$acp, "_", fileIdent))
          } else {
            addComment(c("[WARNING]PCA with", orderedFactors[iFactor], "and", orderedFactors[iFactorBis], "groups cannot be displayed, too many groups (max", length(symbolset), ")"), TRUE, opt$log, display = FALSE)
          }
        }
      }
    }
  } else {
    dataToPlot <- data.frame(PC1 = PACres$x[, 1], PC2 = PACres$x[, 2], PC3 = PACres$x[, 3], Experiments = rownames(PACres$x))
    p <- plot_ly(dataToPlot, x = ~PC1, y = ~PC2, z = ~PC3, type = "scatter3d", mode = "markers",
      marker = list(size = 5, color = "salmon"), hoverinfo = "text", text = ~paste(Experiments))
    p <- pcaLayout(p)

    # save plotly files
    savePlotlyHtml(p, paste0(opt$acp, "_plot"))
  }
  remove(p, dataToPlot, dataFiltered)
  addComment("[INFO]ACP plot drawn", TRUE, opt$log, display = FALSE)
}

# write correspondences between plot file names and displayed names in figure
# legends, useful to define html items in xml file
write.table(correspondanceNameTable, file = file.path(getwd(), "correspondanceFileNames.csv"), quote = FALSE, sep = "\t", col.names = FALSE, row.names = FALSE)

end.time <- Sys.time()
addComment(c("[INFO]Total execution time for R script:", as.numeric(end.time - start.time, units = "mins"), "mins"), TRUE, opt$log, display = FALSE)

addComment("[INFO]End of R script", TRUE, opt$log, display = FALSE)

printSessionInfo(opt$log)
#sessionInfo()
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000
# +++ b/src/General_functions.py Fri Jun 26 09:43:41 2020 -0400
# @@ -0,0 +1,206 @@
import re
import numpy as np


def _read_header_fields(file_path):
    """Return the tab-separated fields of the first line of `file_path`.

    An empty file yields an empty list instead of raising StopIteration.
    """
    with open(file_path) as inputfile:
        firstLine = next(inputfile, None)
    if firstLine is None:
        return []
    return firstLine.strip().split("\t")


def _build_options(fields, is_excluded, each):
    """Build (name, value, selected) option tuples from `fields`.

    Fields whose index satisfies `is_excluded` are skipped; among the remaining
    ones, the first of every group of `each` consecutive fields is kept.
    """
    options = []
    cpt = 0
    for i, field_component in enumerate(fields):
        if is_excluded(i):
            continue
        if cpt == 0:
            options.append((field_component, field_component, False))
        cpt += 1
        if cpt == each:
            cpt = 0
    return options


def _find_forbidden_character(text, forbidden):
    """Return the first character of `forbidden` found in `text`, else None."""
    for specialCharacter in forbidden:
        if text.find(specialCharacter) != -1:
            return specialCharacter
    return None


def get_column_names(file_path, toNotConsider=-1, each=1):
    """List the header columns of a tabular file as option tuples.

    file_path     -- path of the tab-separated file
    toNotConsider -- index of one column to skip (-1 skips nothing)
    each          -- keep one column out of `each` among the considered ones

    Returns a list of (name, name, False) tuples ([] for an empty file).
    """
    return _build_options(_read_header_fields(file_path), lambda i: i == toNotConsider, each)


def get_column_names_filteredList(file_path, toNotConsider=[], each=1):
    """Same as get_column_names but `toNotConsider` is a collection of column
    indexes to skip."""
    return _build_options(_read_header_fields(file_path), lambda i: i in toNotConsider, each)


def get_column_names_mergeNumber(file_path, numberToMerge=1, toNotConsider=[]):
    """List column names obtained by merging the first header lines.

    file_path     -- path of the tab-separated file
    numberToMerge -- number of header lines; names found on successive header
                     lines are joined with "_". When 0 (or less), generic
                     "Column_<index>" names are generated instead.
    toNotConsider -- collection of column indexes to skip

    Returns a list of (name, name, False) tuples.
    """
    options = []
    nbHeaderLines = int(numberToMerge)
    with open(file_path) as inputfile:
        if nbHeaderLines > 0:
            for iHeader, iCurrentLine in enumerate(inputfile, start=1):
                if iHeader > nbHeaderLines:
                    break
                currentLine = iCurrentLine.strip().split("\t")
                iOption = -1
                for i, field_component in enumerate(currentLine):
                    if i in toNotConsider:  # to squeeze specified columns
                        continue
                    iOption += 1
                    if iHeader == 1:
                        options.append((str(field_component), str(field_component), False))
                    else:
                        # append this header line's label to the merged name
                        options[iOption] = (options[iOption][0] + "_" + str(field_component),
                                            options[iOption][1] + "_" + str(field_component),
                                            False)
        else:
            firstLine = next(inputfile, None)
            if firstLine is not None:  # empty file: no column at all
                for i, field_component in enumerate(firstLine.strip().split("\t")):
                    if i not in toNotConsider:  # to squeeze specified columns
                        options.append(("Column_" + str(i), "Column_" + str(i), False))
    return options


def get_row_names(file_path, factorName):
    """List the distinct values found in the column named `factorName`.

    Values are returned in order of first appearance as (value, value, False)
    tuples. An unknown column name or an empty file yields [].
    """
    options = []
    with open(file_path) as inputfile:
        firstLine = next(inputfile, None)
        if firstLine is None:
            return options
        iColumn = -1
        for i, field_component in enumerate(firstLine.strip().split("\t")):
            if field_component == factorName:  # last matching column wins
                iColumn = i
        if iColumn != -1:
            alreadySeen = set()
            for nextLine in inputfile:
                nextLine = nextLine.strip().split("\t")
                # skip blank lines and rows too short to hold the column
                if len(nextLine) > 1 and len(nextLine) > iColumn:
                    value = nextLine[iColumn]
                    if value not in alreadySeen:
                        alreadySeen.add(value)
                        options.append((value, value, False))
    return options


def get_condition_file_names(file_list, toNotConsider=-1, each=1):
    """List condition (sample) names as option tuples.

    file_list -- either a single object exposing `.file_name` (a tabular file,
                 whose header is read as in get_column_names), or a list of
                 objects exposing `.name` (e.g. .CEL files), or a list whose
                 first item exposes `.collection.elements`, each element
                 exposing `.element_identifier`.
    toNotConsider, each -- only used for the tabular case.
    """
    if not isinstance(file_list, list):
        # if input file is a tabular file, act as get_column_names
        return get_column_names(file_list.file_name, toNotConsider, each)
    options = []
    if len(file_list) == 0:
        return options
    if not hasattr(file_list[0], 'collection'):
        # if it is not a collection, get names easily
        for field_component in file_list:
            options.append((field_component.name, field_component.name, False))
    else:
        # if the file is a collection, go deeper in the corresponding
        # HistoryDatasetCollectionAssociation object
        for field_component in file_list[0].collection.elements:
            options.append((field_component.element_identifier, field_component.element_identifier, False))
    return options


def generateFactorFile(file_list, factor_list, outputFileName, logFile):
    """Write a sample x factor table from user-defined factors.

    file_list      -- input expected by get_condition_file_names
    factor_list    -- list of dicts {'factorName', 'valueList'}; each item of
                      'valueList' is a dict {'valueName', 'valueConditions'}
                      where 'valueConditions' is a comma-separated sample list
    outputFileName -- path of the factor file to write
    logFile        -- path of the log file to write

    Returns 0 on success, 1 when no factor or fewer than two values are
    defined, 2 when a sample is assigned to zero or several values of a
    factor, 3 when a factor name is duplicated and 4 when a name contains a
    forbidden character. Both files are always closed on return.
    """
    forbidenCharacters = {"*", ":", ",", "|"}
    with open(outputFileName, 'w') as outputfile, open(logFile, 'w') as outputLog:
        if not isinstance(file_list, list):
            # unique expression file, remove the first column (index=0)
            conditionNames = get_condition_file_names(file_list, 0)
        else:
            conditionNames = get_condition_file_names(file_list)  # .CEL files
        sampleList = [str(sample_component[1]) for sample_component in conditionNames]
        outputLog.write("[INFO] "+str(len(sampleList))+" sample are detected as input\n")
        globalDict = dict()
        factorNameList = []
        firstLine = "Conditions"
        if len(factor_list) == 0:  # check if there is at least one factor available
            outputLog.write("[ERROR] no factor was defined !\n")
            return 1
        for factor_component in factor_list:
            # samples still waiting for a value of the current factor
            currentSampleList = list(sampleList)
            currentFactor = str(factor_component['factorName'])
            # check if factor name contains forbidden characters
            specialCharacter = _find_forbidden_character(currentFactor, forbidenCharacters)
            if specialCharacter is not None:
                outputLog.write("[ERROR] '"+specialCharacter+"' character is forbidden in factor name : '"+currentFactor+"'\n")
                return 4
            # check if a factor is already named like that
            if currentFactor in globalDict:
                outputLog.write("[ERROR] '"+currentFactor+"' is used several times as factor name\n")
                return 3
            globalDict[currentFactor] = dict()
            firstLine = firstLine+"\t"+currentFactor
            factorNameList.append(currentFactor)
            if len(factor_component['valueList']) <= 1:  # at least two values are needed
                outputLog.write("[ERROR] at least two different values are necessary for '"+currentFactor+"' factor\n")
                return 1
            for value_component in factor_component['valueList']:
                currentValue = str(value_component['valueName'])
                # check if value name contains forbidden characters
                specialCharacter = _find_forbidden_character(currentValue, forbidenCharacters)
                if specialCharacter is not None:
                    outputLog.write("[ERROR] '"+specialCharacter+"' character is forbidden in value name : '"+currentValue+"'\n")
                    return 4
                currentSample = str(value_component['valueConditions']).split(",")
                for sample_component in currentSample:
                    if sample_component not in currentSampleList:
                        outputLog.write("[ERROR] sample "+sample_component+" was assigned several times for factor '"+currentFactor+"'\n")
                        return 2
                    currentSampleList.remove(sample_component)
                    globalDict[currentFactor][sample_component] = currentValue
            if len(currentSampleList) > 0:
                outputLog.write("[ERROR] for factor '"+currentFactor+"'' sample "+str(currentSampleList)+" are not assigned to any value\n")
                return 2
        outputLog.write("[INFO] "+str(len(globalDict))+" factors are detected\n")
        # start writing the factor file
        outputfile.write(firstLine+"\n")
        for sample_component in sampleList:
            newLine = sample_component
            for factor_component in factorNameList:
                newLine = newLine+"\t"+globalDict[factor_component][sample_component]
            outputfile.write(newLine+"\n")
    return 0


def selectSubSetTable(file_path, headerLine_number, columnsToAdd, columnNamesToKeep, outputFileName, logFile):
    """Extract a subset of columns from a tabular file, dropping header lines.

    file_path         -- path of the tab-separated input file
    headerLine_number -- number of header lines (int or numeric string)
    columnsToAdd      -- column indexes always kept, placed first
    columnNamesToKeep -- list of dicts {'pvalColumn', 'fdrColumn', 'fcColumn'}
                         naming columns (merged header names) to keep; an
                         unknown 'fdrColumn' falls back to column 0
    outputFileName    -- path of the file to write (data lines only)
    logFile           -- path of the log file to write

    Returns 0 on success and 1 on error (no column detected, unknown p-value
    or fold-change column, fewer lines than announced header lines). All files
    are closed on return.
    """
    with open(logFile, 'w') as outputLog:
        outputLog.write("[INFO] header line number : "+str(headerLine_number)+" lines\n")
        availableColumnsTuple = get_column_names_mergeNumber(file_path, headerLine_number)
        # convert tuple list as a simple array
        availableColumns = [str(tuple_content[0]) for tuple_content in availableColumnsTuple]
        if len(availableColumns) == 0:
            outputLog.write("[ERROR] No detected columns in input file\n")
            return 1
        selectedColumns = list(columnsToAdd)
        for volcano_content in columnNamesToKeep:
            # report unknown mandatory columns through the log instead of
            # letting list.index() raise an uncaught ValueError
            if (volcano_content['pvalColumn'] not in availableColumns
                    or volcano_content['fcColumn'] not in availableColumns):
                outputLog.write("[ERROR] matching between input file colnames and requested column names failed\n")
                return 1
            selectedColumns.append(availableColumns.index(volcano_content['pvalColumn']))
            if volcano_content['fdrColumn'] in availableColumns:
                selectedColumns.append(availableColumns.index(volcano_content['fdrColumn']))
            else:
                selectedColumns.append(0)
            selectedColumns.append(availableColumns.index(volcano_content['fcColumn']))
        outputLog.write("[INFO] columns kept : "+str(selectedColumns)+"\n")
        # start writing formatted file
        nbHeaderLines = int(headerLine_number)
        iLineCpt = -1
        with open(file_path) as inputfile, open(outputFileName, 'w') as outputfile:
            for iLineCpt, iCurrentLine in enumerate(inputfile):
                if iLineCpt >= nbHeaderLines:
                    currentLineFields = np.array(iCurrentLine.strip().split("\t"))
                    newLine = "\t".join(currentLineFields[selectedColumns])
                    outputfile.write(newLine+"\n")
        if iLineCpt < nbHeaderLines:
            outputLog.write("[ERROR] not enough lines in input files ("+str(iLineCpt+1)+" lines)\n")
            return 1
    return 0
# src/LIMMA_options.py
# Dynamic-option helpers used by the GIANT LIMMA Galaxy wrapper.
import re

# Characters that may not appear in factor or value names.
_FORBIDDEN_CHARACTERS = ("*", ":", ",", "|")


def _parse_selection(values):
    """Split the string form of a multi-select value into its items.

    Accepts both the Python 2 repr ("[u'a', u'b']") and the Python 3 repr
    ("['a', 'b']") of a list of strings; any other input yields a
    single-item list holding ``str(values)`` (e.g. ["None"]).
    """
    cleaned = re.sub(r"(^\[u?')|('\]$)", "", str(values))
    return re.split(r"', u?'", cleaned)


def _load_table(file_path):
    """Return (header fields, data rows) of a tab-separated file.

    Rows with fewer than two fields are ignored; an empty file gives a header
    made of one empty field and no rows.
    """
    rows = []
    with open(file_path) as inputfile:
        header = next(inputfile, "").strip().split("\t")
        for line in inputfile:
            fields = line.strip().split("\t")
            if len(fields) > 1:
                rows.append(fields)
    return header, rows


def _column_index(header, name):
    """Index of the last header field equal to ``name``, or -1 if absent."""
    found = -1
    for i, field_component in enumerate(header):
        if field_component == name:
            found = i
    return found


def _distinct_values(rows, column):
    """Values found in ``column``, in first-seen order, without duplicates."""
    seen = set()
    ordered = []
    for fields in rows:
        value = fields[column]
        if value not in seen:
            seen.add(value)
            ordered.append(value)
    return ordered


def _as_options(labels):
    """Turn labels into Galaxy option tuples (label, value, selected)."""
    return [(label, label, False) for label in labels]


def _pairwise_contrasts(values):
    """All 'x - y' and 'y - x' labels for every unordered pair of values."""
    labels = []
    for counter in range(len(values)-1):
        for innerCounter in range(counter+1, len(values)):
            labels.append(values[counter]+" - "+values[innerCounter])
            labels.append(values[innerCounter]+" - "+values[counter])
    return labels


def get_column_names(file_path, toNotConsider=None, toNotConsiderBis=None):
    """Options for every header column except the first and the two excluded names."""
    header, _ = _load_table(file_path)
    return _as_options([field_component for i, field_component in enumerate(header)
                        if i != 0 and field_component != toNotConsider
                        and field_component != toNotConsiderBis])


def get_row_names(file_path, factorName):
    """Options for the distinct values of column ``factorName`` (empty if absent)."""
    header, rows = _load_table(file_path)
    iColumn = _column_index(header, factorName)
    if iColumn == -1:
        return []
    return _as_options(_distinct_values(rows, iColumn))


def get_row_names_interaction(file_path, factorNameA, factorNameB):
    """Options 'a*b' for every pair of values of two factor columns."""
    header, rows = _load_table(file_path)
    iColumnA = _column_index(header, factorNameA)
    iColumnB = _column_index(header, factorNameB)
    possibleValuesA = []
    possibleValuesB = []
    if iColumnA != -1 and iColumnB != -1:
        possibleValuesA = _distinct_values(rows, iColumnA)
        possibleValuesB = _distinct_values(rows, iColumnB)
    labels = []
    if len(possibleValuesA) >= 1 and len(possibleValuesB) >= 1 and possibleValuesA[0] != "None" and possibleValuesB[0] != "None":
        for valueA in possibleValuesA:
            for valueB in possibleValuesB:
                labels.append(valueA+"*"+valueB)
    return _as_options(labels)


def get_comparisonsA(factorA, valuesA):
    """Options for every ordered pairwise comparison between selected values."""
    return _as_options(_pairwise_contrasts(_parse_selection(valuesA)))


def get_comparisonsAB(factorA, valuesA, factorB, valuesB, interaction):
    """Comparison options for two factors.

    Without interaction (``str(interaction) == "False"``): pairwise
    comparisons inside factor A, then inside factor B. With interaction:
    every '(a * b) - (a' * b')' comparison between two different cells.
    """
    possibleValuesA = _parse_selection(valuesA)
    possibleValuesB = _parse_selection(valuesB)
    labels = []
    if str(interaction) == "False":
        labels.extend(_pairwise_contrasts(possibleValuesA))
        labels.extend(_pairwise_contrasts(possibleValuesB))
    elif len(possibleValuesA) >= 1 and len(possibleValuesB) >= 1 and possibleValuesA[0] != "None" and possibleValuesB[0] != "None":
        for counterA in range(len(possibleValuesA)):
            for innerCounterA in range(len(possibleValuesA)):
                for counterB in range(len(possibleValuesB)):
                    for innerCounterB in range(len(possibleValuesB)):
                        if not (counterA == innerCounterA and counterB == innerCounterB):
                            labels.append("("+possibleValuesA[counterA]+" * "+possibleValuesB[counterB]+") - ("+possibleValuesA[innerCounterA]+" * "+possibleValuesB[innerCounterB]+")")
    return _as_options(labels)


def get_row_names_allInteractions(file_path, factorSelected):
    """Options 'v1*v2*...' for every combination of values of the selected factors.

    Returns no option when one of the selected factors is not a column of the
    file (this includes the case where nothing is selected).
    """
    factorsList = _parse_selection(factorSelected)
    header, rows = _load_table(file_path)
    columns = [_column_index(header, factorComponent) for factorComponent in factorsList]
    if -1 in columns:
        return []
    valuesList = [_distinct_values(rows, column) for column in columns]
    allCombinations = list(valuesList[0])
    for values in valuesList[1:]:
        # new values vary slowest, previously built combinations fastest
        allCombinations = [combination+"*"+valueComponent
                           for valueComponent in values
                           for combination in allCombinations]
    return _as_options(allCombinations)


def get_allrow_names(file_path, factorSelected):
    """Options 'factor:value' for every value of every selected factor.

    Selected factors that are not columns of the file are ignored.
    """
    factorsList = _parse_selection(factorSelected)
    header, rows = _load_table(file_path)
    allValues = []
    for factorComponent in factorsList:
        column = _column_index(header, factorComponent)
        if column == -1:
            continue
        for valueComponent in _distinct_values(rows, column):
            allValues.append(factorComponent+":"+valueComponent)
    return _as_options(allValues)


def _rename_expression_header(expressionFile_name, outputExpressionFile, dico):
    """Copy the expression file, replacing sample names by 'Condition<n>'.

    Fills ``dico`` (original name -> alias) and returns the next free
    condition number. Raises NameError on a duplicated sample name.
    """
    iCondition = 1
    with open(expressionFile_name) as inputfile, open(outputExpressionFile, 'w') as outputfile:
        firstLine = next(inputfile).rstrip().split("\t")
        newFirstLine = ""
        for i, field_component in enumerate(firstLine):
            if i > 0:
                # conditions names should not be redundant with other conditions
                if field_component in dico:
                    raise NameError('condition name allready exists!')
                dico[field_component] = "Condition"+str(iCondition)
                newFirstLine += "\t"+"Condition"+str(iCondition)
                iCondition += 1
            else:
                newFirstLine += field_component
        outputfile.write(newFirstLine+"\n")
        for line in inputfile:
            outputfile.write(line)
    return iCondition


def _rename_factor_table(input_name, output_name, dico, counters):
    """Copy a factor table, replacing factor, sample and value names by aliases.

    ``counters`` holds the next free 'condition', 'factor' and 'value'
    numbers and is updated in place. Raises NameError when a factor name is
    already known as a factor or a condition.
    """
    with open(input_name) as inputfile, open(output_name, 'w') as outputfile:
        isHeader = True
        for line in inputfile:
            currentLine = line.rstrip().split("\t")
            newCurrentLine = ""
            for i, field_component in enumerate(currentLine):
                if isHeader:
                    if i == 0:
                        newCurrentLine = field_component
                    else:
                        # factor names should not be redundant with other factors or conditions
                        if field_component in dico:
                            raise NameError('factor name allready exists!')
                        dico[field_component] = "Factor"+str(counters["factor"])
                        newCurrentLine += "\t"+dico[field_component]
                        counters["factor"] += 1
                elif i == 0:
                    # reuse the alias of a known condition, or create a new one
                    if field_component not in dico:
                        dico[field_component] = "Condition"+str(counters["condition"])
                        counters["condition"] += 1
                    newCurrentLine = dico[field_component]
                else:
                    if field_component not in dico:
                        dico[field_component] = "Value"+str(counters["value"])
                        counters["value"] += 1
                    newCurrentLine += "\t"+dico[field_component]
            outputfile.write(newCurrentLine+"\n")
            isHeader = False


def _write_dictionary(dico, ouputDictionnary):
    """Write 'original<TAB>alias' lines; return 1 (nothing written) on a forbidden name."""
    # condition names are allowed to hold forbidden characters, other names are not
    for key, value in dico.items():
        for specialCharacter in _FORBIDDEN_CHARACTERS:
            if not value.startswith("Condition") and key.find(specialCharacter) != -1:
                return 1
    with open(ouputDictionnary, 'w') as outputfile:
        for key, value in dico.items():
            outputfile.write(key+"\t"+value+"\n")
    return 0


def replaceNamesInFiles(expressionFile_name, conditionFile_name, outputExpressionFile, outputConditionFile, ouputDictionnary):
    """Anonymise sample, factor and value names of the expression and factor files.

    Returns 0 on success, 1 when a factor or value name holds a forbidden
    character; raises NameError on duplicated sample or factor names.
    """
    dico = {}
    iCondition = _rename_expression_header(expressionFile_name, outputExpressionFile, dico)
    counters = {"condition": iCondition, "factor": 1, "value": 1}
    _rename_factor_table(conditionFile_name, outputConditionFile, dico, counters)
    return _write_dictionary(dico, ouputDictionnary)


def replaceNamesBlockInFiles(expressionFile_name, conditionFile_name, blockingFile_name, outputExpressionFile, outputConditionFile, outputBlockingFile, ouputDictionnary):
    """Same as replaceNamesInFiles, additionally renaming the blocking-factor file.

    The factor file and the blocking file share one dictionary and one set of
    counters, so a name receives the same alias in both outputs.
    """
    dico = {}
    iCondition = _rename_expression_header(expressionFile_name, outputExpressionFile, dico)
    counters = {"condition": iCondition, "factor": 1, "value": 1}
    _rename_factor_table(conditionFile_name, outputConditionFile, dico, counters)
    _rename_factor_table(blockingFile_name, outputBlockingFile, dico, counters)
    return _write_dictionary(dico, ouputDictionnary)
# src/LIMMAscriptV4.R
# A command-line interface for LIMMA to use with Galaxy
# written by Jimmy Vandel
# one of these arguments is required:
#
#

# Locate the directory holding this script (from the "--file=" argument that
# Rscript passes) so the companion helper files can be sourced from it.
initial.options <- commandArgs(trailingOnly = FALSE)
file.arg.name <- "--file="
script.name <- sub(file.arg.name, "", initial.options[grep(file.arg.name, initial.options)])
script.basename <- dirname(script.name)
source(file.path(script.basename, "utils.R"))
source(file.path(script.basename, "getopt.R"))

#addComment("Welcome R!")

# setup R error handling to go to stderr
options(show.error.messages = FALSE, error = function() {
  cat(geterrmessage(), file = stderr())
  q("no", 1, FALSE)
})

# we need that to not crash galaxy with an UTF8 error on German LC settings.
loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
loc <- Sys.setlocale("LC_NUMERIC", "C")

#get starting time
start.time <- Sys.time()

# The option name is case-sensitive: the former spelling "stringAsfactors"
# created an unused option and left the real setting untouched.
options(stringsAsFactors = FALSE, useFancyQuotes = FALSE)
args <- commandArgs()

# get options, using the spec as defined by the enclosed list.
# we read the options from the default: commandArgs(TRUE).
# Option specification, one entry per option:
# long name, short flag, argument mask (0 = none, 1 = required), type.
specRows <- list(
  c("dataFile", "i", 1, "character"),
  c("factorInfo", "a", 1, "character"),
  c("blockingInfo", "b", 1, "character"),
  c("dicoRenaming", "g", 1, "character"),
  c("blockingPolicy", "u", 1, "character"),
  c("fdrThreshold", "t", 1, "double"),
  c("thresholdFC", "d", 1, "double"),
  c("format", "f", 1, "character"),
  c("histo", "h", 1, "character"),
  c("volcano", "v", 1, "character"),
  c("factorsContrast", "r", 1, "character"),
  c("contrastNames", "p", 1, "character"),
  c("firstGroupContrast", "m", 1, "character"),
  c("secondGroupContrast", "n", 1, "character"),
  c("controlGroups", "c", 1, "character"),
  c("fratioFile", "s", 1, "character"),
  c("organismID", "x", 1, "character"),
  c("rowNameType", "y", 1, "character"),
  c("quiet", "q", 0, "logical"),
  c("log", "l", 1, "character"),
  c("outputFile", "o", 1, "character"),
  c("outputDfFile", "z", 1, "character")
)
spec <- matrix(unlist(specRows), byrow = TRUE, ncol = 4)
opt <- getopt(spec)

# The log file is checked first: every later message is written to it.
if (is.null(opt$log)) {
  addComment("[ERROR]'log file' is required\n")
  q("no", 1, FALSE)
}
addComment("[INFO]Start of R script", TRUE, opt$log, display = FALSE)

# Log the message and quit with status 1 when the check fails.
stopOnInvalid <- function(isInvalid, errorMessage) {
  if (isInvalid) {
    addComment(errorMessage, TRUE, opt$log)
    q("no", 1, FALSE)
  }
}

# enforce the following required arguments (order defines which error is reported first)
stopOnInvalid(is.null(opt$dataFile), "[ERROR]'dataFile' is required")
stopOnInvalid(!is.null(opt$blockingInfo) && is.null(opt$blockingPolicy), "[ERROR]blocking policy is missing")
stopOnInvalid(is.null(opt$dicoRenaming), "[ERROR]renaming dictionnary is missing")
stopOnInvalid(is.null(opt$factorsContrast), "[ERROR]factor informations are missing")
stopOnInvalid(length(opt$firstGroupContrast) != length(opt$secondGroupContrast), "[ERROR]some contrast groups seems to be empty")
stopOnInvalid(is.null(opt$factorInfo), "[ERROR]factors info is missing")
stopOnInvalid(is.null(opt$format), "[ERROR]'output format' is required")
stopOnInvalid(is.null(opt$fdrThreshold), "[ERROR]'FDR threshold' is required")
stopOnInvalid(is.null(opt$outputFile) || is.null(opt$outputDfFile), "[ERROR]'output files' are required")
stopOnInvalid(is.null(opt$thresholdFC), "[ERROR]'FC threshold' is required")
stopOnInvalid(is.null(opt$fratioFile), "[ERROR]F-ratio parameter is missing")

# the script is verbose unless the quiet flag was given
verbose <- is.null(opt$quiet)

# internal parameters
# whether the fold changes computed by LIMMA are replaced by an LS-MEAN
# computation (i.e. a mean of the per-group means inside each contrast term
# rather than one global mean per term)
useLSmean <- FALSE

addComment("[INFO]Parameters checked!", TRUE, opt$log, display = FALSE)

addComment(c("[INFO]Working directory: ", getwd()), TRUE, opt$log, display = FALSE)
addComment(c("[INFO]Command line: ", args), TRUE, opt$log, display = FALSE)

#directory for plots
dir.create(file.path(getwd(), "plotDir"))
dir.create(file.path(getwd(), "plotLyDir"))

# load packages silently
suppressPackageStartupMessages({
  library("methods")
  library("limma")
  library("biomaRt")
  library("ggplot2")
  library("plotly")
  library("stringr")
  library("RColorBrewer")
})


# load the renaming dictionary (original name in column 1, alias in column 2)
renamingDico <- read.csv(file = file.path(getwd(), opt$dicoRenaming), header = FALSE, sep = "\t", colClasses = "character")
rownames(renamingDico) <- renamingDico[, 2]


# load the input expression file
expDataMatrix <- read.csv(file = file.path(getwd(), opt$dataFile), header = FALSE, sep = "\t", colClasses = "character")
#remove first row to convert it as colnames (to avoid X before colnames with header=T)
colNamesData <- expDataMatrix[1, -1]
expDataMatrix <- expDataMatrix[-1, ]
#remove first colum to convert it as rownames
rowNamesData <- expDataMatrix[, 1]
expDataMatrix <- expDataMatrix[, -1]
# Convert the expression values (read as character) to a numeric matrix.
# data.matrix() must not be used on character columns: since R 4.0 it converts
# them through factor(), which yields level codes instead of the values.
# as.matrix() also turns the single-sample vector case into a one-column matrix.
expDataMatrix=as.matrix(expDataMatrix)
storage.mode(expDataMatrix)="double"
dimnames(expDataMatrix)=list(rowNamesData,colNamesData)

#test the number of rows that are constant in dataMatrix
nbConstantRows=length(which(unlist(apply(expDataMatrix,1,var))==0))
if(nbConstantRows>0){
  addComment(c("[WARNING]",nbConstantRows,"rows are constant across conditions in input data file"),TRUE,opt$log,display=FALSE)
}

#test if all condition names are present in dico
if(!all(colnames(expDataMatrix) %in% rownames(renamingDico))){
  addComment("[ERROR]Missing condition names in renaming dictionary",TRUE,opt$log)
  q( "no", 1, FALSE )
}

addComment("[INFO]Expression data loaded and checked",TRUE,opt$log,display=FALSE)
addComment(c("[INFO]Dim of expression matrix:",dim(expDataMatrix)),TRUE,opt$log,display=FALSE)

#load the factor file
factorInfoMatrix=read.csv(file=file.path(getwd(), opt$factorInfo),header=FALSE,sep="\t",colClasses="character")
#remove first row to convert it as colnames
colnames(factorInfoMatrix)=factorInfoMatrix[1,]
factorInfoMatrix=factorInfoMatrix[-1,]
#use first colum to convert it as rownames but not removing it to avoid conversion as vector in unique factor case
rownames(factorInfoMatrix)=factorInfoMatrix[,1]

if(length(setdiff(colnames(expDataMatrix),rownames(factorInfoMatrix)))!=0){
  addComment("[ERROR]Missing samples in factor file",TRUE,opt$log)
  q( "no", 1, FALSE )
}

#order sample as in expression matrix and remove spurious sample
factorInfoMatrix=factorInfoMatrix[colnames(expDataMatrix),]

#test if all values names are present in dico
if(!all(unlist(factorInfoMatrix) %in% rownames(renamingDico))){
  addComment("[ERROR]Missing factor names in renaming dictionary",TRUE,opt$log)
  q( "no", 1, FALSE )
}

addComment("[INFO]Factors OK",TRUE,opt$log,display=FALSE)
addComment(c("[INFO]Dim of factorInfo matrix:",dim(factorInfoMatrix)),TRUE,opt$log,display=FALSE)

##manage blocking factor
# blockingFactor is used with the "correlated" policy (one merged factor),
# blockinFactorsList with any other policy (extra terms of the design)
blockingFactor=NULL
blockinFactorsList=NULL
if(!is.null(opt$blockingInfo)){

  #load the blocking factor file
  blockingInfoMatrix=read.csv(file=file.path(getwd(), opt$blockingInfo),header=FALSE,sep="\t",colClasses="character")
  #remove first row to convert it as colnames
  colnames(blockingInfoMatrix)=blockingInfoMatrix[1,]
  blockingInfoMatrix=blockingInfoMatrix[-1,]
  #use first colum to convert it as rownames but not removing it to avoid conversion as vector in unique factor case
  rownames(blockingInfoMatrix)=blockingInfoMatrix[,1]


  if(length(setdiff(colnames(expDataMatrix),rownames(blockingInfoMatrix)))!=0){
    addComment("[ERROR]Missing samples in blocking factor file",TRUE,opt$log)
    q( "no", 1, FALSE )
  }

  #order sample as in expression matrix
  blockingInfoMatrix=blockingInfoMatrix[colnames(expDataMatrix),]

  #test if all blocking names are present in dico
  if(!all(unlist(blockingInfoMatrix) %in% rownames(renamingDico))){
    addComment("[ERROR]Missing blocking names in renaming dictionary",TRUE,opt$log)
    q( "no", 1, FALSE )
  }

  #remove blocking factors allready present as real factors
  blockingNotInMainFactors=setdiff(colnames(blockingInfoMatrix)[-1],colnames(factorInfoMatrix)[-1])

  if(length(blockingNotInMainFactors)<(ncol(blockingInfoMatrix)-1))addComment("[WARNING]Blocking factors cannot be principal factors",TRUE,opt$log,display=FALSE)

  if(length(blockingNotInMainFactors)>0){

    blockingInfoMatrix=blockingInfoMatrix[,c(colnames(blockingInfoMatrix)[1],blockingNotInMainFactors)]

    groupBlocking=rep("c",ncol(expDataMatrix))
    #for each blocking factor
    for(blockingFact in blockingNotInMainFactors){
      if(opt$blockingPolicy=="correlated"){
        # merge all blocking factors into one label per sample
        indNewFact=as.numeric(factor(blockingInfoMatrix[,blockingFact]))
        groupBlocking=paste(groupBlocking,indNewFact,sep="_")
      }else{
        if(is.null(blockinFactorsList))blockinFactorsList=list()
        blockinFactorsList[[blockingFact]]=factor(unlist(lapply(blockingInfoMatrix[,blockingFact],function(x)paste(c(blockingFact,"_",x),collapse=""))))
      }
    }
    if(opt$blockingPolicy=="correlated"){
      blockingFactor=factor(groupBlocking)
      if(length(levels(blockingFactor))==1){
        addComment("[ERROR]Selected blocking factors seems to be constant",TRUE,opt$log)
        q( "no", 1, FALSE )
      }
    }
    addComment("[INFO]Blocking info OK",TRUE,opt$log,display=FALSE)
  }else{
    addComment("[WARNING]No blocking factors will be considered",TRUE,opt$log,display=FALSE)
  }
}


##rename different input parameters using renamingDictionary
opt$factorsContrast=renamingDico[unlist(lapply(unlist(strsplit(opt$factorsContrast,",")),function(x)which(renamingDico[,1]==x))),2]

userDefinedContrasts=FALSE
if(!is.null(opt$firstGroupContrast) && !is.null(opt$secondGroupContrast)){
  userDefinedContrasts=TRUE
  for(iContrast in seq_along(opt$firstGroupContrast)){
    # each contrast is a comma-separated list of groups, each group a "*"-separated list of values
    opt$firstGroupContrast[iContrast]=paste(unlist(lapply(unlist(strsplit(opt$firstGroupContrast[iContrast],",")),function(x)paste(renamingDico[unlist(lapply(unlist(strsplit(x,"\\*")),function(x)which(renamingDico[,1]==x))),2],collapse="*"))),collapse=",")
    opt$secondGroupContrast[iContrast]=paste(unlist(lapply(unlist(strsplit(opt$secondGroupContrast[iContrast],",")),function(x)paste(renamingDico[unlist(lapply(unlist(strsplit(x,"\\*")),function(x)which(renamingDico[,1]==x))),2],collapse="*"))),collapse=",")
  }
}

if(!is.null(opt$controlGroups)){
  renamedGroups=c()
  for(iGroup in unlist(strsplit(opt$controlGroups,","))){
    # a control group is written "factor:value"
    renamedControlGroup=paste(renamingDico[unlist(lapply(unlist(strsplit(iGroup,":")),function(x)which(renamingDico[,1]==x))),2],collapse=":")
    if(length(renamedControlGroup)==0 || any(which(unlist(gregexpr(text = renamedControlGroup,pattern = ":"))==-1))){
      addComment("[ERROR]Control groups for interaction seem to mismatch, please check them.",TRUE,opt$log)
      q( "no", 1, FALSE )
    }
    renamedGroups=c(renamedGroups,renamedControlGroup)
  }
  opt$controlGroups=renamedGroups
}
addComment("[INFO]Contrast variables are renamed to avoid confusion",TRUE,opt$log,display=FALSE)
##renaming done

#to convert factor as numeric value --> useless now ?
#expDataMatrix=apply(expDataMatrix,c(1,2),function(x)as.numeric(paste(x)))

#get factors info for LIMMA
factorsList=list()
for(iFactor in opt$factorsContrast){
  if(!(iFactor %in% colnames(factorInfoMatrix))){
    addComment("[ERROR]Required factors are missing in input file",TRUE,opt$log)
    q( "no", 1, FALSE )
  }
  # levels are named "<factor>_<value>"
  factorsList[[iFactor]]=factor(unlist(lapply(factorInfoMatrix[,iFactor],function(x)paste(c(iFactor,"_",x),collapse=""))))
  if(length(levels(factorsList[[iFactor]]))==1){
    addComment("[ERROR]One selected factor seems to be constant",TRUE,opt$log)
    q( "no", 1, FALSE )
  }
}

#check if there is at least 2 factors to allow interaction computation
if(!is.null(opt$controlGroups) && length(factorsList)<2){
  addComment("[ERROR]You cannot ask for interaction with less than 2 factors",TRUE,opt$log)
  q( "no", 1, FALSE )
}

#merge all factors as a single one
factorsMerged=as.character(factorsList[[opt$factorsContrast[1]]])
for(iFactor in opt$factorsContrast[-1]){
  factorsMerged=paste(factorsMerged,as.character(factorsList[[iFactor]]),sep=".")
}
factorsMerged=factor(factorsMerged)

#checked that coefficient number (ie. factorsMerged levels) is strictly smaller than sample size
if(length(levels(factorsMerged))>=length(factorsMerged)){
  addComment(c("[ERROR]No enough samples (",length(factorsMerged),") to estimate ",length(levels(factorsMerged))," coefficients"),TRUE,opt$log)
  q( "no", 1, FALSE )
}

#get the sample size of each factor values
sampleSizeFactor=table(factorsMerged)


if(!is.null(blockinFactorsList)){
  # build the design formula as text: merged factor plus one term per blocking factor, no intercept
  factorString=c("blockinFactorsList[['", names(blockinFactorsList)[1],"']]")
  for(blockingFact in names(blockinFactorsList)[-1]){
    factorString=c(factorString," + blockinFactorsList[['",blockingFact,"']]")
  }
  design = model.matrix(as.formula(paste(c("~ factorsMerged +",factorString," + 0"),collapse="")))

  #rename design columns
  coeffMeaning = levels(factorsMerged)
  for(blockingFact in blockinFactorsList){
    # the first level of each blocking factor has no column of its own
    coeffMeaning=c(coeffMeaning,levels(blockingFact)[-1])
  }
  colnames(design) = coeffMeaning
}else{
  design = model.matrix(as.formula( ~ factorsMerged + 0))

  #rename design columns
  coeffMeaning = levels(factorsMerged)
  colnames(design) = coeffMeaning
}

addComment(c("[INFO]Available coefficients: ",coeffMeaning),TRUE,opt$log,display=FALSE)

estimableCoeff=which(colSums(design)!=0)

addComment("[INFO]Design done",TRUE,opt$log,display=FALSE)

#use blocking factor if exists
if(!is.null(blockingFactor)){
  corfit <- duplicateCorrelation(expDataMatrix, design, block=blockingFactor)

  addComment(c("[INFO]Correlation within groups: ",corfit$consensus.correlation),TRUE,opt$log,display=FALSE)

  #run linear model fit
  data.fit = lmFit(expDataMatrix,design,block = blockingFactor, correlation=corfit$consensus.correlation)
}else{
  #run linear model fit
  data.fit = lmFit(expDataMatrix,design)
}

estimatedCoeff=which(!is.na(data.fit$coefficients[1,]))

addComment("[INFO]Lmfit done",TRUE,opt$log,display=FALSE)

#catch situation where some coefficients cannot be estimated, probably due to dependances between design columns
#if(length(setdiff(estimableCoeff,estimatedCoeff))>0){
#  addComment("[ERROR]Error in design matrix, check your group definitions",T,opt$log)
#  q( "no", 1, F )
#}
#too strong condition, should return ERROR only when coefficients relative to principal factors cannot be estimated, otherwise, return a simple WARNING

#define requested contrasts
#requiredContrasts: formulas later passed to makeContrasts
#humanReadingContrasts: short labels (also used to build plot file names)
#persoContrastName: optional user supplied titles ("" when none was given)
requiredContrasts=c()
humanReadingContrasts=c()
persoContrastName=c()
if(userDefinedContrasts){
  for(iContrast in seq_along(opt$firstGroupContrast)){
    #translate each "levelA*levelB" group of the request into a coefficient name "factorA_levelA.factorB_levelB"
    posGroup=unlist(lapply(unlist(strsplit(opt$firstGroupContrast[iContrast],",")),function(x)paste(paste(opt$factorsContrast,unlist(strsplit(x,"\\*")),sep="_"),collapse=".")))
    negGroup=unlist(lapply(unlist(strsplit(opt$secondGroupContrast[iContrast],",")),function(x)paste(paste(opt$factorsContrast,unlist(strsplit(x,"\\*")),sep="_"),collapse=".")))
    #clear posGroup and negGroup from empty groups
    emptyPosGroups=which(!(posGroup%in%coeffMeaning))
    if(length(emptyPosGroups)>0){
      addComment(c("[WARNING]The group(s)",posGroup[emptyPosGroups],"is/are removed from contrast as it/they is/are empty"),T,opt$log,display=FALSE)
      posGroup=posGroup[-emptyPosGroups]
      currentHumanContrast=paste(unlist(strsplit(opt$firstGroupContrast[iContrast],","))[-emptyPosGroups],collapse="+")
    }else{
      currentHumanContrast=paste(unlist(strsplit(opt$firstGroupContrast[iContrast],",")),collapse="+")
    }
    emptyNegGroups=which(!(negGroup%in%coeffMeaning))
    if(length(emptyNegGroups)>0){
      addComment(c("[WARNING]The group(s)",negGroup[emptyNegGroups],"is/are removed from contrast as it/they is/are empty"),T,opt$log,display=FALSE)
      negGroup=negGroup[-emptyNegGroups]
      currentHumanContrast=paste(c(currentHumanContrast,unlist(strsplit(opt$secondGroupContrast[iContrast],","))[-emptyNegGroups]),collapse="-")
    }else{
      currentHumanContrast=paste(c(currentHumanContrast,unlist(strsplit(opt$secondGroupContrast[iContrast],","))),collapse="-")
    }
    if(length(posGroup)==0 || length(negGroup)==0 ){
      addComment(c("[WARNING]Contrast",currentHumanContrast,"cannot be estimated due to empty group"),T,opt$log,display=FALSE)
    }else{
      if(all(posGroup%in%negGroup) && all(negGroup%in%posGroup)){
        addComment(c("[WARNING]Contrast",currentHumanContrast,"cannot be estimated due to null contrast"),T,opt$log,display=FALSE)
      }else{
        #get coefficients required for first group added as positive
        positiveCoeffWeights=sampleSizeFactor[posGroup]/sum(sampleSizeFactor[posGroup])
        #positiveCoeffWeights=rep(1,length(posGroup))
        #names(positiveCoeffWeights)=names(sampleSizeFactor[posGroup])
        #get coefficients required for second group added as negative
        negativeCoeffWeights=sampleSizeFactor[negGroup]/sum(sampleSizeFactor[negGroup])
        #negativeCoeffWeights=rep(1,length(negGroup))
        #names(negativeCoeffWeights)=names(sampleSizeFactor[negGroup])
        #build the resulting contrast
        currentContrast=paste(paste(positiveCoeffWeights[posGroup],posGroup,sep="*"),collapse="+")
        currentContrast=paste(c(currentContrast,paste(paste(negativeCoeffWeights[negGroup],negGroup,sep="*"),collapse="-")),collapse="-")
        requiredContrasts=c(requiredContrasts,currentContrast)

        #build the human reading contrast
        humanReadingContrasts=c(humanReadingContrasts,currentHumanContrast)
        if(!is.null(opt$contrastNames) && nchar(opt$contrastNames[iContrast])>0){
          persoContrastName=c(persoContrastName,opt$contrastNames[iContrast])
        }else{
          persoContrastName=c(persoContrastName,"")
        }

        addComment(c("[INFO]Contrast added : ",currentHumanContrast),T,opt$log,display=F)
        addComment(c("with complete formula ",currentContrast),T,opt$log,display=F)
      }
    }
  }
}


#define the true formula with interactions to get interaction coefficients
factorString=c("factorsList[['", names(factorsList)[1],"']]")
for(iFactor in names(factorsList)[-1]){
  factorString=c(factorString," * factorsList[['",iFactor,"']]")
}

if(!is.null(blockinFactorsList)){
  for(blockingFact in names(blockinFactorsList)){
    factorString=c(factorString," + blockinFactorsList[['",blockingFact,"']]")
  }
}

#should not be null at the end
allFtestMeanSquare=NULL
#to get the F-test values
#the anova of the first row is only used to recover the names of the model terms
estimatedInteractions=rownames(anova(lm(as.formula(paste(c("expDataMatrix[1,] ~ ",factorString),collapse="")))))
estimatedInteractions=c(unlist(lapply(estimatedInteractions[-length(estimatedInteractions)],function(x){temp=unlist(strsplit(x,"[ \" | : ]"));paste(temp[seq(2,length(temp),3)],collapse=":")})),estimatedInteractions[length(estimatedInteractions)])
#rename estimated interaction terms using renamingDico
estimatedInteractions=c(unlist(lapply(estimatedInteractions[-length(estimatedInteractions)],function(x)paste(renamingDico[unlist(strsplit(x,":")),1],collapse=":"))),estimatedInteractions[length(estimatedInteractions)])
#for each row, mean squares of every term divided by the last one (the residual row of the anova table)
#(stored under an explicit name instead of "t" to avoid shadowing base::t)
meanSquareRatios <- unlist(apply(expDataMatrix,1,function(x){temp=anova(lm(as.formula(paste(c("x ~ ",factorString),collapse=""))))$`Mean Sq`;temp/temp[length(temp)]}))
allFtestMeanSquare <- t(matrix(meanSquareRatios,nrow=length(estimatedInteractions)))
#remove from allFtest rows containing NA
#drop=FALSE keeps a matrix even when a single row remains, otherwise colnames<- below would fail
if(length(which(is.na(allFtestMeanSquare[,1])))>0)allFtestMeanSquare=allFtestMeanSquare[-(which(is.na(allFtestMeanSquare[,1]))),,drop=FALSE]
colnames(allFtestMeanSquare)=estimatedInteractions

#add contrasts corresponding to interaction terms
if(!is.null(opt$controlGroups)){
  #first load user defined control group for each factor
  controlGroup=rep(NA,length(factorsList))
  names(controlGroup)=names(factorsList)
  for(iGroup in opt$controlGroups){
    #each entry is "factorName:groupName"; the group is stored as "factorName_groupName"
    splitGroup=unlist(strsplit(iGroup,":"))
    splitGroup[2]=paste(splitGroup[1],splitGroup[2],sep = "_")
    #check if defined control group is really a level of the corresponding factor
    if(!splitGroup[1]%in%names(controlGroup) || !splitGroup[2]%in%factorsList[[splitGroup[1]]]){
      addComment(c("[ERROR]The factor name",splitGroup[1],"does not exist or group name",splitGroup[2]),T,opt$log)
      q( "no", 1, F )
    }
    if(!is.na(controlGroup[splitGroup[1]])){
      addComment("[ERROR]Several control groups are defined for the same factor, please select only one control group for each factor if you want to compute interaction contrasts",T,opt$log)
      q( "no", 1, F )
    }
    controlGroup[splitGroup[1]]=splitGroup[2]
  }

  #check if all factor have a defined control group
  if(any(is.na(controlGroup))){
    addComment("[ERROR]Missing control group for some factors, please check them if you want to compute interaction contrasts",T,opt$log)
    q( "no", 1, F )
  }

  nbFactors=length(factorsList)
  interactionContrasts=c()
  contrastClass=c()
  #initialize list for the first level
  newPreviousLoopContrast=list()
  #seq_len() yields an empty loop with a single factor (1:(nbFactors-1) would iterate over 1 and 0)
  for(iFactorA in seq_len(nbFactors-1)){
    nameFactorA=names(factorsList)[iFactorA]
    compA=c()
    for(levelA in setdiff(levels(factorsList[[iFactorA]]),controlGroup[nameFactorA])){
      compA=c(compA,paste(levelA,controlGroup[nameFactorA],sep="-"))
    }
    newPreviousLoopContrast[[as.character(iFactorA)]]=compA
  }
  #make a loop for growing interaction set
  for(globalIfactor in seq_len(nbFactors-1)){
    previousLoopContrast=newPreviousLoopContrast
    newPreviousLoopContrast=list()
    #factor A reuse contrasts made at previous loop
    #(list names are the "."-separated indexes of the factors already combined)
    for(iFactorA in names(previousLoopContrast)){
      compA=previousLoopContrast[[iFactorA]]

      if(max(as.integer(unlist(strsplit(iFactorA,"\\."))))<nbFactors){
        #factor B is the new factor to include in interaction set
        for(iFactorB in (max(as.integer(unlist(strsplit(iFactorA,"\\."))))+1):nbFactors){
          nameFactorB=names(factorsList)[iFactorB]
          #keep contrasts identified through interacting factors set
          newPreviousLoopContrast[[paste(iFactorA,iFactorB,sep=".")]]=c()
          for(iCompA in compA){
            for(levelB in setdiff(levels(factorsList[[iFactorB]]),controlGroup[nameFactorB])){
              #decompose the contrast compA to apply the new level of factor B on each term
              temp=unlist(strsplit(iCompA,"[ + ]"))
              splitCompA=temp[1]
              for(iTemp in temp[-1])splitCompA=c(splitCompA,"+",iTemp)
              splitCompA=unlist(lapply(splitCompA,function(x){temp=unlist(strsplit(x,"-"));splitCompB=temp[1];for(iTemp in temp[-1])splitCompB=c(splitCompB,"-",iTemp);splitCompB}))
              #apply on each contrast term the new level of factor B
              firstTerm=paste(unlist(lapply(splitCompA,function(x)if(x!="+" && x!="-"){paste(x,levelB,sep=".")}else{x})),collapse="")
              secondTerm=negativeExpression(paste(unlist(lapply(splitCompA,function(x)if(x!="+" && x!="-"){paste(x,controlGroup[nameFactorB],sep=".")}else{x})),collapse=""))
              currentContrast=paste(c(firstTerm,secondTerm),collapse="")

              newPreviousLoopContrast[[paste(iFactorA,iFactorB,sep=".")]]=c(newPreviousLoopContrast[[paste(iFactorA,iFactorB,sep=".")]],currentContrast)
            }
          }
        }
      }
    }
    for(iContrast in names(newPreviousLoopContrast)){
      contrastClass=c(contrastClass,rep(iContrast,length(newPreviousLoopContrast[[iContrast]])))
    }
    interactionContrasts=c(interactionContrasts,unlist(newPreviousLoopContrast))
  }
  #make human title for interactions
  names(interactionContrasts)=contrastClass
  humanReadingInteraction=unlist(lapply(interactionContrasts,function(x)gsub("\\.",":",unlist(strsplit(x,"[+-]"))[1])))

  contrastToIgnore=c()

  #complete with control groups and order to match to coeffs
  #seq_along() skips the loop when no interaction contrast was generated
  for(iContrast in seq_along(interactionContrasts)){
    missingFactor=setdiff(1:nbFactors,as.integer(unlist(strsplit(names(interactionContrasts[iContrast]),"\\."))))
    #decompose the contrast
    temp=unlist(strsplit(interactionContrasts[iContrast],"[ + ]"))
    splitContrast=temp[1]
    for(iTemp in temp[-1])splitContrast=c(splitContrast,"+",iTemp)
    splitContrast=unlist(lapply(splitContrast,function(x){temp=unlist(strsplit(x,"-"));splitCompB=temp[1];for(iTemp in temp[-1])splitCompB=c(splitCompB,"-",iTemp);splitCompB}))
    for(iFactor in missingFactor){
      for(iTerm in seq_along(splitContrast)){
        if(splitContrast[iTerm]!="+" && splitContrast[iTerm]!="-"){
          #insert the control group of the missing factor at the position of that factor
          splitTerm=unlist(strsplit(splitContrast[iTerm],"\\."))
          if(iFactor==1)splitContrast[iTerm]=paste(c(controlGroup[names(factorsList)[iFactor]],splitTerm),collapse=".")
          if(iFactor==nbFactors)splitContrast[iTerm]=paste(c(splitTerm,controlGroup[names(factorsList)[iFactor]]),collapse=".")
          if(iFactor>1 && iFactor<nbFactors)splitContrast[iTerm]=paste(c(splitTerm[1:(iFactor-1)],controlGroup[names(factorsList)[iFactor]],splitTerm[iFactor:length(splitTerm)]),collapse=".")
        }
      }
    }
    interactionContrasts[iContrast]=paste(splitContrast,collapse="")
    #odd positions of splitContrast are the terms, even positions the "+"/"-" signs
    if(all(splitContrast[seq(1,length(splitContrast),2)]%in%coeffMeaning)){
      addComment(c("[INFO]Interaction contrast added : ",humanReadingInteraction[iContrast]),T,opt$log,display=F)
      addComment(c("with complete formula ",interactionContrasts[iContrast]),T,opt$log,display=F)
    }else{
      contrastToIgnore=c(contrastToIgnore,iContrast)
      addComment(c("[WARNING]Interaction contrast",humanReadingInteraction[iContrast],"is removed due to empty group"),T,opt$log,display=F)
    }
  }

  #add interaction contrasts to global contrast list
  if(length(contrastToIgnore)>0){
    requiredContrasts=c(requiredContrasts,interactionContrasts[-contrastToIgnore])
    humanReadingContrasts=c(humanReadingContrasts,humanReadingInteraction[-contrastToIgnore])
    persoContrastName=c(persoContrastName,rep("",length(humanReadingInteraction[-contrastToIgnore])))
  }else{
    requiredContrasts=c(requiredContrasts,interactionContrasts)
    humanReadingContrasts=c(humanReadingContrasts,humanReadingInteraction)
    persoContrastName=c(persoContrastName,rep("",length(humanReadingInteraction)))
  }
}#end of interaction contrasts


#remove from requiredContrasts contrasts that cannot be estimated
#fixed=TRUE: coefficient names are literal strings, their "." must not act as a regex wildcard
toRemove=unique(unlist(lapply(setdiff(coeffMeaning,names(estimatedCoeff)),function(x)grep(x,requiredContrasts,fixed=TRUE))))
if(length(toRemove)>0){
  addComment(c("[WARNING]",length(toRemove)," contrasts are removed, due to missing coefficients"),T,opt$log,display=FALSE)
  requiredContrasts=requiredContrasts[-toRemove]
  humanReadingContrasts=humanReadingContrasts[-toRemove]
  persoContrastName=persoContrastName[-toRemove]
}

if(length(requiredContrasts)==0){
  addComment("[ERROR]No contrast to compute, please check your contrast definition.",T,opt$log)
  q( "no", 1, F )
}

#compute for each contrast mean of coefficients in posGroup and negGroup for FC computation of log(FC) with LSmean as in Partek
meanPosGroup=list()
meanNegGroup=list()
for(iContrast in seq_along(requiredContrasts)){
  #define posGroup and negGroup
  #first split contrast
  temp=unlist(strsplit(requiredContrasts[iContrast],"[ + ]"))
  splitContrast=temp[1]
  for(iTemp in temp[-1])splitContrast=c(splitContrast,"+",iTemp)
  splitContrast=unlist(lapply(splitContrast,function(x){temp=unlist(strsplit(x,"-"));splitCompB=temp[1];for(iTemp in temp[-1])splitCompB=c(splitCompB,"-",iTemp);splitCompB}))
  #and then put each term in good group
  posGroup=c()
  negGroup=c()
  nextIsPos=TRUE
  for(iSplit in splitContrast){
    if(iSplit=="+")nextIsPos=TRUE
    if(iSplit=="-")nextIsPos=FALSE
    if(iSplit!="-" && iSplit!="+"){
      #remove weights of contrast terms
      iSplitBis=unlist(strsplit(iSplit,"[*]"))
      iSplitBis=iSplitBis[length(iSplitBis)]
      if(nextIsPos){
        posGroup=c(posGroup,iSplitBis)
      }else{
        negGroup=c(negGroup,iSplitBis)
      }
    }
  }
  #compute means for each group
  #drop=FALSE keeps one row per gene whatever the number of genes or selected coefficients
  meanPosGroup[[iContrast]]=apply(data.fit$coefficients[,posGroup,drop=FALSE],1,mean)
  meanNegGroup[[iContrast]]=apply(data.fit$coefficients[,negGroup,drop=FALSE],1,mean)
}


contrast.matrix = makeContrasts(contrasts=requiredContrasts,levels=design)
data.fit.con = contrasts.fit(data.fit,contrast.matrix)

addComment("[INFO]Contrast definition done",T,opt$log,T,display=FALSE)

#compute LIMMA statistics
data.fit.eb = eBayes(data.fit.con)

addComment("[INFO]Estimation done",T,opt$log,T,display=FALSE)

#adjust p.value through FDR
data.fit.eb$adj_p.value=data.fit.eb$p.value
for(iComparison in seq_len(ncol(data.fit.eb$adj_p.value))){
  data.fit.eb$adj_p.value[,iComparison]=p.adjust(data.fit.eb$p.value[,iComparison],"fdr")
}

#add a new field based on LS-means for each contrast instead of global mean like they were calculated in coefficients field
data.fit.eb$coefficientsLS=data.fit.eb$coefficients
if(ncol(data.fit.eb$coefficients)!=length(meanPosGroup)){
  addComment("[ERROR]Estimated contrasts number unexpected",T,opt$log)
  q( "no", 1, F )
}
for(iContrast in seq_along(meanPosGroup)){
  data.fit.eb$coefficientsLS[,iContrast]=meanPosGroup[[iContrast]][rownames(data.fit.eb$coefficientsLS)]-meanNegGroup[[iContrast]][rownames(data.fit.eb$coefficientsLS)]
}

#if requested replace coefficient computed as global mean by LS-means values
if(useLSmean)data.fit.eb$coefficients=data.fit.eb$coefficientsLS

addComment("[INFO]Core treatment done",T,opt$log,T,display=FALSE)


##convert humanReadingContrasts with namingDictionary to create humanReadingContrastsRenamed and keep original humanReadingContrasts names for file names
humanReadingContrastsRenamed=rep("",length(humanReadingContrasts))
for(iContrast in seq_along(humanReadingContrasts)){
  if(persoContrastName[iContrast]==""){
    #if(verbose)addComment(humanReadingContrasts[iContrast])
    specialCharacters=str_extract_all(humanReadingContrasts[iContrast],"[+|*|_|:|-]")[[1]]
    #if(verbose)addComment(specialCharacters)
    nameConverted=unlist(lapply(strsplit(humanReadingContrasts[iContrast],"[+|*|_|:|-]")[[1]],function(x)renamingDico[x,1]))
    #if(verbose)addComment(nameConverted)
    #interleave names and separators (separators are recycled), then drop the extra trailing separator
    humanReadingContrastsRenamed[iContrast]=paste(nameConverted,specialCharacters,collapse="",sep="")
    #if(verbose)addComment(humanReadingContrastsRenamed[iContrast])
    humanReadingContrastsRenamed[iContrast]=substr(humanReadingContrastsRenamed[iContrast],1,nchar(humanReadingContrastsRenamed[iContrast])-1)
  }else{
    humanReadingContrastsRenamed[iContrast]=persoContrastName[iContrast]
  }
}

#write correspondences between plot file names (humanReadingContrasts) and displayed names in figure legends (humanReadingContrastsRenamed), useful to define html items in xml file
correspondanceTable=matrix("",ncol=2,nrow=ncol(data.fit.eb$p.value))
correspondanceTable[,1]=unlist(lapply(humanReadingContrasts,function(x)gsub(":","_INT_",gsub("\\+","_PLUS_",gsub("\\*","_AND_",x)))))
correspondanceTable[,2]=humanReadingContrastsRenamed
rownames(correspondanceTable)=correspondanceTable[,2]
write.table(correspondanceTable,file=file.path(getwd(), "correspondanceFileNames.csv"),quote=FALSE,sep="\t",col.names = F,row.names = F)

#plot nominal p-val histograms for selected comparisons
histogramPerPage=6
if (!is.null(opt$histo)) {
  iToPlot=1
  plotVector=list()
  nbComparisons=ncol(data.fit.eb$p.value)
  for (iComparison in seq_len(nbComparisons)){
    dataToPlot=data.frame(pval=data.fit.eb$p.value[,iComparison],id=rownames(data.fit.eb$p.value))
    p <- ggplot(data=dataToPlot, aes(x=pval)) + geom_histogram(colour="red", fill="salmon") +
      theme_bw() + ggtitle(humanReadingContrastsRenamed[iComparison]) + ylab(label="Frequencies") + xlab(label="Nominal p-val") +
      theme(panel.border=element_blank(),plot.title = element_text(hjust = 0.5))
    plotVector[[length(plotVector)+1]]=p

    #one interactive html file per comparison
    pp <- ggplotly(p)
    htmlwidgets::saveWidget(as_widget(pp), paste(c(file.path(getwd(), "plotLyDir"),"/",opt$histo,"_",correspondanceTable[humanReadingContrastsRenamed[iComparison],1],".html"),collapse=""),selfcontained = F)

    if(iComparison==nbComparisons || length(plotVector)==histogramPerPage){
      #plot and close the actual plot
      if(opt$format=="pdf"){
        pdf(paste(c("./plotDir/",opt$histo,iToPlot,".pdf"),collapse=""))
      }else{
        png(paste(c("./plotDir/",opt$histo,iToPlot,".png"),collapse=""))
      }
      multiplot(plotlist=plotVector,cols=2)
      dev.off()
      if(iComparison<nbComparisons){
        #prepare for a new plotting file if necessary
        plotVector=list()
        iToPlot=iToPlot+1
      }
    }
  }
  addComment("[INFO]Histograms drawn",T,opt$log,T,display=FALSE)

}

#plot F-test sum square barplot
if(!is.null(allFtestMeanSquare)){
  dataToPlot=data.frame(Fratio=apply(allFtestMeanSquare,2,mean),Factors=factor(colnames(allFtestMeanSquare),levels = colnames(allFtestMeanSquare)))

  p <- ggplot(data=dataToPlot, aes(x=Factors, y=Fratio, fill=Factors)) +
    geom_bar(stat="identity") + scale_fill_manual(values = colorRampPalette(brewer.pal(9,"Set1"))(ncol(allFtestMeanSquare))[sample(ncol(allFtestMeanSquare))]) + ylab(label="mean F-ratio") +
    theme_bw() + theme(panel.border=element_blank(),plot.title = element_text(hjust = 0.5)) + ggtitle("Source of variation")

  if(opt$format=="pdf"){
    pdf(paste(c("./plotDir/",opt$fratioFile,".pdf"),collapse=""))
  }else{
    png(paste(c("./plotDir/",opt$fratioFile,".png"),collapse=""))
  }
  plot(p)
  dev.off()

  pp <- ggplotly(p)
  htmlwidgets::saveWidget(as_widget(pp), paste(c(file.path(getwd(), "plotLyDir"),"/",opt$fratioFile,".html"),collapse=""),selfcontained = F)

  addComment("[INFO]SumSquareTest drawn",T,opt$log,T,display=FALSE)
}

#plot VOLCANO plot
#volcanoplot(data.fit.eb,coef=1,highlight=10)
volcanoPerPage=1
logFCthreshold=log2(opt$thresholdFC)
if (!is.null(opt$volcano)) {
  iToPlot=1
  plotVector=list()
  nbComparisons=ncol(data.fit.eb$adj_p.value)
  for (iComparison in seq_len(nbComparisons)){

    #define the log10(p-val) threshold corresponding to FDR threshold fixed by user
    probeWithLowFDR=-log10(data.fit.eb$p.value[which(data.fit.eb$adj_p.value[,iComparison]<=opt$fdrThreshold),iComparison])
    pvalThresholdFDR=NULL
    if(length(probeWithLowFDR)>0)pvalThresholdFDR=min(probeWithLowFDR)

    #get significant points over FC and FDR thresholds
    significativePoints=intersect(which(abs(data.fit.eb$coefficients[,iComparison])>=logFCthreshold),which(data.fit.eb$adj_p.value[,iComparison]<=opt$fdrThreshold))

    #to reduce size of html plot, we keep 20000 points maximum sampled amongst genes with pval>=33%(pval) and abs(log2(FC))<=66%(abs(log2(FC)))
    htmlPointsToRemove=intersect(which(abs(data.fit.eb$coefficients[,iComparison])<=quantile(abs(data.fit.eb$coefficients[,iComparison]),c(0.66))),which(data.fit.eb$p.value[,iComparison]>=quantile(abs(data.fit.eb$p.value[,iComparison]),c(0.33))))
    if(length(htmlPointsToRemove)>20000){
      htmlPointsToRemove=setdiff(htmlPointsToRemove,sample(htmlPointsToRemove,20000))
    }else{
      htmlPointsToRemove=c()
    }

    xMinLimPlot=min(data.fit.eb$coefficients[,iComparison])-0.2
    xMaxLimPlot=max(data.fit.eb$coefficients[,iComparison])+0.2
    yMaxLimPlot= max(-log10(data.fit.eb$p.value[,iComparison]))+0.2

    #reset at each comparison so that a comparison without significant point never
    #reads an undefined object or the points of the previous comparison
    dataSignifToPlot=NULL
    if(length(significativePoints)>0){
      dataSignifToPlot=data.frame(pval=-log10(data.fit.eb$p.value[significativePoints,iComparison]),FC=data.fit.eb$coefficients[significativePoints,iComparison],description=paste(names(data.fit.eb$coefficients[significativePoints,iComparison]),"\n","FC: " , round(2^data.fit.eb$coefficients[significativePoints,iComparison],2) , " | FDR p-val: ",prettyNum(data.fit.eb$adj_p.value[significativePoints,iComparison],digits=4), sep=""))
      #to test if remains any normal points to draw
      if(length(significativePoints)<nrow(data.fit.eb$p.value)){
        dataToPlot=data.frame(pval=-log10(data.fit.eb$p.value[-significativePoints,iComparison]),FC=data.fit.eb$coefficients[-significativePoints,iComparison],description=paste("FC: " , round(2^data.fit.eb$coefficients[-significativePoints,iComparison],2) , " | FDR p-val: ",prettyNum(data.fit.eb$adj_p.value[-significativePoints,iComparison],digits=4), sep=""))
      }else{
        dataToPlot=data.frame(pval=0,FC=0,description="null")
      }
    }else{
      dataToPlot=data.frame(pval=-log10(data.fit.eb$p.value[,iComparison]),FC=data.fit.eb$coefficients[,iComparison],description=paste("FC: " , round(2^data.fit.eb$coefficients[,iComparison],2) , " | FDR p-val: ",prettyNum(data.fit.eb$adj_p.value[,iComparison],digits=4), sep=""))
    }

    ##traditional plot
    p <- ggplot(data=dataToPlot, aes(x=FC, y=pval)) + geom_point() +
      theme_bw() + ggtitle(humanReadingContrastsRenamed[iComparison]) + ylab(label="-log10(p-val)") + xlab(label="Log2 Fold Change") +
      theme(panel.border=element_blank(),plot.title = element_text(hjust = 0.5),legend.position="none")
    if(logFCthreshold!=0) p <- p + geom_vline(xintercept=-logFCthreshold, color="salmon",linetype="dotted", size=1) +
      geom_vline(xintercept=logFCthreshold, color="salmon",linetype="dotted", size=1) +
      geom_text(data.frame(text=c(paste(c("log2(1/FC=",opt$thresholdFC,")"),collapse=""),paste(c("log2(FC=",opt$thresholdFC,")"),collapse="")),x=c(-logFCthreshold,logFCthreshold),y=c(0,0)),mapping=aes(x=x, y=y, label=text), size=4, angle=90, vjust=-0.4, hjust=0, color="salmon")
    if(!is.null(pvalThresholdFDR)) p <- p + geom_hline(yintercept=pvalThresholdFDR, color="skyblue1",linetype="dotted", size=0.5) +
      geom_text(data.frame(text=c(paste(c("FDR pval limit(",opt$fdrThreshold,")"),collapse="")),x=c(xMinLimPlot),y=c(pvalThresholdFDR)),mapping=aes(x=x, y=y, label=text), size=4, vjust=0, hjust=0, color="skyblue3")
    if(length(significativePoints)>0)p <- p + geom_point(data=dataSignifToPlot,aes(colour=description))

    ##interactive plot
    if(length(htmlPointsToRemove)>0){
      pointToRemove=union(htmlPointsToRemove,significativePoints)
      #to test if remains any normal points to draw
      if(length(pointToRemove)<nrow(data.fit.eb$p.value)){
        dataToPlot=data.frame(pval=-log10(data.fit.eb$p.value[-pointToRemove,iComparison]),FC=data.fit.eb$coefficients[-pointToRemove,iComparison],description=paste("FC: " , round(2^data.fit.eb$coefficients[-pointToRemove,iComparison],2) , " | FDR p-val: ", prettyNum(data.fit.eb$adj_p.value[-pointToRemove,iComparison],digits=4), sep=""))
      }else{
        dataToPlot=data.frame(pval=0,FC=0,description="null")
      }
    }

    #dataSignifToPlot holds one row per significant point, so their count is length(significativePoints)
    nbPointsToPlot=nrow(dataToPlot)+length(significativePoints)
    if(nbPointsToPlot>40000)addComment(c("[WARNING]For",humanReadingContrastsRenamed[iComparison],"volcano, numerous points to plot(",nbPointsToPlot,"), resulting volcano could be heavy, using more stringent thresholds could be helpful."),T,opt$log)

    phtml <- plot_ly(data=dataToPlot, x=~FC, y=~pval,type="scatter", mode="markers",showlegend = FALSE, marker = list(color="gray",opacity=0.5), text=~description, hoverinfo="text") %>%
      layout(title = humanReadingContrastsRenamed[iComparison],xaxis=list(title="Log2 Fold Change",showgrid=TRUE, zeroline=FALSE),yaxis=list(title="-log10(p-val)", showgrid=TRUE, zeroline=FALSE))
    if(length(significativePoints)>0) phtml=add_markers(phtml,data=dataSignifToPlot, x=~FC, y=~pval, mode="markers" , marker=list( color=log10(abs(dataSignifToPlot$FC)*dataSignifToPlot$pval),colorscale='Rainbow'), text=~description, hoverinfo="text", inherit = FALSE) %>% hide_colorbar()
    if(logFCthreshold!=0){
      phtml=add_trace(phtml,x=c(-logFCthreshold,-logFCthreshold), y=c(0,yMaxLimPlot), type="scatter", mode = "lines", line=list(color="coral",dash="dash"), hoverinfo='none', showlegend = FALSE,inherit = FALSE)
      phtml=add_annotations(phtml,x=-logFCthreshold,y=0,xref = "x",yref = "y",text = paste(c("log2(1/FC=",opt$thresholdFC,")"),collapse=""),xanchor = 'right',showarrow = F,textangle=270,font=list(color="coral"))
      phtml=add_trace(phtml,x=c(logFCthreshold,logFCthreshold), y=c(0, yMaxLimPlot), type="scatter", mode = "lines", line=list(color="coral",dash="dash"), hoverinfo='none', showlegend = FALSE,inherit = FALSE)
      phtml=add_annotations(phtml,x=logFCthreshold,y=0,xref = "x",yref = "y",text = paste(c("log2(FC=",opt$thresholdFC,")"),collapse=""),xanchor = 'right',showarrow = F,textangle=270,font=list(color="coral"))
    }
    if(!is.null(pvalThresholdFDR)){
      phtml=add_trace(phtml,x=c(xMinLimPlot,xMaxLimPlot), y=c(pvalThresholdFDR,pvalThresholdFDR), type="scatter", mode = "lines", line=list(color="cornflowerblue",dash="dash"), hoverinfo='none', showlegend = FALSE,inherit = FALSE)
      phtml=add_annotations(phtml,x=xMinLimPlot,y=pvalThresholdFDR+0.1,xref = "x",yref = "y",text = paste(c("FDR pval limit(",opt$fdrThreshold,")"),collapse=""),xanchor = 'left',showarrow = F,font=list(color="cornflowerblue"))
    }
    plotVector[[length(plotVector)+1]]=p

    #save plotly files
    pp <- ggplotly(phtml)
    htmlwidgets::saveWidget(as_widget(pp), paste(c(file.path(getwd(), "plotLyDir"),"/",opt$volcano,"_",correspondanceTable[humanReadingContrastsRenamed[iComparison],1],".html"),collapse=""),selfcontained = F)


    if(iComparison==nbComparisons || length(plotVector)==volcanoPerPage){
      #plot and close the actual plot
      if(opt$format=="pdf"){
        pdf(paste(c("./plotDir/",opt$volcano,"_",correspondanceTable[humanReadingContrastsRenamed[iComparison],1],".pdf"),collapse=""))
      }else{
        png(paste(c("./plotDir/",opt$volcano,"_",correspondanceTable[humanReadingContrastsRenamed[iComparison],1],".png"),collapse=""))
      }
      multiplot(plotlist=plotVector,cols=1)
      dev.off()
      if(iComparison<nbComparisons){
        #prepare for a new plotting file if necessary
        plotVector=list()
        iToPlot=iToPlot+1
      }
    }
  }
  remove(dataToPlot,dataSignifToPlot)
  addComment("[INFO]Volcanos drawn",T,opt$log,T,display=FALSE)
}

rowItemInfo=NULL
if(!is.null(opt$rowNameType) && !is.null(opt$organismID)){
  ##get gene information from BioMart
  #if(!require("biomaRt")){
  #    source("https://bioconductor.org/biocLite.R")
  #    biocLite("biomaRt")
  #}

  ensembl_hs_mart <- useMart(biomart="ensembl", dataset=opt$organismID)
  ensembl_df <- getBM(attributes=c(opt$rowNameType,"description"),mart=ensembl_hs_mart)
  #keep descriptions of entries with a non empty identifier, cut before " [Source" and limited to 30 characters
  rowItemInfo=ensembl_df[which(ensembl_df[,1]!=""),2]
  rowItemInfo=unlist(lapply(rowItemInfo,function(x)substr(unlist(strsplit(x," \\[Source"))[1],1,30)))
  names(rowItemInfo)=ensembl_df[which(ensembl_df[,1]!=""),1]
}

#write(unlist(dimnames(data.fit.eb$adj_p.value)),opt$log,append = T)

#prepare additional output containing df informations
dfMatrix=matrix(0,ncol=3,nrow = nrow(data.fit.eb$coefficients),dimnames = list(rownames(data.fit.eb$coefficients),c("df.residual","df.prior","df.total")))
#copy the degrees of freedom reported by the fit into the df table
dfMatrix[,"df.residual"] <- data.fit.eb$df.residual
dfMatrix[,"df.prior"] <- data.fit.eb$df.prior
dfMatrix[,"df.total"] <- data.fit.eb$df.total

#a gene is kept when, for at least one comparison, its adjusted p-value is below the FDR threshold...
passesFdrFilter <- apply(data.fit.eb$adj_p.value,1,function(rowValues)any(rowValues<=opt$fdrThreshold,na.rm=TRUE))
#...and when, for at least one comparison, its absolute log2 fold change reaches the FC threshold
passesFcFilter <- apply(data.fit.eb$coefficients,1,function(rowValues)any(abs(rowValues)>=logFCthreshold,na.rm=TRUE))
genesToKeep <- intersect(names(which(passesFdrFilter)),names(which(passesFcFilter)))

if(length(genesToKeep)==0){
  addComment(c("[WARNING]No significative genes considering the given FDR threshold : ",opt$fdrThreshold),T,opt$log,display=FALSE)
}else{
  #every statistic table receives the same row and column labels
  contrastLabels <- colnames(data.fit.eb$p.value)
  #restrict one statistic table to the kept genes; matrix() rebuilds a 2D table
  #even when a single gene is selected
  keepGeneRows <- function(statMatrix){
    keptRows <- matrix(statMatrix[genesToKeep,],ncol=ncol(statMatrix))
    dimnames(keptRows) <- list(genesToKeep,contrastLabels)
    keptRows
  }

  data.fit.eb$adj_p.value <- keepGeneRows(data.fit.eb$adj_p.value)
  data.fit.eb$p.value <- keepGeneRows(data.fit.eb$p.value)
  data.fit.eb$coefficients <- keepGeneRows(data.fit.eb$coefficients)
  data.fit.eb$t <- keepGeneRows(data.fit.eb$t)

  dfMatrix <- dfMatrix[genesToKeep,,drop=FALSE]
}

addComment("[INFO]Significant genes filtering done",T,opt$log,T,display=FALSE)


#plot VennDiagramm for genes below threshold between comparisons
#t=apply(data.fit.eb$adj_p.value[,1:4],2,function(x)names(which(x<=opt$threshold)))
#get.venn.partitions(t)
#vennCounts(data.fit.eb$adj_p.value[,1:4]<=opt$threshold)

#make a simple sort genes based only on the first comparison
#newOrder=order(data.fit.eb$adj_p.value[,1])
#data.fit.eb$adj_p.value=data.fit.eb$adj_p.value[newOrder,]

#alternative sorting strategy based on the mean gene rank over all comparisons
if(length(genesToKeep)>1){
  nbRankedComparisons <- ncol(data.fit.eb$adj_p.value)
  #rank of each gene inside each comparison, then averaged over comparisons
  rankPerComparison <- lapply(seq_len(nbRankedComparisons),function(iComparison)rank(data.fit.eb$adj_p.value[,iComparison]))
  meanRank <- Reduce(`+`,rankPerComparison)/nbRankedComparisons
  newOrder <- order(meanRank)

  #labels shared by the four statistic tables once reordered
  sortedGenes <- rownames(data.fit.eb$p.value)[newOrder]
  contrastLabels <- colnames(data.fit.eb$p.value)
  sortGeneRows <- function(statMatrix){
    sortedRows <- matrix(statMatrix[newOrder,],ncol=ncol(statMatrix))
    dimnames(sortedRows) <- list(sortedGenes,contrastLabels)
    sortedRows
  }

  data.fit.eb$adj_p.value <- sortGeneRows(data.fit.eb$adj_p.value)
  data.fit.eb$p.value <- sortGeneRows(data.fit.eb$p.value)
  data.fit.eb$coefficients <- sortGeneRows(data.fit.eb$coefficients)
  data.fit.eb$t <- sortGeneRows(data.fit.eb$t)

  dfMatrix <- dfMatrix[newOrder,,drop=FALSE]
}


#formating output matrices depending on genes to keep
#layout: two header rows (comparison name, statistic name), two header columns
#(gene, info) and five statistic columns per comparison
nbContrasts <- ncol(data.fit.eb$adj_p.value)
nbOutputColumns <- nbContrasts*5+2
comparisonHeader <- c("X","X",rep(humanReadingContrastsRenamed,each=5))
statisticHeader <- c("X","X",rep(c("p-val","FDR.p-val","FC","log2(FC)","t-stat"),nbContrasts))

if(length(genesToKeep)==0){
  #no gene passed the filters: emit placeholder rows so both files keep their layout
  outputData <- matrix(0,ncol=nbOutputColumns,nrow=3)
  outputData[1,] <- comparisonHeader
  outputData[2,] <- statisticHeader
  outputData[,1] <- c("LIMMA","Gene","noGene")
  outputData[,2] <- c("Comparison","Info","noInfo")

  outputDfData <- matrix(0,ncol=3+1,nrow=2)
  outputDfData[1,] <- c("X","df.residual","df.prior","df.total")
  outputDfData[,1] <- c("Statistics","noGene")
}else{
  #format matrix to be correctly read by galaxy (move headers in first column and row)
  #the same code serves one or several kept genes
  keptGenes <- rownames(data.fit.eb$adj_p.value)
  geneRows <- 2+seq_along(keptGenes)

  outputData <- matrix(0,ncol=nbOutputColumns,nrow=length(keptGenes)+2)
  outputData[1,] <- comparisonHeader
  outputData[2,] <- statisticHeader
  outputData[,1] <- c("LIMMA","Gene",keptGenes)
  outputData[,2] <- c("Comparison","Info",rep("na",length(keptGenes)))
  if(!is.null(rowItemInfo)){
    outputData[geneRows,2] <- rowItemInfo[keptGenes]
  }

  #statistics in the order of their columns inside each comparison
  statBlocks <- list(data.fit.eb$p.value,
                     data.fit.eb$adj_p.value,
                     2^data.fit.eb$coefficients,
                     data.fit.eb$coefficients,
                     data.fit.eb$t)
  for(iStat in seq_along(statBlocks)){
    outputData[geneRows,seq(iStat+2,nbOutputColumns,5)] <- prettyNum(statBlocks[[iStat]],digits=4)
  }

  dfColumns <- c("df.residual","df.prior","df.total")
  outputDfData <- matrix(0,ncol=3+1,nrow=1+nrow(dfMatrix))
  outputDfData[1,] <- c("Statistics",dfColumns)
  outputDfData[-1,1] <- rownames(dfMatrix)
  for(iDf in seq_along(dfColumns)){
    outputDfData[-1,iDf+1] <- prettyNum(dfMatrix[,dfColumns[iDf]],digits=4)
  }
}
addComment("[INFO]Formated output",T,opt$log,display=FALSE)

#write output results
write.table(outputData,file=opt$outputFile,quote=FALSE,sep="\t",col.names = F,row.names = F)

#write df info file
write.table(outputDfData,file=opt$outputDfFile,quote=FALSE,sep="\t",col.names = F,row.names = F)

end.time <- Sys.time()
addComment(c("[INFO]Total execution time for R script:",as.numeric(end.time - start.time,units="mins"),"mins"),T,opt$log,display=FALSE)

addComment("[INFO]End of R script",T,opt$log,display=FALSE)

printSessionInfo(opt$log)
#sessionInfo()
# changeset diff header: --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/VolcanoPlotsScript.R Fri Jun 26 09:43:41 2020 -0400 @@ -0,0 +1,426 @@
# R script to plot volcanos through Galaxy based GIANT tool
# written by Jimmy Vandel
#
#
# locate the directory of this script from the "--file=" argument of the R command line,
# then load the helper scripts stored next to it
initial.options <- commandArgs(trailingOnly = FALSE)
file.arg.name <- "--file="
script.name <- sub(file.arg.name, "", initial.options[grep(file.arg.name, initial.options)])
script.basename <- dirname(script.name)
source(file.path(script.basename, "utils.R"))
source(file.path(script.basename, "getopt.R"))

#addComment("Welcome R!")

# setup R error handling to go to stderr
# (the handler prints the error message on stderr and quits with exit status 1)
options( show.error.messages=F, error = function () { cat(geterrmessage(), file=stderr() ); q( "no", 1, F ) } )

# we need that to not crash galaxy with an UTF8 error on German LC settings.
loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
loc <- Sys.setlocale("LC_NUMERIC", "C")

#get starting time
start.time <- Sys.time()

# the option is spelled "stringsAsFactors"; the former "stringAsfactors" spelling only
# created an unused option and left the factor conversion setting untouched
options(stringsAsFactors = FALSE, useFancyQuotes = FALSE)
args <- commandArgs()

# get options, using the spec as defined by the enclosed list.
# we read the options from the default: commandArgs(TRUE).
# Option specification, one row per flag:
# long name, short name, argument mask (0 = none, 1 = required), storage mode
spec <- rbind(
  c("statisticsFile", "i", 1, "character"),
  c("volcanoName", "n", 1, "character"),
  c("pvalColumnName", "p", 1, "character"),
  c("fdrColumnName", "m", 1, "character"),
  c("fcColumnName", "c", 1, "character"),
  c("fcKind", "d", 1, "character"),
  c("fdrThreshold", "s", 1, "double"),
  c("fcThreshold", "e", 1, "double"),
  c("organismID", "x", 1, "character"),
  c("rowNameType", "y", 1, "character"),
  c("log", "l", 1, "character"),
  c("outputFile", "o", 1, "character"),
  c("format", "f", 1, "character"),
  c("quiet", "q", 0, "logical")
)
opt <- getopt(spec)

# Log an error message to the log file, then stop the script with exit status 1
exitWithLoggedError <- function(errorText) {
  addComment(errorText, TRUE, opt$log)
  q("no", 1, FALSE)
}

# enforce the following required arguments
# the log file comes first: without it the other errors cannot be logged
if (is.null(opt$log)) {
  addComment("[ERROR]'log file' is required\n")
  q("no", 1, FALSE)
}
addComment("[INFO]Start of R script", TRUE, opt$log, display = FALSE)

if (is.null(opt$statisticsFile)) {
  exitWithLoggedError("[ERROR]'statisticsFile' is required")
}

# number of selected columns of each kind: p-value, adjusted p-value, fold change
selectedColumnCounts <- c(
  length(opt$pvalColumnName),
  length(opt$fdrColumnName),
  length(opt$fcColumnName)
)
if (any(selectedColumnCounts == 0)) {
  exitWithLoggedError("[ERROR]no selected columns")
}
if (!(selectedColumnCounts[1] == selectedColumnCounts[3] && selectedColumnCounts[1] == selectedColumnCounts[2])) {
  exitWithLoggedError("[ERROR]different number of selected columns between p.val, adj-p.val and FC ")
}

if (is.null(opt$fcKind)) {
  exitWithLoggedError("[ERROR]'fcKind' is required")
}
if (is.null(opt$fdrThreshold)) {
  exitWithLoggedError("[ERROR]'FDR threshold' is required")
}
if (is.null(opt$fcThreshold)) {
  exitWithLoggedError("[ERROR]'FC threshold' is required")
}
if (is.null(opt$outputFile)) {
  exitWithLoggedError("[ERROR]'output file' is required")
}
if (is.null(opt$format)) {
  exitWithLoggedError("[ERROR]'output format' is required")
}

# the script is verbose unless the "quiet" flag was given
verbose <- is.null(opt$quiet)

# internal parameters
addComment("[INFO]Parameters checked test mode !", TRUE, opt$log, display = FALSE)

addComment(c("[INFO]Working directory: ", getwd()), TRUE, opt$log, display = FALSE)
addComment(c("[INFO]Command line: ", args), TRUE, opt$log, display = FALSE)

# directories for plots (static images and plotly widgets)
dir.create(file.path(getwd(), "plotDir"))
dir.create(file.path(getwd(), "plotLyDir"))

# load packages silently
suppressPackageStartupMessages({
  library("methods")
  library("biomaRt")
  library("ggplot2")
  library("plotly")
  library("stringr")
})

# one volcano is drawn per selected p-value column
nbVolcanosToPlot <- length(opt$pvalColumnName)

# load input file; every column is read as text and converted to numbers below
statDataMatrix <- read.csv(
  file = file.path(getwd(), opt$statisticsFile),
  header = FALSE, sep = "\t", colClasses = "character"
)
# move the first column to the row names; drop = FALSE keeps a data.frame
# (and its row names) even when a single data column is left
rownames(statDataMatrix) <- statDataMatrix[, 1]
statDataMatrix <- statDataMatrix[, -1, drop = FALSE]

# check that the column count matches the requested volcano number (3 columns
# per volcano: p-value, adjusted p-value, fold change) BEFORE any column is
# addressed by position, so a malformed file gets this message and not a raw
# subscript error
if (ncol(statDataMatrix) != 3 * nbVolcanosToPlot) {
  addComment("[ERROR]Input file column number is different from requested volcano number", TRUE, opt$log)
  q("no", 1, FALSE)
}

# identify volcanos without adjusted p-value info (their column should contain
# the same content as rownames) and replace that column with NA values
FDRinfo <- rep(TRUE, nbVolcanosToPlot)
for (iVolcano in seq_len(nbVolcanosToPlot)) {
  fdrColumnIndex <- (iVolcano - 1) * 3 + 2
  # input parameter should be None when adjusted p-val are not available
  if (opt$fdrColumnName[iVolcano] == "None") {
    # content of the corresponding column should also be the same as rownames
    if (!all(statDataMatrix[, fdrColumnIndex] == rownames(statDataMatrix))) {
      addComment(c("[ERROR]It seems that input stat matrix contains adjusted p-values for volcano", iVolcano, "whereas input parameter indicates that not."), TRUE, opt$log)
      q("no", 1, FALSE)
    }
    FDRinfo[iVolcano] <- FALSE
    statDataMatrix[, fdrColumnIndex] <- NA
  }
}

# convert text columns to numbers explicitly: data.matrix() alone turns
# character columns into factor codes on R >= 4.0, which would silently
# replace the statistics by meaningless integers
statDataMatrix[] <- lapply(statDataMatrix, as.numeric)
statDataMatrix <- data.matrix(statDataMatrix)

#build global dataFrame
# global dataFrame: one matrix column each for p.value, adj_p.value and coefficients (log2 FC)
dataFrame <- data.frame(row.names = rownames(statDataMatrix))
# start with p-value (input columns 1, 4, 7, ...)
dataFrame$p.value <- statDataMatrix[, seq(1, nbVolcanosToPlot * 3, 3), drop = FALSE]
# compute FDR if needed or just get available info
dataFrame$adj_p.value <- dataFrame$p.value
for (iVolcano in seq_len(nbVolcanosToPlot)) {
  if (FDRinfo[iVolcano]) {
    # adjusted p-values are already computed (input columns 2, 5, 8, ...)
    dataFrame$adj_p.value[, iVolcano] <- statDataMatrix[, (iVolcano - 1) * 3 + 2, drop = FALSE]
  } else {
    # adjusted p-values should be computed based on p-val using FDR
    dataFrame$adj_p.value[, iVolcano] <- p.adjust(dataFrame$p.value[, iVolcano, drop = FALSE], "fdr")
    addComment(c("[INFO]Adjusted p-values are not available in input for volcano", iVolcano, ", FDR approach will be used on available raw p-values"), TRUE, opt$log)
  }
}
if (opt$fcKind == "FC") {
  # we should transform as Log2FC (input columns 3, 6, 9, ...)
  dataFrame$coefficients <- log2(statDataMatrix[, seq(3, nbVolcanosToPlot * 3, 3), drop = FALSE])
  addComment(c("[INFO]FC are converted in log2(FC) for plotting"), TRUE, opt$log)
} else {
  dataFrame$coefficients <- statDataMatrix[, seq(3, nbVolcanosToPlot * 3, 3), drop = FALSE]
}

addComment(c("[INFO]Input data available for", nbVolcanosToPlot, "volcano(s) with", nrow(statDataMatrix), "rows"), TRUE, opt$log)


# Build the data.frame of points to draw (-log10 p-value, log2 FC, hover text); gene names are prepended to the hover text on request
buildVolcanoPoints <- function(pValues, adjPValues, log2FC, withGeneNames = FALSE) {
  hoverText <- paste("FC: ", round(2^log2FC, 2), " | Adjusted p-val: ", prettyNum(adjPValues, digits = 4), sep = "")
  if (withGeneNames) {
    hoverText <- paste(names(log2FC), "\n", hoverText, sep = "")
  }
  data.frame(pval = -log10(pValues), FC = log2FC, description = hoverText)
}

#plot VOLCANOs
volcanoPerPage <- 1
logFCthreshold <- log2(opt$fcThreshold)
iToPlot <- 1
plotVector <- list()
volcanoNameList <- c()
for (iVolcano in seq_len(nbVolcanosToPlot)) {

  # use the user-supplied name, or "<index>_<p-value column name>" when it is empty
  if (nchar(opt$volcanoName[iVolcano]) > 0) {
    curentVolcanoName <- opt$volcanoName[iVolcano]
  } else {
    curentVolcanoName <- paste(iVolcano, opt$pvalColumnName[iVolcano], sep = "_")
  }

  # keep only rows without NA for p-val, adjusted p-val and coeff
  pValToPlot <- dataFrame$p.value[, iVolcano]
  fdrToPlot <- dataFrame$adj_p.value[, iVolcano]
  coeffToPlot <- dataFrame$coefficients[, iVolcano]

  rowToRemove <- unique(c(which(is.na(pValToPlot)), which(is.na(fdrToPlot)), which(is.na(coeffToPlot))))
  if (length(rowToRemove) > 0) {
    pValToPlot <- pValToPlot[-rowToRemove]
    fdrToPlot <- fdrToPlot[-rowToRemove]
    coeffToPlot <- coeffToPlot[-rowToRemove]
  }
  addComment(c("[INFO]For", curentVolcanoName, "volcano,", length(rowToRemove), "rows are discarded due to NA values,", length(pValToPlot), "remaining rows."), TRUE, opt$log)

  # save volcano name
  volcanoNameList <- c(volcanoNameList, curentVolcanoName)

  # output files are named after the volcano index, avoiding characters possibly troubling
  volcanoFileName <- iVolcano

  # define the -log10(p-val) threshold corresponding to FDR threshold fixed by user
  probeWithLowFDR <- -log10(pValToPlot[which(fdrToPlot <= opt$fdrThreshold)])
  pvalThresholdFDR <- NULL
  if (length(probeWithLowFDR) > 0) {
    pvalThresholdFDR <- min(probeWithLowFDR)
  }

  # get significant points over FC and FDR thresholds
  significativePoints <- intersect(which(abs(coeffToPlot) >= logFCthreshold), which(fdrToPlot <= opt$fdrThreshold))

  # to reduce size of html plot, we keep 20000 points maximum sampled amongst
  # genes with pval>=33%(pval) and abs(log2(FC))<=66%(abs(log2(FC)))
  htmlPointsToRemove <- intersect(
    which(abs(coeffToPlot) <= quantile(abs(coeffToPlot), c(0.66))),
    which(pValToPlot >= quantile(abs(pValToPlot), c(0.33)))
  )
  if (length(htmlPointsToRemove) > 20000) {
    htmlPointsToRemove <- setdiff(htmlPointsToRemove, sample(htmlPointsToRemove, 20000))
  } else {
    htmlPointsToRemove <- c()
  }

  xMinLimPlot <- min(coeffToPlot) - 0.2
  xMaxLimPlot <- max(coeffToPlot) + 0.2
  yMaxLimPlot <- max(-log10(pValToPlot)) + 0.2

  if (length(significativePoints) > 0) {
    dataSignifToPlot <- buildVolcanoPoints(pValToPlot[significativePoints], fdrToPlot[significativePoints], coeffToPlot[significativePoints], withGeneNames = TRUE)
    # to test if remains any normal points to draw
    if (length(significativePoints) < length(pValToPlot)) {
      dataToPlot <- buildVolcanoPoints(pValToPlot[-significativePoints], fdrToPlot[-significativePoints], coeffToPlot[-significativePoints])
    } else {
      dataToPlot <- data.frame(pval = 0, FC = 0, description = "null")
    }
  } else {
    dataToPlot <- buildVolcanoPoints(pValToPlot, fdrToPlot, coeffToPlot)
  }

  ## traditional plot

  p <- ggplot(data = dataToPlot, aes(x = FC, y = pval)) + geom_point() +
    theme_bw() + ggtitle(curentVolcanoName) + ylab(label = "-Log10(p-val)") + xlab(label = "Log2 Fold Change") +
    theme(panel.border = element_blank(), plot.title = element_text(hjust = 0.5), legend.position = "none")
  if (logFCthreshold != 0) {
    # vertical lines and labels marking the +/- log2(FC) thresholds
    fcLabels <- data.frame(
      text = c(paste(c("log2(1/FC=", opt$fcThreshold, ")"), collapse = ""), paste(c("log2(FC=", opt$fcThreshold, ")"), collapse = "")),
      x = c(-logFCthreshold, logFCthreshold),
      y = c(0, 0)
    )
    p <- p + geom_vline(xintercept = -logFCthreshold, color = "salmon", linetype = "dotted", size = 1) +
      geom_vline(xintercept = logFCthreshold, color = "salmon", linetype = "dotted", size = 1) +
      geom_text(data = fcLabels, mapping = aes(x = x, y = y, label = text), size = 4, angle = 90, vjust = -0.4, hjust = 0, color = "salmon")
  }
  if (!is.null(pvalThresholdFDR)) {
    # horizontal line and label marking the p-value matching the FDR threshold
    fdrLabel <- data.frame(
      text = c(paste(c("Adjusted pval limit(", opt$fdrThreshold, ")"), collapse = "")),
      x = c(xMinLimPlot),
      y = c(pvalThresholdFDR)
    )
    p <- p + geom_hline(yintercept = pvalThresholdFDR, color = "skyblue1", linetype = "dotted", size = 0.5) +
      geom_text(data = fdrLabel, mapping = aes(x = x, y = y, label = text), size = 4, vjust = 0, hjust = 0, color = "skyblue3")
  }
  if (length(significativePoints) > 0) {
    p <- p + geom_point(data = dataSignifToPlot, aes(colour = description))
  }

  ## interactive plot

  if (length(htmlPointsToRemove) > 0) {
    pointToRemove <- union(htmlPointsToRemove, significativePoints)
    # to test if it remains any normal points to draw
    if (length(pointToRemove) < length(pValToPlot)) {
      dataToPlot <- buildVolcanoPoints(pValToPlot[-pointToRemove], fdrToPlot[-pointToRemove], coeffToPlot[-pointToRemove])
    } else {
      dataToPlot <- data.frame(pval = 0, FC = 0, description = "null")
    }
  }

  # count significant points from the index vector: dataSignifToPlot does not
  # exist (or belongs to a previous volcano) when this volcano has none
  nbPointsToPlot <- nrow(dataToPlot) + length(significativePoints)
  if (nbPointsToPlot > 40000) {
    addComment(c("[WARNING]For", curentVolcanoName, "volcano, numerous points to plot(", nbPointsToPlot, "), resulting volcano could be heavy, using more stringent thresholds could be helpful."), TRUE, opt$log)
  }

  # curentVolcanoName is a single string: it must not be indexed by iVolcano,
  # otherwise every volcano after the first gets an NA title
  phtml <- plot_ly(data = dataToPlot, x = ~FC, y = ~pval, type = "scatter", mode = "markers", showlegend = FALSE, marker = list(color = "gray", opacity = 0.5), text = ~description, hoverinfo = "text") %>%
    layout(title = curentVolcanoName, xaxis = list(title = "Log2 Fold Change", showgrid = TRUE, zeroline = FALSE), yaxis = list(title = "-Log10(p-val)", showgrid = TRUE, zeroline = FALSE))
  if (length(significativePoints) > 0) {
    phtml <- add_markers(phtml, data = dataSignifToPlot, x = ~FC, y = ~pval, mode = "markers", marker = list(color = log10(abs(dataSignifToPlot$FC) * dataSignifToPlot$pval), colorscale = 'Rainbow'), text = ~description, hoverinfo = "text", inherit = FALSE) %>% hide_colorbar()
  }
  if (logFCthreshold != 0) {
    phtml <- add_trace(phtml, x = c(-logFCthreshold, -logFCthreshold), y = c(0, yMaxLimPlot), type = "scatter", mode = "lines", line = list(color = "coral", dash = "dash"), hoverinfo = 'none', showlegend = FALSE, inherit = FALSE)
    phtml <- add_annotations(phtml, x = -logFCthreshold, y = 0, xref = "x", yref = "y", text = paste(c("log2(1/FC=", opt$fcThreshold, ")"), collapse = ""), xanchor = 'right', showarrow = FALSE, textangle = 270, font = list(color = "coral"))
    phtml <- add_trace(phtml, x = c(logFCthreshold, logFCthreshold), y = c(0, yMaxLimPlot), type = "scatter", mode = "lines", line = list(color = "coral", dash = "dash"), hoverinfo = 'none', showlegend = FALSE, inherit = FALSE)
    phtml <- add_annotations(phtml, x = logFCthreshold, y = 0, xref = "x", yref = "y", text = paste(c("log2(FC=", opt$fcThreshold, ")"), collapse = ""), xanchor = 'right', showarrow = FALSE, textangle = 270, font = list(color = "coral"))
  }
  if (!is.null(pvalThresholdFDR)) {
    phtml <- add_trace(phtml, x = c(xMinLimPlot, xMaxLimPlot), y = c(pvalThresholdFDR, pvalThresholdFDR), type = "scatter", mode = "lines", line = list(color = "cornflowerblue", dash = "dash"), hoverinfo = 'none', showlegend = FALSE, inherit = FALSE)
    phtml <- add_annotations(phtml, x = xMinLimPlot, y = pvalThresholdFDR + 0.1, xref = "x", yref = "y", text = paste(c("Adjusted pval limit(", opt$fdrThreshold, ")"), collapse = ""), xanchor = 'left', showarrow = FALSE, font = list(color = "cornflowerblue"))
  }
  plotVector[[length(plotVector) + 1]] <- p

  # save plotly files
  pp <- ggplotly(phtml)
  htmlwidgets::saveWidget(as_widget(pp), paste(c(file.path(getwd(), "plotLyDir"), "/Volcanos_", volcanoFileName, ".html"), collapse = ""), selfcontained = FALSE)


  if (iVolcano == nbVolcanosToPlot || length(plotVector) == volcanoPerPage) {
    # plot and close the actual plot
    if (opt$format == "pdf") {
      pdf(paste(c("./plotDir/Volcanos_", volcanoFileName, ".pdf"), collapse = ""))
    } else {
      png(paste(c("./plotDir/Volcanos_", volcanoFileName, ".png"), collapse = ""))
    }
    multiplot(plotlist = plotVector, cols = 1)
    dev.off()
    if (iVolcano < nbVolcanosToPlot) {
      # prepare for a new ploting file if necessary
      plotVector <- list()
      iToPlot <- iToPlot + 1
    }
  }
}
# free plotting data; only remove objects that were actually created
# (dataSignifToPlot is absent when no volcano had significant points)
rm(list = intersect(c("dataToPlot", "dataSignifToPlot"), ls()))
addComment("[INFO]Volcanos drawn", TRUE, opt$log, TRUE, display = FALSE)


#now add anotation infos about genes

rowItemInfo <- NULL
if (!is.null(opt$rowNameType) && !is.null(opt$organismID)) {
  ## get gene information from BioMart
  #if(!require("biomaRt")){
  #  source("https://bioconductor.org/biocLite.R")
  #  biocLite("biomaRt")
  #}

  ensembl_hs_mart <- useMart(biomart = "ensembl", dataset = opt$organismID)
  ensembl_df <- getBM(attributes = c(opt$rowNameType, "description"), mart = ensembl_hs_mart)
  # keep entries with a non-empty identifier; shorten each description to the
  # text before " [Source", limited to 30 characters
  rowItemInfo <- ensembl_df[which(ensembl_df[, 1] != ""), 2]
  rowItemInfo <- unlist(lapply(rowItemInfo, function(x) substr(unlist(strsplit(x, " \\[Source"))[1], 1, 30)))
  names(rowItemInfo) <- ensembl_df[which(ensembl_df[, 1] != ""), 1]
}

#filter out genes with higher p-values for all comparisons
genesToKeep <- names(which(apply(dataFrame$adj_p.value, 1, function(x) length(which(x <= opt$fdrThreshold)) > 0)))
# filter out genes with lower FC for all comparisons: keep a gene only when
# at least one comparison reaches the log2(FC) threshold
passesFcThreshold <- apply(dataFrame$coefficients, 1, function(x) length(which(abs(x) >= logFCthreshold)) > 0)
genesToKeep <- intersect(genesToKeep, names(which(passesFcThreshold)))

# Rebuild a matrix from raw values and attach the given row and column labels
asLabelledMatrix <- function(values, nbCols, rowLabels, colLabels) {
  labelled <- matrix(values, ncol = nbCols)
  rownames(labelled) <- rowLabels
  colnames(labelled) <- colLabels
  labelled
}

if (length(genesToKeep) == 0) {
  addComment("[WARNING]No significative genes", TRUE, opt$log, display = FALSE)
} else {
  # restrict the three statistics matrices to the kept genes
  dataFrameNew <- data.frame(row.names = genesToKeep)

  dataFrameNew$adj_p.value <- asLabelledMatrix(
    dataFrame$adj_p.value[genesToKeep, , drop = FALSE],
    ncol(dataFrame$adj_p.value), genesToKeep, colnames(dataFrame$p.value)
  )
  dataFrameNew$p.value <- asLabelledMatrix(
    dataFrame$p.value[genesToKeep, , drop = FALSE],
    ncol(dataFrame$p.value), genesToKeep, colnames(dataFrame$adj_p.value)
  )
  dataFrameNew$coefficients <- asLabelledMatrix(
    dataFrame$coefficients[genesToKeep, , drop = FALSE],
    ncol(dataFrame$coefficients), genesToKeep, colnames(dataFrame$adj_p.value)
  )

  dataFrame <- dataFrameNew
  rm(dataFrameNew)
}

addComment("[INFO]Significant genes filtering done", TRUE, opt$log, TRUE, display = FALSE)


#plot VennDiagramm for genes below threshold between comparisons
#t=apply(dataFrame$adj_p.value[,1:4],2,function(x)names(which(x<=opt$threshold)))
#get.venn.partitions(t)
#vennCounts(dataFrame$adj_p.value[,1:4]<=opt$threshold)

#make a simple sort genes based only on the first comparison
#newOrder=order(dataFrame$adj_p.value[,1])
#dataFrame$adj_p.value=dataFrame$adj_p.value[newOrder,]

# alternative sorting strategy based on the mean gene rank over all comparisons
if (length(genesToKeep) > 1) {
  nbComparisonsToRank <- ncol(dataFrame$adj_p.value)
  # per-comparison ranks of the adjusted p-values, summed then averaged
  rankPerComparison <- lapply(seq_len(nbComparisonsToRank), function(iComparison) rank(dataFrame$adj_p.value[, iComparison]))
  currentRank <- Reduce(`+`, rankPerComparison, rep(0, nrow(dataFrame$adj_p.value)))
  currentRank <- currentRank / nbComparisonsToRank
  newOrder <- order(currentRank)
  rownames(dataFrame) <- rownames(dataFrame)[newOrder]

  # adjusted p-values first: labels come from the not yet reordered p.value matrix
  dataFrame$adj_p.value <- asLabelledMatrix(
    dataFrame$adj_p.value[newOrder, ],
    ncol(dataFrame$adj_p.value), rownames(dataFrame$p.value)[newOrder], colnames(dataFrame$p.value)
  )
  # the other two matrices take their labels from the reordered adj_p.value
  dataFrame$p.value <- asLabelledMatrix(
    dataFrame$p.value[newOrder, ],
    ncol(dataFrame$p.value), rownames(dataFrame$adj_p.value), colnames(dataFrame$adj_p.value)
  )
  dataFrame$coefficients <- asLabelledMatrix(
    dataFrame$coefficients[newOrder, ],
    ncol(dataFrame$coefficients), rownames(dataFrame$adj_p.value), colnames(dataFrame$adj_p.value)
  )
}

# formating output matrix depending on genes to keep
# layout: 2 header rows and 2 leading columns (gene, info) to be correctly read
# by galaxy, then 4 columns per volcano; with no gene kept a single placeholder
# row is written
nbComparisons <- ncol(dataFrame$adj_p.value)
noGeneKept <- length(genesToKeep) == 0
nbGeneRows <- if (noGeneKept) 1 else nrow(dataFrame$adj_p.value)

outputData <- matrix(0, ncol = nbComparisons * 4 + 2, nrow = nbGeneRows + 2)
outputData[1, ] <- c("X", "X", rep(volcanoNameList, each = 4))
outputData[2, ] <- c("X", "X", rep(c("p-val", "Adjusted.p-val", "FC", "log2(FC)"), nbComparisons))

if (noGeneKept) {
  outputData[, 1] <- c("Volcano", "Gene", "noGene")
  outputData[, 2] <- c("Comparison", "Info", "noInfo")
} else {
  keptGeneNames <- rownames(dataFrame$adj_p.value)
  geneRows <- 3:nrow(outputData)
  outputData[, 1] <- c("Volcano", "Gene", keptGeneNames)
  outputData[, 2] <- c("Comparison", "Info", rep("na", length(keptGeneNames)))
  # gene descriptions replace the "na" default when annotation was retrieved
  if (!is.null(rowItemInfo)) {
    outputData[geneRows, 2] <- rowItemInfo[keptGeneNames]
  }
  outputData[geneRows, seq(3, ncol(outputData), 4)] <- prettyNum(dataFrame$p.value, digits = 4)
  outputData[geneRows, seq(4, ncol(outputData), 4)] <- prettyNum(dataFrame$adj_p.value, digits = 4)
  outputData[geneRows, seq(5, ncol(outputData), 4)] <- prettyNum(2^dataFrame$coefficients, digits = 4)
  outputData[geneRows, seq(6, ncol(outputData), 4)] <- prettyNum(dataFrame$coefficients, digits = 4)
}
addComment("[INFO]Formated output", TRUE, opt$log, display = FALSE)

# write output results (tab separated, headers are already inside the matrix)
write.table(outputData, file = opt$outputFile, quote = FALSE, sep = "\t", col.names = FALSE, row.names = FALSE)


end.time <- Sys.time()
elapsedMinutes <- as.numeric(difftime(end.time, start.time), units = "mins")
addComment(c("[INFO]Total execution time for R script:", elapsedMinutes, "mins"), TRUE, opt$log, display = FALSE)

addComment("[INFO]End of R script", TRUE, opt$log, display = FALSE)

printSessionInfo(opt$log)

#sessionInfo()
# \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/getopt.R Fri Jun 26 09:43:41 2020 -0400 @@ -0,0 +1,773 @@ +# Copyright (c) 2008-2010 Allen Day +# Copyright (c) 2011-2013 Trevor L. Davis <trevor.l.davis@stanford.edu> +# +# Modified by J.Vandel 2017 to consider situation of multiple identical flag +# and concatenate as a vector the set of parameter for the same flag instead of +# keeping only the last value as done by the previous version. +# +# This file is free software: you may copy, redistribute and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation, either version 2 of the License, or (at your +# option) any later version. +# +# This file is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +#' C-like getopt behavior +#' +#' getopt is primarily intended to be used with ``\link{Rscript}''. It +#' facilitates writing ``\#!'' shebang scripts that accept short and long +#' flags/options. It can also be used from ``R'' directly, but is probably less +#' useful in this context. +#' +#' getopt() returns a \link{list} data structure containing \link{names} of the +#' flags that were present in the \link{character} \link{vector} passed in under +#' the \emph{opt} argument. Each value of the \link{list} is coerced to the +#' data type specified according to the value of the \emph{spec} argument. See +#' below for details. +#' +#' Notes on naming convention: +#' +#' 1. An \emph{option} is one of the shell-split input strings. +#' +#' 2. A \emph{flag} is a type of \emph{option}. 
a \emph{flag} can be defined as +#' having no \emph{argument} (defined below), a required \emph{argument}, or an +#' optional \emph{argument}. +#' +#' 3. An \emph{argument} is a type of \emph{option}, and is the value associated +#' with a flag. +#' +#' 4. A \emph{long flag} is a type of \emph{flag}, and begins with the string +#' ``--''. If the \emph{long flag} has an associated \emph{argument}, it may be +#' delimited from the \emph{long flag} by either a trailing \emph{=}, or may be +#' the subsequent \emph{option}. +#' +#' 5. A \emph{short flag} is a type of \emph{flag}, and begins with the string +#' ``-''. If a \emph{short flag} has an associated \emph{argument}, it is the +#' subsequent \emph{option}. \emph{short flags} may be bundled together, +#' sharing a single leading ``-'', but only the final \emph{short flag} is able +#' to have a corresponding \emph{argument}. +#' +#' Many users wonder whether they should use the getopt package, optparse package, +#' or argparse package. +#' Here is some of the major differences: +#' +#' Features available in \code{getopt} unavailable in \code{optparse} +#' +#' 1. As well as allowing one to specify options that take either +#' no argument or a required argument like \code{optparse}, +#' \code{getopt} also allows one to specify option with an optional argument. +#' +#' Some features implemented in \code{optparse} package unavailable in \code{getopt} +#' +#' 1. Limited support for capturing positional arguments after the optional arguments +#' when \code{positional_arguments} set to TRUE in \code{parse_args} +#' +#' 2. Automatic generation of an help option and printing of help text when encounters an "-h" +#' +#' 3. 
Option to specify default arguments for options as well the +#' variable name to store option values +#' +#' There is also new package \code{argparse} introduced in 2012 which contains +#' all the features of both getopt and optparse but which has a dependency on +#' Python 2.7 or 3.2+ and has not been used in production since 2008 or 2009 +#' like the getopt and optparse packages. +#' +#' Some Features unlikely to be implemented in \code{getopt}: +#' +#' 1. Support for multiple, identical flags, e.g. for "-m 3 -v 5 -v", the +#' trailing "-v" overrides the preceding "-v 5", result is v=TRUE (or equivalent +#' typecast). +#' +#' 2. Support for multi-valued flags, e.g. "--libpath=/usr/local/lib +#' --libpath=/tmp/foo". +#' +#' 3. Support for lists, e.g. "--define os=linux --define os=redhat" would +#' set result$os$linux=TRUE and result$os$redhat=TRUE. +#' +#' 4. Support for incremental, argument-less flags, e.g. "/path/to/script +#' -vvv" should set v=3. +#' +#' 5. Support partial-but-unique string match on options, e.g. "--verb" and +#' "--verbose" both match long flag "--verbose". +#' +#' 6. No support for mixing in positional arguments or extra arguments that +#' don't match any options. For example, you can't do "my.R --arg1 1 foo bar +#' baz" and recover "foo", "bar", "baz" as a list. Likewise for "my.R foo +#' --arg1 1 bar baz". +#' +#' @aliases getopt getopt-package +#' @param spec The getopt specification, or spec of what options are considered +#' valid. The specification must be either a 4-5 column \link{matrix}, or a +#' \link{character} \link{vector} coercible into a 4 column \link{matrix} using +#' \link{matrix}(x,ncol=4,byrow=TRUE) command. The \link{matrix}/\link{vector} +#' contains: +#' +#' Column 1: the \emph{long flag} name. A multi-\link{character} string. +#' +#' Column 2: \emph{short flag} alias of Column 1. A single-\link{character} +#' string. +#' +#' Column 3: \emph{Argument} mask of the \emph{flag}. An \link{integer}. 
+#' Possible values: 0=no argument, 1=required argument, 2=optional argument. +#' +#' Column 4: Data type to which the \emph{flag}'s argument shall be cast using +#' \link{storage.mode}. A multi-\link{character} string. This only considered +#' for same-row Column 3 values of 1,2. Possible values: \link{logical}, +#' \link{integer}, \link{double}, \link{complex}, \link{character}. +#' If \link{numeric} is encountered then it will be converted to double. +#' +#' Column 5 (optional): A brief description of the purpose of the option. +#' +#' The terms \emph{option}, \emph{flag}, \emph{long flag}, \emph{short flag}, +#' and \emph{argument} have very specific meanings in the context of this +#' document. Read the ``Description'' section for definitions. +#' @param opt This defaults to the return value of \link{commandArgs}(TRUE). +#' +#' If R was invoked directly via the ``R'' command, this corresponds to all +#' arguments passed to R after the ``--args'' flag. +#' +#' If R was invoked via the ``\link{Rscript}'' command, this corresponds to all +#' arguments after the name of the R script file. +#' +#' Read about \link{commandArgs} and \link{Rscript} to learn more. +#' @param command The string to use in the usage message as the name of the +#' script. See argument \emph{usage}. +#' @param usage If TRUE, argument \emph{opt} will be ignored and a usage +#' statement (character string) will be generated and returned from \emph{spec}. +#' @param debug This is used internally to debug the getopt() function itself. +#' @author Allen Day +#' @seealso \code{\link{getopt}} +#' @keywords data +#' @export +#' @examples +#' +#' #!/path/to/Rscript +#' library('getopt'); +#' #get options, using the spec as defined by the enclosed list. +#' #we read the options from the default: commandArgs(TRUE). 
#' spec = matrix(c(
#'   'verbose', 'v', 2, "integer",
#'   'help'   , 'h', 0, "logical",
#'   'count'  , 'c', 1, "integer",
#'   'mean'   , 'm', 1, "double",
#'   'sd'     , 's', 1, "double"
#' ), byrow=TRUE, ncol=4);
#' opt = getopt(spec);
#'
#' # if help was asked for print a friendly message
#' # and exit with a non-zero error code
#' if ( !is.null(opt$help) ) {
#'   cat(getopt(spec, usage=TRUE));
#'   q(status=1);
#' }
#'
#' #set some reasonable defaults for the options that are needed,
#' #but were not specified.
#' if ( is.null(opt$mean ) ) { opt$mean = 0 }
#' if ( is.null(opt$sd ) ) { opt$sd = 1 }
#' if ( is.null(opt$count ) ) { opt$count = 10 }
#' if ( is.null(opt$verbose ) ) { opt$verbose = FALSE }
#'
#' #print some progress messages to stderr, if requested.
#' if ( opt$verbose ) { write("writing...",stderr()); }
#'
#' #do some operation based on user input.
#' cat(paste(rnorm(opt$count,mean=opt$mean,sd=opt$sd),collapse="\n"));
#' cat("\n");
#'
#' #signal success and exit.
#' #q(status=0);
getopt = function (spec=NULL,opt=commandArgs(TRUE),command=get_Rscript_filename(),usage=FALSE,debug=FALSE) {
  # Thin wrapper: long ("--flag") and short ("-f") flags are both accepted.
  # 'command' is forwarded lazily, so its default is only evaluated when a
  # usage string is actually requested.
  .getoptParse(spec = spec, opt = opt, command = command, usage = usage,
               debug = debug, allow.short = TRUE)
}



#########################
# Variant of getopt() that only understands long named parameters.
#
# Column 2 of 'spec' (short names) is ignored entirely: it is neither checked
# for uniqueness nor used for matching, so every row may carry the same dummy
# short name. Any command line token starting with a single dash that is not
# consumed as the value of the preceding flag is rejected as an invalid option.
#
# Arguments and return value are the same as for getopt(): a list with one
# entry per flag found on the command line (named after the long flag name)
# plus an 'ARGS' entry (always an empty character vector), or a usage string
# when usage=TRUE.
getoptLong = function (spec=NULL,opt=commandArgs(TRUE),command=get_Rscript_filename(),usage=FALSE,debug=FALSE) {
  .getoptParse(spec = spec, opt = opt, command = command, usage = usage,
               debug = debug, allow.short = FALSE)
}



# Shared implementation behind getopt() and getoptLong().
#
# spec, opt, command, usage, debug: see getopt().
# allow.short: TRUE to validate and parse short flags (column 2 of 'spec'),
#              FALSE to ignore that column and reject "-x" style options.
#
# Values of a flag given several times are accumulated, in command line order,
# in a single vector stored under the long flag name. A flag given without a
# value is recorded exactly once as TRUE cast to the storage mode declared in
# column 4 of 'spec'.
.getoptParse <- function(spec, opt, command, usage, debug, allow.short) {

  # littler compatibility - map argv vector to opt
  if (exists("argv", where = .GlobalEnv, inherits = FALSE)) {
    opt <- get("argv", envir = .GlobalEnv)
  }

  min.col <- 4
  max.col <- 6
  n.col <- min.col
  col.long.name <- 1
  col.short.name <- 2
  col.has.argument <- 3
  col.mode <- 4
  col.description <- 5

  flag.no.argument <- 0
  flag.required.argument <- 1
  flag.optional.argument <- 2

  # Replace the placeholder stored as last element of 'values' by 'value'.
  replace.last <- function(values, value) {
    c(values[-length(values)], value)
  }

  # TRUE cast to the storage mode declared for a flag.
  present.as <- function(mode) {
    x <- TRUE
    storage.mode(x) <- mode
    x
  }

  result <- list()
  result$ARGS <- vector(mode = "character")

  # no spec. fail.
  if (is.null(spec)) {
    stop('argument "spec" must be non-null.')

  # spec is not a matrix. attempt to coerce, if possible. issue a warning.
  } else if (!is.matrix(spec)) {
    if (length(spec) %% 4 == 0) {
      warning('argument "spec" was coerced to a 4-column (row-major) matrix. use a matrix to prevent the coercion')
      spec <- matrix(spec, ncol = min.col, byrow = TRUE)
    } else {
      stop('argument "spec" must be a matrix, or a character vector with length divisible by 4, rtfm.')
    }

  # spec is a matrix, but it has too few columns.
  } else if (ncol(spec) < min.col) {
    stop(paste0('"spec" should have at least ', min.col, ' columns.'))

  # spec is a matrix, but it has too many columns.
  } else if (ncol(spec) > max.col) {
    stop(paste0('"spec" should have no more than ', max.col, ' columns.'))

  # spec is a matrix, and it has some optional columns.
  } else if (ncol(spec) != min.col) {
    n.col <- ncol(spec)
  }

  # sanity check. long names must be unique; short names too when they are used.
  if (anyDuplicated(spec[, col.long.name]) > 0) {
    stop(paste('redundant long names for flags (column ',col.long.name,').',sep=''))
  }
  if (allow.short && anyDuplicated(na.omit(spec[, col.short.name])) > 0) {
    stop(paste('redundant short names for flags (column ',col.short.name,').',sep=''))
  }

  # convert numeric type to double type
  spec[, col.mode] <- gsub("numeric", "double", spec[, col.mode])

  # if usage=TRUE, don't process opt, but generate a usage string from the data in spec
  if (usage) {
    ret <- paste0("Usage: ", command)
    for (j in seq_len(nrow(spec))) {
      if (allow.short) {
        ret <- paste0(ret, ' [-[-', spec[j, col.long.name], '|', spec[j, col.short.name], ']')
      } else {
        ret <- paste0(ret, ' [-[-', spec[j, col.long.name], ']')
      }
      if (spec[j, col.has.argument] == flag.no.argument) {
        ret <- paste0(ret, ']')
      } else if (spec[j, col.has.argument] == flag.required.argument) {
        ret <- paste0(ret, ' <', spec[j, col.mode], '>]')
      } else if (spec[j, col.has.argument] == flag.optional.argument) {
        ret <- paste0(ret, ' [<', spec[j, col.mode], '>]]')
      }
    }
    ret <- paste0(ret, "\n")
    # include one description line per option when the spec provides them
    if (n.col >= 5) {
      max.long <- max(nchar(spec[, col.long.name]))
      for (j in seq_len(nrow(spec))) {
        if (allow.short) {
          ret <- paste0(ret, sprintf(paste(" -%s|--%-",max.long,"s %s\n",sep=''),
            spec[j, col.short.name], spec[j, col.long.name], spec[j, col.description]))
        } else {
          ret <- paste0(ret, sprintf(paste("--%-",max.long,"s %s\n",sep=''),
            spec[j, col.long.name], spec[j, col.description]))
        }
      }
    }
    return(ret)
  }

  #XXX check spec validity here. e.g. column three should be convertible to integer

  i <- 1

  while (i <= length(opt)) {
    if (debug) print(paste("processing", opt[i]))

    current.flag <- 0 #XXX use NA
    optstring <- opt[i]

    # long flag
    if (substr(optstring, 1, 2) == '--') {
      if (debug) print(paste(" long option:", opt[i]))

      optstring <- substring(optstring, 3)

      this.argument <- NA
      kv <- strsplit(optstring, '=')[[1]]
      if (!is.na(kv[2])) {
        this.flag <- kv[1]
        # the value itself may contain '=' characters
        this.argument <- paste(kv[-1], collapse = "=")
      } else {
        this.flag <- optstring
      }

      rowmatch <- grep(this.flag, spec[, col.long.name], fixed = TRUE)

      # long flag is invalid, matches no options
      if (length(rowmatch) == 0) {
        stop(paste('long flag "', this.flag, '" is invalid', sep=''))

      # long flag matches several options: only an exact match is acceptable
      } else if (length(rowmatch) > 1) {
        rowmatch <- which(this.flag == spec[, col.long.name])
        if (length(rowmatch) == 0) {
          stop(paste('long flag "', this.flag, '" is ambiguous', sep=''))
        }
      }

      flag.name <- spec[rowmatch, col.long.name]

      # "--flag=value" form
      if (!is.na(this.argument)) {
        # if we can't accept the argument, bail out
        if (spec[rowmatch, col.has.argument] == flag.no.argument) {
          stop(paste('long flag "', this.flag, '" accepts no arguments', sep=''))
        }
        # no placeholder was stored for this flag, so simply append the value
        storage.mode(this.argument) <- spec[rowmatch, col.mode]
        result[[flag.name]] <- c(result[[flag.name]], this.argument)
        i <- i + 1
        next
      }

      # no attached argument: store a placeholder TRUE and remember the flag so
      # that we can peek ahead later and consume the argument if it's there
      result[[flag.name]] <- c(result[[flag.name]], TRUE)
      current.flag <- rowmatch

    # short flag(s)
    } else if (allow.short && substr(optstring, 1, 1) == '-') {
      if (debug) print(paste(" short option:", opt[i]))

      these.flags <- strsplit(optstring, '')[[1]]

      done <- FALSE
      # skip the leading dash; a lone "-" yields no iteration at all
      for (j in seq_along(these.flags)[-1]) {
        this.flag <- these.flags[j]
        rowmatch <- grep(this.flag, spec[, col.short.name], fixed = TRUE)

        # short flag is invalid, matches no options
        if (length(rowmatch) == 0) {
          stop(paste('short flag "', this.flag, '" is invalid', sep=''))

        # short flag is ambiguous, matches too many options
        } else if (length(rowmatch) > 1) {
          stop(paste('short flag "', this.flag, '" is ambiguous', sep=''))

        # short flag has an argument, but is not the last in a compound flag string
        } else if (j < length(these.flags) && spec[rowmatch, col.has.argument] == flag.required.argument) {
          stop(paste('short flag "', this.flag, '" requires an argument, but has none', sep=''))

        # short flag has no argument, flag it as present
        } else if (spec[rowmatch, col.has.argument] == flag.no.argument) {
          flag.name <- spec[rowmatch, col.long.name]
          result[[flag.name]] <- c(result[[flag.name]], TRUE)
          done <- TRUE

        # can't definitively process this flag yet, need to see if next option is an argument or not
        } else {
          flag.name <- spec[rowmatch, col.long.name]
          result[[flag.name]] <- c(result[[flag.name]], TRUE)
          current.flag <- rowmatch
          done <- FALSE
        }
      }
      if (done) {
        i <- i + 1
        next
      }
    }

    # invalid opt
    if (current.flag == 0) {
      stop(paste('"', optstring, '" is not a valid option, or does not support an argument', sep=''))
      #TBD support for positional args
    }

    # Some dangling flag, handle it. Every path reaching this point has stored a
    # placeholder TRUE as last value of the flag: it is replaced below, either
    # by the consumed argument or by TRUE cast to the declared mode.
    if (debug) print(' dangling flag')
    flag.name <- spec[current.flag, col.long.name]
    flag.mode <- spec[current.flag, col.mode]
    has.argument <- spec[current.flag, col.has.argument]

    if (length(opt) > i) {
      peek.optstring <- opt[i + 1]
      if (debug) print(paste(' peeking ahead at: "',peek.optstring,'"',sep=''))

      starts.with.dash <- substr(peek.optstring, 1, 1) == '-'
      # we don't allow arguments beginning with '-' UNLESS the spec declares the
      # value as "integer" or "double" and the token looks like a negative number
      is.value <- !starts.with.dash ||
        (regexpr('^-[0123456789]*\\.?[0123456789]+$', peek.optstring) > 0 && flag.mode == 'double') ||
        (regexpr('^-[0123456789]+$', peek.optstring) > 0 && flag.mode == 'integer')
      is.lone.dash <- starts.with.dash && nchar(peek.optstring) == 1

      # got an argument (or a lone dash): attach it and skip it in the main loop
      if (is.value || is.lone.dash) {
        if (debug) {
          if (is.value) {
            print(paste(' consuming argument *',peek.optstring,'*',sep=''))
          } else {
            print(' consuming "lone dash" argument')
          }
        }
        storage.mode(peek.optstring) <- flag.mode
        result[[flag.name]] <- replace.last(result[[flag.name]], peek.optstring)
        i <- i + 1

      # no argument
      } else {
        if (debug) print(' no argument!')

        # if we require an argument, bail out
        if (has.argument == flag.required.argument) {
          stop(paste('flag "', this.flag, '" requires an argument', sep=''))

        # otherwise set flag as present.
        } else if (has.argument == flag.optional.argument || has.argument == flag.no.argument) {
          result[[flag.name]] <- replace.last(result[[flag.name]], present.as(flag.mode))
        } else {
          stop(paste("This should never happen.",
                     "Is your spec argument correct? Maybe you forgot to set",
                     "ncol=4, byrow=TRUE in your matrix call?"))
        }
      }

    # trailing flag without required argument
    } else if (has.argument == flag.required.argument) {
      stop(paste('flag "', this.flag, '" requires an argument', sep=''))

    # trailing flag without (optional) argument: set flag as present
    } else if (has.argument == flag.optional.argument || has.argument == flag.no.argument) {
      result[[flag.name]] <- replace.last(result[[flag.name]], present.as(flag.mode))
    } else {
      stop("this should never happen (2). please inform the author.")
    }

    i <- i + 1
  }
  return(result)
}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/heatMapClustering.R Fri Jun 26 09:43:41 2020 -0400 @@ -0,0 +1,896 @@ +# A command-line interface to plot heatmap based on expression or diff. exp. analysis +# written by Jimmy Vandel +# one of these arguments is required: +# +# +initial.options <- commandArgs(trailingOnly = FALSE) +file.arg.name <- "--file=" +script.name <- sub(file.arg.name, "", initial.options[grep(file.arg.name, initial.options)]) +script.basename <- dirname(script.name) +source(file.path(script.basename, "utils.R")) +source(file.path(script.basename, "getopt.R")) + +#addComment("Welcome R!") + +# setup R error handling to go to stderr +options( show.error.messages=F, error = function () { cat(geterrmessage(), file=stderr() ); q( "no", 1, F ) } ) + +# we need that to not crash galaxy with an UTF8 error on German LC settings. +loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") +loc <- Sys.setlocale("LC_NUMERIC", "C") + +#get starting time +start.time <- Sys.time() + + +options(stringAsfactors = FALSE, useFancyQuotes = FALSE, OutDec=".") + +#get options +args <- commandArgs() + +# get options, using the spec as defined by the enclosed list. +# we read the options from the default: commandArgs(TRUE). 
+spec <- matrix(c( + "expressionFile", "x", 1, "character", + "diffAnalyseFile", "x", 1, "character", + "factorInfo","x", 1, "character", + "genericData","x", 0, "logical", + "comparisonName","x",1,"character", + "comparisonNameLow","x",1,"character", + "comparisonNameHigh","x",1,"character", + "filterInputOutput","x", 1, "character", + "FCthreshold","x", 1, "double", + "pvalThreshold","x", 1, "double", + "geneListFiltering","x",1,"character", + "clusterNumber","x",1,"integer", + "maxRows","x",1,"integer", + "sampleClusterNumber","x",1,"integer", + "dataTransformation","x",1,"character", + "distanceMeasure","x",1,"character", + "aggloMethod","x",1,"character", + "personalColors","x",1,"character", + "sideBarColorPalette","x",1,"character", + "format", "x", 1, "character", + "quiet", "x", 0, "logical", + "log", "x", 1, "character", + "outputFile" , "x", 1, "character"), + byrow=TRUE, ncol=4) +opt <- getoptLong(spec) + +# enforce the following required arguments +if (is.null(opt$log)) { + addComment("[ERROR]'log file' is required") + q( "no", 1, F ) +} +addComment("[INFO]Start of R script",T,opt$log,display=FALSE) +if (is.null(opt$format)) { + addComment("[ERROR]'output format' is required",T,opt$log) + q( "no", 1, F ) +} +if (is.null(opt$outputFile)) { + addComment("[ERROR]'output file' is required",T,opt$log) + q( "no", 1, F ) +} + +if(is.null(opt$expressionFile) && !is.null(opt$genericData)){ + addComment("[ERROR]generic data clustering is based on expression clustering",T,opt$log) + q( "no", 1, F ) +} + +if (is.null(opt$clusterNumber) || opt$clusterNumber<2) { + addComment("[ERROR]valid genes clusters number is required",T,opt$log) + q( "no", 1, F ) +} + +if (is.null(opt$sampleClusterNumber) || opt$sampleClusterNumber<1) { + addComment("[ERROR]valid samples clusters number is required",T,opt$log) + q( "no", 1, F ) +} + +if (is.null(opt$dataTransformation)) { + addComment("[ERROR]data transformation option is required",T,opt$log) + q( "no", 1, F ) +} + +if 
(is.null(opt$distanceMeasure)) { + addComment("[ERROR]distance measure option is required",T,opt$log) + q( "no", 1, F ) +} + +if (is.null(opt$aggloMethod)) { + addComment("[ERROR]agglomeration method option is required",T,opt$log) + q( "no", 1, F ) +} + +if (is.null(opt$maxRows) || opt$maxRows<2) { + addComment("[ERROR]valid plotted row number is required",T,opt$log) + q( "no", 1, F ) +} + +if (!is.null(opt[["comparisonName"]]) && nchar(opt[["comparisonName"]])==0){ + addComment("[ERROR]you have to specify comparison",T,opt$log) + q( "no", 1, F ) +} + +if (!is.null(opt$comparisonNameLow) && nchar(opt$comparisonNameLow)==0){ + addComment("[ERROR]you have to specify comparisonLow",T,opt$log) + q( "no", 1, F ) +} + +if (!is.null(opt$comparisonNameHigh) && nchar(opt$comparisonNameHigh)==0){ + addComment("[ERROR]you have to specify comparisonHigh",T,opt$log) + q( "no", 1, F ) +} + +if (is.null(opt$genericData) && (!is.null(opt$comparisonNameLow) || !is.null(opt$comparisonNameHigh))){ + addComment("[ERROR]comparisonLow and comparisonHigh can be specified only with generic data",T,opt$log) + q( "no", 1, F ) +} + +if (!is.null(opt$genericData) && !is.null(opt[["comparisonName"]])){ + addComment("[ERROR]basic comparison cannot be specified for generic data",T,opt$log) + q( "no", 1, F ) +} + +if ((!is.null(opt[["comparisonName"]]) || !is.null(opt$comparisonNameLow) || !is.null(opt$comparisonNameHigh)) && is.null(opt$diffAnalyseFile)) { + addComment("[ERROR]'diff. exp. 
analysis file' is required",T,opt$log) + q( "no", 1, F ) +} + +if (!is.null(opt$genericData) && !is.null(opt$diffAnalyseFile) && is.null(opt$comparisonNameLow) && is.null(opt$comparisonNameHigh)){ + addComment("[ERROR]Missing comparison information for filtering",T,opt$log) + q( "no", 1, F ) +} + +if ((!is.null(opt$FCthreshold) || !is.null(opt$pvalThreshold)) && (is.null(opt[["comparisonName"]]) && is.null(opt$comparisonNameLow) && is.null(opt$comparisonNameHigh))) { + addComment("[ERROR]'comparisons' are missing for filtering",T,opt$log) + q( "no", 1, F ) +} + +if ((!is.null(opt$FCthreshold) || !is.null(opt$pvalThreshold)) && !is.null(opt$geneListFiltering)) { + addComment("[ERROR]Cannot have two filtering strategies",T,opt$log) + q( "no", 1, F ) +} + +verbose <- if (is.null(opt$quiet)) { + TRUE +}else{ + FALSE} + +addComment("[INFO]Parameters checked!",T,opt$log,display=FALSE) + +addComment(c("[INFO]Working directory: ",getwd()),TRUE,opt$log,display=FALSE) +addComment(c("[INFO]Command line: ",args),TRUE,opt$log,display=FALSE) + +#directory for plots and HTML +dir.create(file.path(getwd(), "plotDir")) +dir.create(file.path(getwd(), "plotLyDir")) + +#silent package loading +suppressPackageStartupMessages({ + library("plotly") + library("dendextend") + #library("ggdendro") + #library("plyr") + library("ggplot2") + library("heatmaply") + library("circlize") + #library("RColorBrewer") + #source("https://bioconductor.org/biocLite.R") + #biocLite("ComplexHeatmap") + library("ComplexHeatmap") + #library("processx") +}) + +expressionToCluster=!is.null(opt$expressionFile) + +#load input data files +if(expressionToCluster){ + #first expression data + expressionMatrix=read.csv(file=opt$expressionFile,header=F,sep="\t",colClasses="character") + #remove first row to convert it as colnames (to avoid X before colnames with header=T) + colNamesData=expressionMatrix[1,-1] + expressionMatrix=expressionMatrix[-1,] + #remove first colum to convert it as rownames + 
rowNamesData=expressionMatrix[,1] + expressionMatrix=expressionMatrix[,-1] + if(is.data.frame(expressionMatrix)){ + expressionMatrix=data.matrix(expressionMatrix) + }else{ + expressionMatrix=data.matrix(as.numeric(expressionMatrix)) + } + dimnames(expressionMatrix)=list(rowNamesData,colNamesData) + + #check input files + if (!is.numeric(expressionMatrix)) { + addComment("[ERROR]Expression data is not fully numeric!",T,opt$log,display=FALSE) + q( "no", 1, F ) + } + + addComment("[INFO]Expression data loaded and checked") + addComment(c("[INFO]Dim of expression matrix:",dim(expressionMatrix)),T,opt$log,display=FALSE) +} + +nbComparisons=0 +nbColPerContrast=5 +comparisonMatrix=NULL +comparisonMatrixInfoGene=NULL +#if available comparisons +if(!is.null(opt[["comparisonName"]])){ + #load results from differential expression analysis + #consider first row contains column names + comparisonMatrix=read.csv(file=opt$diffAnalyseFile,header=F,sep="\t") + colnames(comparisonMatrix)=as.character(unlist(comparisonMatrix[1,])) + #remove the second line also as it's information line (p-val,FDR.p-val,FC,logFC) + comparisonMatrix=comparisonMatrix[-c(1,2),] + #remove first and second colums, convert the first one as rownames + rownames(comparisonMatrix)=as.character(unlist(comparisonMatrix[,1])) + #and save second column content that contain geneInfo + comparisonMatrixInfoGene=as.character(unlist(comparisonMatrix[,2])) + names(comparisonMatrixInfoGene)=as.character(unlist(comparisonMatrix[,1])) + comparisonMatrix=comparisonMatrix[,-c(1,2)] + + comparisonMatrix=matrix(as.numeric(as.matrix(comparisonMatrix)),ncol=ncol(comparisonMatrix),dimnames = dimnames(comparisonMatrix)) + + if (ncol(comparisonMatrix)%%nbColPerContrast != 0) { + addComment("[ERROR]Diff. exp. 
data does not contain good number of columns per contrast, should contains in this order:p-val,FDR.p-val,FC,log2(FC) and t-stat",T,opt$log,display=FALSE) + q( "no", 1, F ) + } + + if(max(comparisonMatrix[,c(seq(1,ncol(comparisonMatrix),nbColPerContrast),seq(2,ncol(comparisonMatrix),nbColPerContrast))])>1 || min(comparisonMatrix[,c(seq(1,ncol(comparisonMatrix),nbColPerContrast),seq(2,ncol(comparisonMatrix),nbColPerContrast))])<0){ + addComment("[ERROR]Seem that diff. exp. data does not contain correct values for p-val and FDR.p-val columns, should be including in [0,1] interval",T,opt$log,display=FALSE) + q( "no", 1, F ) + } + + if (!is.numeric(comparisonMatrix)) { + addComment("[ERROR]Diff. exp. data is not fully numeric!",T,opt$log,display=FALSE) + q( "no", 1, F ) + } + + if(expressionToCluster && length(setdiff(rownames(comparisonMatrix),rownames(expressionMatrix)))!=0){ + addComment("[WARNING]All genes from diff. exp. file are not included in expression file",T,opt$log,display=FALSE) + } + + if(expressionToCluster && length(setdiff(rownames(expressionMatrix),rownames(comparisonMatrix)))!=0){ + addComment("[WARNING]All genes from expression file are not included in diff. exp. file",T,opt$log,display=FALSE) + } + + addComment("[INFO]Diff. exp. 
analysis loaded and checked",T,opt$log,display=FALSE) + addComment(c("[INFO]Dim of original comparison matrix:",dim(comparisonMatrix)),T,opt$log,display=FALSE) + + #restrict to user specified comparisons + restrictedComparisons=unlist(strsplit(opt[["comparisonName"]],",")) + #should be improved to avoid selection of column names starting too similarly + colToKeep=which(unlist(lapply(colnames(comparisonMatrix),function(x)any(startsWith(x,restrictedComparisons))))) + comparisonMatrix=matrix(comparisonMatrix[,colToKeep],ncol=length(colToKeep),dimnames = list(rownames(comparisonMatrix),colnames(comparisonMatrix)[colToKeep])) + + #get number of required comparisons + nbComparisons=ncol(comparisonMatrix)/nbColPerContrast + + addComment(c("[INFO]Dim of effective filtering matrix:",dim(comparisonMatrix)),T,opt$log,display=FALSE) +} + +#should be only the case with generic data +if(!is.null(opt$comparisonNameLow) || !is.null(opt$comparisonNameHigh)){ + #load generic data used for filtering + nbColPerContrast=1 + #consider first row contains column names + comparisonMatrix=read.csv(file=opt$diffAnalyseFile,header=F,sep="\t") + colnames(comparisonMatrix)=as.character(unlist(comparisonMatrix[1,])) + #remove first colum, convert the first one as rownames + rownames(comparisonMatrix)=as.character(unlist(comparisonMatrix[,1])) + comparisonMatrix=comparisonMatrix[-1,-1] + + comparisonMatrix=matrix(as.numeric(as.matrix(comparisonMatrix)),ncol=ncol(comparisonMatrix),dimnames = dimnames(comparisonMatrix)) + + if (!is.numeric(comparisonMatrix)) { + addComment("[ERROR]Filtering matrix is not fully numeric!",T,opt$log,display=FALSE) + q( "no", 1, F ) + } + + if(expressionToCluster && length(setdiff(rownames(comparisonMatrix),rownames(expressionMatrix)))!=0){ + addComment("[WARNING]All genes from filtering file are not included in expression file",T,opt$log,display=FALSE) + } + + if(expressionToCluster && length(setdiff(rownames(expressionMatrix),rownames(comparisonMatrix)))!=0){ + 
addComment("[WARNING]All genes from expression file are not included in filtering file",T,opt$log,display=FALSE) + } + + addComment("[INFO]Filtering file loaded and checked",T,opt$log,display=FALSE) + addComment(c("[INFO]Dim of original filtering matrix:",dim(comparisonMatrix)),T,opt$log,display=FALSE) + + #restrict to user specified comparisons + restrictedComparisons=c() + if(!is.null(opt$comparisonNameLow))restrictedComparisons=unique(c(restrictedComparisons,unlist(strsplit(opt$comparisonNameLow,",")))) + if(!is.null(opt$comparisonNameHigh))restrictedComparisons=unique(c(restrictedComparisons,unlist(strsplit(opt$comparisonNameHigh,",")))) + + if (!all(restrictedComparisons%in%colnames(comparisonMatrix))){ + addComment("[ERROR]Selected columns in filtering file are not present in filtering matrix!",T,opt$log,display=FALSE) + q( "no", 1, F ) + } + comparisonMatrix=matrix(comparisonMatrix[,restrictedComparisons],ncol=length(restrictedComparisons),dimnames = list(rownames(comparisonMatrix),restrictedComparisons)) + + #get number of required comparisons + nbComparisons=ncol(comparisonMatrix) + + addComment(c("[INFO]Dim of effective filtering matrix:",dim(comparisonMatrix)),T,opt$log,display=FALSE) +} + + + +factorInfoMatrix=NULL +if(!is.null(opt$factorInfo)){ + #get group information + #load factors file + factorInfoMatrix=read.csv(file=opt$factorInfo,header=F,sep="\t",colClasses="character") + #remove first row to convert it as colnames + colnames(factorInfoMatrix)=factorInfoMatrix[1,] + factorInfoMatrix=factorInfoMatrix[-1,] + #use first colum to convert it as rownames but not removing it to avoid conversion as vector in unique factor case + rownames(factorInfoMatrix)=factorInfoMatrix[,1] + + factorBarColor=colnames(factorInfoMatrix)[2] + + if(ncol(factorInfoMatrix)>2){ + addComment("[ERROR]Factors file should not contain more than 2 columns",T,opt$log,display=FALSE) + q( "no", 1, F ) + } + + #factor file is used for color band on heatmap, so all expression matrix 
column should be in the factor file + if(expressionToCluster && length(setdiff(colnames(expressionMatrix),rownames(factorInfoMatrix)))!=0){ + addComment("[ERROR]Missing samples in factor file",T,opt$log,display=FALSE) + q( "no", 1, F ) + } + + #factor file is used for color band on heatmap, so all comparison matrix column should be in the factor file + if(!expressionToCluster && length(setdiff(colnames(comparisonMatrix),rownames(factorInfoMatrix)))!=0){ + addComment("[ERROR]Missing differential contrasts in factor file",T,opt$log,display=FALSE) + q( "no", 1, F ) + } + + addComment("[INFO]Factors OK",T,opt$log,display=FALSE) + addComment(c("[INFO]Dim of factorInfo matrix:",dim(factorInfoMatrix)),T,opt$log,display=FALSE) +} + +if(!is.null(opt$personalColors)){ + ##parse personal colors + personalColors=unlist(strsplit(opt$personalColors,",")) + if(length(personalColors)==2){ + ##add medium color between two to get three colors + personalColors=c(personalColors[1],paste(c("#",as.character(as.hexmode(floor(apply(col2rgb(personalColors),1,mean))))),collapse=""),personalColors[2]) + } + if(length(personalColors)!=3){ + addComment("[ERROR]Personalized colors doesn't contain enough colors",T,opt$log,display=FALSE) + q( "no", 1, F ) + } + +} + + +if(!is.null(opt$filterInputOutput) && opt$filterInputOutput=="input"){ + #filter input data + + if(is.null(opt$geneListFiltering)){ + #filtering using stat thresholds + #rowToKeep=intersect(which(comparisonMatrix[,seq(2,ncol(comparisonMatrix),4)]<=opt$pvalThreshold),which(abs(comparisonMatrix[,seq(4,ncol(comparisonMatrix),4)])>=log2(opt$FCthreshold))) + if(is.null(opt$genericData)){ + #diff. 
expression matrix + rowToKeep=names(which(unlist(apply(comparisonMatrix,1,function(x)length(intersect(which(x[seq(2,length(x),nbColPerContrast)]<opt$pvalThreshold),which(abs(x[seq(4,length(x),nbColPerContrast)])>log2(opt$FCthreshold))))!=0)))) + }else{ + #generic filtering matrix + rowToKeep=rownames(comparisonMatrix) + if(!is.null(opt$comparisonNameLow)){ + restrictedLowComparisons=unlist(strsplit(opt$comparisonNameLow,",")) + rowToKeep=intersect(rowToKeep,names(which(unlist(apply(comparisonMatrix,1,function(x)length(which(x[restrictedLowComparisons]>opt$FCthreshold))!=0))))) + } + if(!is.null(opt$comparisonNameHigh)){ + restrictedHighComparisons=unlist(strsplit(opt$comparisonNameHigh,",")) + rowToKeep=intersect(rowToKeep,names(which(unlist(apply(comparisonMatrix,1,function(x)length(which(x[restrictedHighComparisons]<opt$pvalThreshold))!=0))))) + } + } + }else{ + #filtering using user gene list + geneListFiltering=read.csv(opt$geneListFiltering,as.is = 1,header=F) + rowToKeep=unlist(c(geneListFiltering)) + } + + if(!is.null(comparisonMatrix) && !all(rowToKeep%in%rownames(comparisonMatrix))){ + #should arrive only with user gene list filtering with diff.exp. results clustering + addComment("[WARNING] some genes of the user defined list are not in the diff. exp. input file",T,opt$log) + rowToKeep=intersect(rowToKeep,rownames(comparisonMatrix)) + } + + if(expressionToCluster && !all(rowToKeep%in%rownames(expressionMatrix))){ + addComment("[WARNING] some genes selected by the input filter are not in the expression file",T,opt$log) + rowToKeep=intersect(rowToKeep,rownames(expressionMatrix)) + } + + if(length(rowToKeep)==0){ + addComment("[ERROR]No gene survived to the input filtering thresholds, execution will be aborted. 
+ Please consider to change threshold values and re-run the tool.",T,opt$log) + q( "no", 1, F ) + } + + #filter comparison matrix + if(!is.null(comparisonMatrix)){ + comparisonMatrix=matrix(comparisonMatrix[rowToKeep,],ncol=ncol(comparisonMatrix),dimnames = list(rowToKeep,colnames(comparisonMatrix))) + if(!is.null(comparisonMatrixInfoGene))comparisonMatrixInfoGene=comparisonMatrixInfoGene[rowToKeep] + } + #then expression matrix + if(expressionToCluster)expressionMatrix=matrix(expressionMatrix[rowToKeep,],ncol=ncol(expressionMatrix),dimnames = list(rowToKeep,colnames(expressionMatrix))) + + if(!is.null(comparisonMatrix) && expressionToCluster && nrow(comparisonMatrix)!=nrow(expressionMatrix)){ + addComment("[ERROR]Problem during input filtering, please check code",T,opt$log,display=FALSE) + q( "no", 1, F ) + } + + addComment("[INFO]Filtering step done",T,opt$log,display=FALSE) + addComment(c("[INFO]Input filtering step:",length(rowToKeep),"remaining rows"),T,opt$log,display=FALSE) +} + + +addComment("[INFO]Ready to plot",T,opt$log,display=FALSE) + +##--------------------- + +#plot heatmap +if(expressionToCluster){ + #will make clustering based on expression value or generic value + dataToHeatMap=expressionMatrix + valueMeaning="Intensity" + if(!is.null(opt$genericData))valueMeaning="Value" +}else{ + #will make clustering on log2(FC) values + dataToHeatMap=matrix(comparisonMatrix[,seq(4,ncol(comparisonMatrix),nbColPerContrast)],ncol=nbComparisons,dimnames = list(rownames(comparisonMatrix),colnames(comparisonMatrix)[seq(1,ncol(comparisonMatrix),nbColPerContrast)])) + valueMeaning="Log2(FC)" +} +addComment(c("[INFO]Dim of heatmap matrix:",dim(dataToHeatMap)),T,opt$log,display=FALSE) + +if(nrow(dataToHeatMap)==1 && ncol(dataToHeatMap)==1){ + addComment("[ERROR]Cannot make clustering with unique cell tab",T,opt$log,display=FALSE) + q( "no", 1, F ) +} + + +#apply data transformation if needed +if(opt$dataTransformation=="log"){ + dataToHeatMap=log(dataToHeatMap) + 
valueMeaning=paste(c("log(",valueMeaning,")"),collapse="") + addComment("[INFO]Data to cluster and to display in the heatmap are log transformed",T,opt$log,display=FALSE) +} +if(opt$dataTransformation=="log2"){ + dataToHeatMap=log2(dataToHeatMap) + valueMeaning=paste(c("log2(",valueMeaning,")"),collapse="") + addComment("[INFO]Data to cluster and to display in the heatmap are log2 transformed",T,opt$log,display=FALSE) +} + +maxRowsToDisplay=opt$maxRows + +nbClusters=opt$clusterNumber +if(nbClusters>nrow(dataToHeatMap)){ + #correct number of clusters if needed + nbClusters=nrow(dataToHeatMap) + addComment(c("[WARNING]Not enough rows to reach required clusters number, it is reduced to number of rows:",nbClusters),T,opt$log,display=FALSE) +} + +nbSampleClusters=opt$sampleClusterNumber +if(nbSampleClusters>ncol(dataToHeatMap)){ + #correct number of clusters if needed + nbSampleClusters=ncol(dataToHeatMap) + addComment(c("[WARNING]Not enough columns to reach required conditions clusters number, it is reduced to number of columns:",nbSampleClusters),T,opt$log,display=FALSE) +} + +colClust=FALSE +rowClust=FALSE +effectiveRowClust=FALSE + +#make appropriate clustering if needed +if(nrow(dataToHeatMap)>1 && nbClusters>1)rowClust=hclust(distExtended(dataToHeatMap,method = opt$distanceMeasure),method = opt$aggloMethod) +if(ncol(dataToHeatMap)>1 && nbSampleClusters>1)colClust=hclust(distExtended(t(dataToHeatMap),method = opt$distanceMeasure),method = opt$aggloMethod) + +if(nrow(dataToHeatMap)>maxRowsToDisplay){ + #make subsampling based on preliminary global clustering + #clusteringResults=cutree(rowClust,nbClusters) + #heatMapGenesToKeep=unlist(lapply(seq(1,nbClusters),function(x)sample(which(clusteringResults==x),min(length(which(clusteringResults==x)),round(maxRowsToDisplay/nbClusters))))) + ##OR + #basic subsampling + heatMapGenesToKeep=sample(rownames(dataToHeatMap),maxRowsToDisplay) + 
effectiveDataToHeatMap=matrix(dataToHeatMap[heatMapGenesToKeep,],ncol=ncol(dataToHeatMap),dimnames=list(heatMapGenesToKeep,colnames(dataToHeatMap))) + effectiveNbClusters=min(nbClusters,maxRowsToDisplay) + if(nrow(effectiveDataToHeatMap)>1 && effectiveNbClusters>1)effectiveRowClust=hclust(distExtended(effectiveDataToHeatMap, method = opt$distanceMeasure),method = opt$aggloMethod) + addComment(c("[WARNING]Too many rows for efficient heatmap drawing",maxRowsToDisplay,"subsampling is done for vizualization only"),T,opt$log,display=FALSE) + rm(heatMapGenesToKeep) +}else{ + effectiveDataToHeatMap=dataToHeatMap + effectiveRowClust=rowClust + effectiveNbClusters=nbClusters +} + +addComment(c("[INFO]Dim of plotted heatmap matrix:",dim(effectiveDataToHeatMap)),T,opt$log,display=FALSE) + +personalized_hoverinfo=matrix("",ncol = ncol(effectiveDataToHeatMap),nrow = nrow(effectiveDataToHeatMap),dimnames = dimnames(effectiveDataToHeatMap)) +if(expressionToCluster){ + for(iCol in colnames(effectiveDataToHeatMap)){for(iRow in rownames(effectiveDataToHeatMap)){personalized_hoverinfo[iRow,iCol]=paste(c("Probe: ",iRow,"\nCondition: ",iCol,"\n",valueMeaning,": ",effectiveDataToHeatMap[iRow,iCol]),collapse="")}} +}else{ + for(iCol in colnames(effectiveDataToHeatMap)){for(iRow in rownames(effectiveDataToHeatMap)){personalized_hoverinfo[iRow,iCol]=paste(c("Probe: ",iRow,"\nCondition: ",iCol,"\nFC: ",round(2^effectiveDataToHeatMap[iRow,iCol],2)),collapse="")}} +} + +#trying to overcome limitation of heatmaply package to modify xtick and ytick label, using directly plotly functions, but for now plotly do not permit to have personalized color for each x/y tick separately +test=FALSE +if(test==TRUE){ + + #define dendogram shapes + dd.row <- as.dendrogram(effectiveRowClust) + dd.col <- as.dendrogram(colClust) + + #and color them + dd.row=color_branches(dd.row, k = effectiveNbClusters, groupLabels = T) + dd.col=color_branches(dd.col, k = nbSampleClusters, groupLabels = T) + + #generating 
function for dendogram from segment list + ggdend <- function(df) { + ggplot() + + geom_segment(data = df, aes(x=x, y=y, xend=xend, yend=yend)) + + labs(x = "", y = "") + theme_minimal() + + theme(axis.text = element_blank(), axis.ticks = element_blank(), + panel.grid = element_blank()) + } + + # generate x/y dendogram plots + px <- ggdend(dendro_data(dd.col)$segments) + py <- ggdend(dendro_data(dd.row)$segments) + coord_flip() + + # reshape data matrix + col.ord <- order.dendrogram(dd.col) + row.ord <- order.dendrogram(dd.row) + xx <- effectiveDataToHeatMap[row.ord, col.ord] + # and also personalized_hoverinfo + personalized_hoverinfo=personalized_hoverinfo[row.ord, col.ord] + + # hide axis ticks and grid lines + eaxis <- list( + showticklabels = FALSE, + showgrid = FALSE, + zeroline = FALSE + ) + + #make the empty plot + p_empty <- plot_ly() %>% + layout(margin = list(l = 200), + xaxis = eaxis, + yaxis = eaxis) + + heatmap.plotly <- plot_ly( + z = xx, x = 1:ncol(xx), y = 1:nrow(xx), colors = viridis(n = 101, alpha = 1, begin = 0, end = 1, option = "inferno"), + type = "heatmap", showlegend = FALSE, text = personalized_hoverinfo, hoverinfo = "text", + colorbar = list( + # Capitalise first letter + title = valueMeaning, + tickmode = "array", + len = 0.3 + ) + ) %>% + layout( + xaxis = list( + tickfont = list(size = 10,color=get_leaves_branches_col(dd.row)), + tickangle = 45, + tickvals = 1:ncol(xx), ticktext = colnames(xx), + linecolor = "#ffffff", + range = c(0.5, ncol(xx) + 0.5), + showticklabels = TRUE + ), + yaxis = list( + tickfont = list(size = 10, color=get_leaves_branches_col(dd.col)), + tickangle = 0, + tickvals = 1:nrow(xx), ticktext = rownames(xx), + linecolor = "#ffffff", + range = c(0.5, nrow(xx) + 0.5), + showticklabels = TRUE + ) + ) + + #generate plotly + pp <- subplot(px, p_empty, heatmap.plotly, py, nrows = 2, margin = 0,widths = c(0.8,0.2),heights = c(0.2,0.8), shareX = TRUE, + shareY = TRUE) + + #save image file + export(pp, file = 
paste(c(file.path(getwd(), "plotDir"),"/Heatmap.",opt$format),collapse="")) + #rise a bug due to token stuf + #orca(pp, file = paste(c(file.path(getwd(), "plotDir"),"/Heatmap.",opt$format),collapse="")) + + + #save plotLy file + htmlwidgets::saveWidget(as_widget(pp), paste(c(file.path(getwd(), "plotLyDir"),"/Heatmap.html"),collapse=""),selfcontained = F) + + #htmlwidgets::saveWidget(as_widget(pp),"~/Bureau/test.html",selfcontained = F) + +}else{ #test + label_names=c("Probe","Condition",valueMeaning) + + # #color hclust objects + # dd.row=color_branches(effectiveRowClust, k = effectiveNbClusters) + # #rowColors=get_leaves_branches_col(dd.row) + # #rowColors[order.dendrogram(dd.row)]=rowColors + # rowGroup=cutree(effectiveRowClust, k = effectiveNbClusters) + # + # #get order of class as they will be displayed on the dendogram + # rowGroupRenamed=data.frame(cluster=mapvalues(rowGroup, unique(rowGroup[order.dendrogram(dd.row)[nleaves(dd.row):1]]), 1:effectiveNbClusters)) + # + # dd.col=color_branches(colClust, k = nbSampleClusters) + # #colColors=get_leaves_branches_col(dd.col) + # #colColors[order.dendrogram(dd.col)]=colColors + # colGroup=cutree(colClust, k = nbSampleClusters) + # + # # #get order of class as they will be displayed on the dendogram + # colGroupRenamed=data.frame(sampleCluster=mapvalues(colGroup, unique(colGroup[order.dendrogram(dd.col)[nleaves(dd.col):1]]), 1:nbSampleClusters)) + + + #while option is not correctly managed by heatmap apply, put personalized_hoverinfo to NULL + personalized_hoverinfo=NULL + + if(is.null(opt$personalColors)){ + heatmapColors=viridis(n = 101, alpha = 1, begin = 0, end = 1, option = "inferno") + }else{ + heatmapColors=personalColors + } + + colGroupRenamed=NULL + if(!is.null(factorInfoMatrix)){ + colGroupRenamed=eval(parse(text=(paste("data.frame(",factorBarColor,"=factorInfoMatrix[colnames(effectiveDataToHeatMap),2])",sep="")))) + sideBarGroupNb=length(table(factorInfoMatrix[colnames(effectiveDataToHeatMap),2])) + 
sideBarColorPaletteName="Spectral" + if(!is.null(opt$sideBarColorPalette) && opt$sideBarColorPalette%in%rownames(RColorBrewer::brewer.pal.info)){ + sideBarColorPaletteName=opt$sideBarColorPalette + } + sideBarColorPalette=setNames(colorRampPalette(RColorBrewer::brewer.pal(RColorBrewer::brewer.pal.info[sideBarColorPaletteName,"maxcolors"], sideBarColorPaletteName))(sideBarGroupNb),unique(factorInfoMatrix[colnames(effectiveDataToHeatMap),2])) + } + + if(!is.null(colGroupRenamed)){ + pp <- heatmaply(effectiveDataToHeatMap,key.title = valueMeaning,k_row=effectiveNbClusters,k_col=nbSampleClusters,col_side_colors=colGroupRenamed,col_side_palette=sideBarColorPalette,Rowv=effectiveRowClust,Colv=colClust,label_names=label_names,custom_hovertext=personalized_hoverinfo,plot_method = "plotly",colors = heatmapColors) + }else{ + pp <- heatmaply(effectiveDataToHeatMap,key.title = valueMeaning,k_row=effectiveNbClusters,k_col=nbSampleClusters,Rowv=effectiveRowClust,Colv=colClust,label_names=label_names,custom_hovertext=personalized_hoverinfo,plot_method = "plotly",colors = heatmapColors) + } + + + #save image file + export(pp, file = paste(c(file.path(getwd(), "plotDir"),"/Heatmap.",opt$format),collapse="")) + #rise a bug due to token stuf + #orca(pp, file = paste(c(file.path(getwd(), "plotDir"),"/Heatmap.",opt$format),collapse="")) + + + #save plotLy file + htmlwidgets::saveWidget(as_widget(pp), paste(c(file.path(getwd(), "plotLyDir"),"/Heatmap.html"),collapse=""),selfcontained = F) + +} +addComment("[INFO]Heatmap drawn",T,opt$log,display=FALSE) + + +#plot circular heatmap +if(!class(effectiveRowClust)=="logical"){ + dendo=as.dendrogram(effectiveRowClust) + + if(is.null(opt$personalColors)){ + col_fun = colorRamp2(quantile(effectiveDataToHeatMap,probs = seq(0,1,0.01)), viridis(101,option = "inferno")) + }else{ + col_fun = colorRamp2(quantile(effectiveDataToHeatMap,probs = seq(0,1,0.5)), personalColors) + } + + if(opt$format=="pdf"){ + 
pdf(paste(c("./plotDir/circularPlot.pdf"),collapse=""))}else{ + png(paste(c("./plotDir/circularPlot.png"),collapse="")) + } + + circos.par(cell.padding = c(0, 0, 0, 0), gap.degree = 5) + circos.initialize(c(rep("a",nrow(effectiveDataToHeatMap)),"b"),xlim=cbind(c(0,0),c(nrow(effectiveDataToHeatMap),5))) + circos.track(ylim = c(0, 1), bg.border = NA, panel.fun = function(x, y) { + if(CELL_META$sector.index=="a"){ + nr = ncol(effectiveDataToHeatMap) + nc = nrow(effectiveDataToHeatMap) + circos.text(1:nc- 0.5, rep(0,nc), adj = c(0, 0), + rownames(effectiveDataToHeatMap)[order.dendrogram(dendo)], facing = "clockwise", niceFacing = TRUE, cex = 0.3) + } + }) + + circos.track(ylim = c(0, ncol(effectiveDataToHeatMap)), bg.border = NA, panel.fun = function(x, y) { + + m = t(matrix(effectiveDataToHeatMap[order.dendrogram(dendo),],ncol=ncol(effectiveDataToHeatMap))) + col_mat = col_fun(m) + nr = nrow(m) + nc = ncol(m) + if(CELL_META$sector.index=="a"){ + for(i in 1:nr) { + circos.rect(1:nc - 1, rep(nr - i, nc), + 1:nc, rep(nr - i + 1, nc), + border = col_mat[i, ], col = col_mat[i, ]) + } + }else{ + circos.text(rep(1,nr), seq(nr,1,-1) , colnames(effectiveDataToHeatMap),cex = 0.3) + } + }) + + #dendo = color_branches(dendo, k = effectiveNbClusters, col = colorRampPalette(brewer.pal(12,"Set3"))(effectiveNbClusters)) + dendo = color_branches(dendo, k = effectiveNbClusters, col = rev(colorspace::rainbow_hcl(effectiveNbClusters))) + + + circos.track(ylim = c(0, attributes(dendo)$height), bg.border = NA, track.height = 0.25, + panel.fun = function(x, y) { + if(CELL_META$sector.index=="a")circos.dendrogram(dendo)} ) + + circos.clear() + ##add legend + lgd_links = Legend(at = seq(ceiling(min(effectiveDataToHeatMap)),floor(max(effectiveDataToHeatMap)),ceiling((floor(max(effectiveDataToHeatMap))-ceiling(min(effectiveDataToHeatMap)))/4)), col_fun = col_fun, + title_position = "topleft", grid_width = unit(5, "mm") ,title = valueMeaning) + + pushViewport(viewport(x = 0.85, y = 0.80, + width 
= 0.1, + height = 0.1, + just = c("left", "bottom"))) + grid.draw(lgd_links) + upViewport() + + + dev.off() + + addComment("[INFO]Circular heatmap drawn",T,opt$log,display=FALSE) + loc <- Sys.setlocale("LC_NUMERIC","C") +}else{ + addComment(c("[WARNING]Circular plot will not be plotted considering row or cluster number < 2"),T,opt$log,display=FALSE) +} +rm(effectiveDataToHeatMap,effectiveRowClust,effectiveNbClusters) + +#plot screeplot +if(class(rowClust)!="logical" && nrow(dataToHeatMap)>2){ + screePlotData=c() + for(iNbClusters in 2:(nbClusters+min(10,max(0,nrow(dataToHeatMap)-nbClusters)))){ + clusteringResults=cutree(rowClust,iNbClusters) + #clusteringResults=kmeans(dataToHeatMap,iNbClusters)$cluster + + #compute variance between each intra-class points amongst themselves (need at least 3 points by cluster) + #screePlotData=c(screePlotData,sum(unlist(lapply(seq(1,iNbClusters),function(x){temp=which(clusteringResults==x);if(length(temp)>2){var(dist(dataToHeatMap[temp,]))}else{0}}))) ) + #compute variance between each intra-class points and fictive mean point (need at least 2 points by cluster) + #screePlotData=c(screePlotData,sum(unlist(lapply(seq(1,iNbClusters),function(x){temp=which(clusteringResults==x);if(length(temp)>1){ var(dist(rbind(apply(dataToHeatMap[temp,],2,mean),dataToHeatMap[temp,]))[1:length(temp)]) }else{0}}))) ) + if(ncol(dataToHeatMap)>1)screePlotData=c(screePlotData,sum(unlist(lapply(seq(1,iNbClusters),function(x){temp=which(clusteringResults==x);if(length(temp)>1){ sum((distExtended(rbind(apply(dataToHeatMap[temp,],2,mean),dataToHeatMap[temp,]),method = opt$distanceMeasure)[1:length(temp)])^2) }else{0}}))) ) + else screePlotData=c(screePlotData,sum(unlist(lapply(seq(1,iNbClusters),function(x){temp=which(clusteringResults==x);if(length(temp)>1){ sum((dataToHeatMap[temp,]-mean(dataToHeatMap[temp,]))^2) }else{0}}))) ) + } + + dataToPlot=data.frame(clusterNb=seq(2,length(screePlotData)+1),wcss=screePlotData) + p <- ggplot(data=dataToPlot, 
aes(clusterNb,wcss)) + geom_point(colour="#EE4444") + geom_line(colour="#DD9999") + + ggtitle("Scree plot") + theme_bw() + xlab(label="Cluster number") + ylab(label="Within cluster sum of squares") + + theme(panel.border=element_blank(),plot.title = element_text(hjust = 0.5),legend.position = "none") + + scale_x_continuous(breaks=seq(min(dataToPlot$clusterNb), max(dataToPlot$clusterNb), 1)) + + #save plotly files + pp <- ggplotly(p) + + if(opt$format=="pdf"){ + pdf(paste(c("./plotDir/screePlot.pdf"),collapse=""))}else{ + png(paste(c("./plotDir/screePlot.png"),collapse="")) + } + plot(p) + dev.off() + + #save plotly files + htmlwidgets::saveWidget(as_widget(pp), paste(c(file.path(getwd(), "plotLyDir"),"/screePlot.html"),collapse=""),selfcontained = F) + + addComment("[INFO]Scree plot drawn",T,opt$log,display=FALSE) +}else{ + addComment(c("[WARNING]Scree plot will not be plotted considering row number <= 2"),T,opt$log,display=FALSE) +} + +##---------------------- + +#filter output based on parameters + +rowToKeep=rownames(dataToHeatMap) +if(!is.null(opt$filterInputOutput) && opt$filterInputOutput=="output"){ + #rowToKeep=intersect(which(comparisonMatrix[,seq(2,ncol(comparisonMatrix),4)]<=opt$pvalThreshold),which(abs(comparisonMatrix[,seq(4,ncol(comparisonMatrix),4)])>=log2(opt$FCthreshold))) + if(is.null(opt$geneListFiltering)){ + if(is.null(opt$genericData)){ + #diff. 
expression matrix + rowToKeep=names(which(unlist(apply(comparisonMatrix,1,function(x)length(intersect(which(x[seq(2,length(x),nbColPerContrast)]<=opt$pvalThreshold),which(abs(x[seq(4,length(x),nbColPerContrast)])>=log2(opt$FCthreshold))))!=0)))) + }else{ + #generic filtering matrix + rowToKeep=rownames(comparisonMatrix) + if(!is.null(opt$comparisonNameLow)){ + restrictedLowComparisons=unlist(strsplit(opt$comparisonNameLow,",")) + rowToKeep=intersect(rowToKeep,names(which(unlist(apply(comparisonMatrix,1,function(x)length(which(x[restrictedLowComparisons]>opt$FCthreshold))!=0))))) + } + if(!is.null(opt$comparisonNameHigh)){ + restrictedHighComparisons=unlist(strsplit(opt$comparisonNameHigh,",")) + rowToKeep=intersect(rowToKeep,names(which(unlist(apply(comparisonMatrix,1,function(x)length(which(x[restrictedHighComparisons]<opt$pvalThreshold))!=0))))) + } + } + }else{ + geneListFiltering=read.csv(opt$geneListFiltering,as.is = 1,header=F) + rowToKeep=unlist(c(geneListFiltering)) + } + if(!is.null(comparisonMatrix) && !all(rowToKeep%in%rownames(comparisonMatrix))){ + #should arrive only with user gene list filtering with diff.exp. results clustering + addComment("[WARNING] some genes of the user defined list are not in the diff. exp. 
input file",T,opt$log) + rowToKeep=intersect(rowToKeep,rownames(comparisonMatrix)) + } + + if(expressionToCluster && !all(rowToKeep%in%rownames(expressionMatrix))){ + addComment("[WARNING] some genes selected by the output filter are not in the expression file",T,opt$log) + rowToKeep=intersect(rowToKeep,rownames(expressionMatrix)) + } + addComment(c("[INFO]Output filtering step:",length(rowToKeep),"remaining rows"),T,opt$log,display=FALSE) +} + +#we add differential analysis info in output if it was directly used for clustering or when it was used for filtering with expression + +#in case of expression or generic data clustering without filtering based on external stats +if(expressionToCluster && is.null(comparisonMatrix)){ + if(length(rowToKeep)==0){ + addComment("[WARNING]No more gene after output filtering step, tabular output will be empty",T,opt$log,display=FALSE) + outputData=matrix(c("Gene","Cluster","noGene","noClustering"),ncol=2,nrow=2,byrow = TRUE) + }else{ + outputData=matrix(0,ncol=2,nrow=length(rowToKeep)+1) + outputData[1,]=c("Gene","Cluster") + outputData[2:(length(rowToKeep)+1),1]=rowToKeep + if(class(rowClust)!="logical" ){ + outputData[2:(length(rowToKeep)+1),2]=cutree(rowClust,nbClusters)[rowToKeep] + }else{ + outputData[2:(length(rowToKeep)+1),2]=0 + } + } +} + +#in case of generic data clustering with filtering based on generic external data +if(!is.null(opt$genericData) && !is.null(comparisonMatrix)){ + if(length(rowToKeep)==0){ + addComment("[WARNING]No more gene after output filtering step, tabular output will be empty",T,opt$log,display=FALSE) + outputData=matrix(c("Gene","Cluster","noGene","noClustering"),ncol=2,nrow=2,byrow = TRUE) + }else{ + outputData=matrix(0,ncol=2+nbComparisons,nrow=length(rowToKeep)+1) + outputData[1,]=c("Gene","Cluster",colnames(comparisonMatrix)) + outputData[2:(length(rowToKeep)+1),1]=rowToKeep + if(class(rowClust)!="logical" ){ + outputData[2:(length(rowToKeep)+1),2]=cutree(rowClust,nbClusters)[rowToKeep] + 
}else{ + outputData[2:(length(rowToKeep)+1),2]=0 + } + outputData[2:(length(rowToKeep)+1),3:(ncol(comparisonMatrix)+2)]=prettyNum(comparisonMatrix[rowToKeep,],digits=4) + } +} + +#in case of expression data clustering with filtering based on diff. exp. results or diff. exp. results clustering +if(is.null(opt$genericData) && !is.null(comparisonMatrix)){ + if(length(rowToKeep)==0){ + addComment("[WARNING]No more gene after output filtering step, tabular output will be empty",T,opt$log,display=FALSE) + outputData=matrix(0,ncol=3,nrow=3) + outputData[1,]=c("","","Comparison") + outputData[2,]=c("Gene","Info","Cluster") + outputData[3,]=c("noGene","noInfo","noClustering") + }else{ + outputData=matrix(0,ncol=3+nbComparisons*nbColPerContrast,nrow=length(rowToKeep)+2) + outputData[1,]=c("","","Comparison",rep(colnames(comparisonMatrix)[seq(1,ncol(comparisonMatrix),nbColPerContrast)],each=nbColPerContrast)) + outputData[2,]=c("Gene","Info","Cluster",rep(c("p-val","FDR.p-val","FC","log2(FC)","t-stat"),nbComparisons)) + outputData[3:(length(rowToKeep)+2),1]=rowToKeep + outputData[3:(length(rowToKeep)+2),2]=comparisonMatrixInfoGene[rowToKeep] + if(class(rowClust)!="logical" ){ + outputData[3:(length(rowToKeep)+2),3]=cutree(rowClust,nbClusters)[rowToKeep] + }else{ + outputData[3:(length(rowToKeep)+2),3]=0 + } + outputData[3:(length(rowToKeep)+2),4:(ncol(comparisonMatrix)+3)]=prettyNum(comparisonMatrix[rowToKeep,],digits=4) + } +} + +addComment("[INFO]Formated output",T,opt$log,display=FALSE) +write.table(outputData,file=opt$outputFile,quote=FALSE,sep="\t",col.names = F,row.names = F) + +##---------------------- + +end.time <- Sys.time() +addComment(c("[INFO]Total execution time for R script:",as.numeric(end.time - start.time,units="mins"),"mins"),T,opt$log,display=FALSE) + + +addComment("[INFO]End of R script",T,opt$log,display=FALSE) + +printSessionInfo(opt$log) + +#sessionInfo() + + +
# src/utils.R
#
# Copyright (c) 2011-2013 Trevor L. Davis <trevor.l.davis@stanford.edu>
#
# This file is free software: you may copy, redistribute and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation, either version 2 of the License, or (at your
# option) any later version.
#
# This file is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.


#' Distance between the rows of a matrix, extended to correlation measures
#'
#' @param x Numeric matrix; distances are computed between its rows.
#' @param method One of the `dist()` methods ("euclidean", "maximum",
#'   "manhattan", "canberra", "binary", "minkowski"), a correlation method
#'   ("pearson", "spearman", "kendall") or an absolute-correlation method
#'   ("absPearson", "absSpearman", "absKendall").
#' @return A `dist` object, or `NULL` when `method` is not recognised.
distExtended <- function(x, method) {
  if (method %in% c("euclidean", "maximum", "manhattan", "canberra",
                    "binary", "minkowski")) {
    return(dist(x, method = method))
  }
  if (method %in% c("pearson", "spearman", "kendall")) {
    # (1 - r) / 2 maps a correlation in [-1, 1] onto a distance in [0, 1]
    return(as.dist(1 - cor(t(x), method = method)) / 2)
  }
  if (method %in% c("absPearson", "absSpearman", "absKendall")) {
    # cor() only knows "pearson", "spearman" and "kendall": strip the "abs"
    # prefix and lower-case the remainder before calling it, otherwise
    # cor() rejects the method name.
    corMethod <- tolower(sub("^abs", "", method))
    return(as.dist(1 - abs(cor(t(x), method = corMethod))))
  }
  # unknown method: keep the historical NULL return value
  NULL
}

#' Display a message and optionally add it to a log file
#'
#' @param text Character vector; its elements are joined with single spaces.
#' @param addToFile If TRUE, the message is written to `fileName`.
#' @param fileName Path of the log file (only used when `addToFile` is TRUE).
#' @param append Passed to `write()`: append to the file or overwrite it.
#' @param display If TRUE, the message is printed with `cat()`.
addComment <- function(text, addToFile = FALSE, fileName = NULL,
                       append = TRUE, display = TRUE) {
  if (display) {
    cat(paste(c(text, "\n"), collapse = " "))
  }
  if (addToFile) {
    write(paste(text, collapse = " "), fileName, append = append)
  }
}

#' Write a summary of `sessionInfo()` to a log file
#'
#' @param fileName Path of the log file.
#' @param append Passed to each `write()` call that follows the header line
#'   (the header line itself is written through `addComment()` with its
#'   default `append = TRUE`).
printSessionInfo <- function(fileName = NULL, append = TRUE) {
  addComment("[INFO]R session info :", TRUE, fileName, display = FALSE)
  tempInfo <- sessionInfo()
  write(paste(tempInfo$R.version$version.string), fileName, append = append)
  write(paste("Platform", tempInfo$platform, sep = " : "),
        fileName, append = append)
  write(paste("Running under", tempInfo$running, sep = " : "),
        fileName, append = append)
  write(paste("Local variables", tempInfo$locale, sep = " : "),
        fileName, append = append)
  write(paste("Attached base packages",
              paste(tempInfo$basePkgs, collapse = "; "), sep = " : "),
        fileName, append = append)
  if (length(tempInfo$otherPkgs) > 0) {
    # one "name version; " entry per attached package
    lineToPrint <- ""
    for (iPack in tempInfo$otherPkgs) {
      lineToPrint <- paste0(lineToPrint, iPack$Package, " ",
                            iPack$Version, "; ")
    }
    write(paste("Other attached packages", lineToPrint, sep = " : "),
          fileName, append = append)
  }
  if (length(tempInfo$loadedOnly) > 0) {
    # same format for packages loaded via namespace only
    lineToPrint <- ""
    for (iPack in tempInfo$loadedOnly) {
      lineToPrint <- paste0(lineToPrint, iPack$Package, " ",
                            iPack$Version, "; ")
    }
    write(paste("Loaded packages", lineToPrint, sep = " : "),
          fileName, append = append)
  }
}

#' Negative of a mathematical expression given as a string
#'
#' Every "+" is swapped with "-" and vice versa; when the expression does not
#' start with a sign, a leading "-" is added.
#'
#' @param expression Character string, e.g. "a-b+c".
#' @return The sign-swapped expression, e.g. "-a+b-c".
negativeExpression <- function(expression) {
  # temporary token so that the two substitutions do not undo each other
  expression <- gsub("\\+", "_toMinus_", expression)
  expression <- gsub("\\-", "+", expression)
  expression <- gsub("_toMinus_", "-", expression)
  firstChar <- substr(expression, 1, 1)
  if (firstChar != "-" && firstChar != "+") {
    expression <- paste(c("-", expression), collapse = "")
  }

  return(expression)
}

#' Returns file name of calling Rscript
#'
#' \code{get_Rscript_filename} returns the file name of calling Rscript
#' @return A string with the filename of the calling script.
#'      If not found (i.e. you are in a interactive session) returns NA.
#'
#' @export
get_Rscript_filename <- function() {
  # Rscript exposes the script path as a "--file=<path>" command argument
  prog <- sub("--file=", "", grep("--file=", commandArgs(), value = TRUE)[1])
  if (.Platform$OS.type == "windows") {
    # double every backslash of the Windows path
    prog <- gsub("\\\\", "\\\\\\\\", prog)
  }
  prog
}

#' Recursively sorts a list
#'
#' \code{sort_list} returns a sorted list
#' @param unsorted_list A list.
#' @return A sorted list.
#' @export
sort_list <- function(unsorted_list) {
  for (ii in seq_along(unsorted_list)) {
    if (is.list(unsorted_list[[ii]])) {
      unsorted_list[[ii]] <- sort_list(unsorted_list[[ii]])
    }
  }
  # reorder the elements by name
  unsorted_list[sort(names(unsorted_list))]
}


# Multiple plot function
#
# ggplot objects can be passed in ..., or to plotlist (as a list of ggplot objects)
# - cols:   Number of columns in layout
# - layout: A matrix specifying the layout. If present, 'cols' is ignored.
# - file:   Not used by the function body; kept for backward compatibility.
#
# If the layout is something like matrix(c(1,2,3,3), nrow=2, byrow=TRUE),
# then plot 1 will go in the upper left, 2 will go in the upper right, and
# 3 will go all the way across the bottom.
#
# Nothing is drawn when no plot is supplied.
#
multiplot <- function(..., plotlist = NULL, file, cols = 1, layout = NULL) {
  library(grid)

  # Make a list from the ... arguments and plotlist
  plots <- c(list(...), plotlist)

  numPlots <- length(plots)

  # Nothing to draw: leave before building a zero-row layout or indexing
  # into an empty list.
  if (numPlots == 0) {
    return(invisible(NULL))
  }

  # If layout is NULL, then use 'cols' to determine layout
  if (is.null(layout)) {
    # Make the panel
    # ncol: Number of columns of plots
    # nrow: Number of rows needed, calculated from # of cols
    layout <- matrix(seq(1, cols * ceiling(numPlots / cols)),
                     ncol = cols, nrow = ceiling(numPlots / cols))
  }

  if (numPlots == 1) {
    print(plots[[1]])

  } else {
    # Set up the page
    grid.newpage()
    pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))

    # Make each plot, in the correct location
    for (i in seq_len(numPlots)) {
      # Get the i,j matrix positions of the regions that contain this subplot
      matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))

      print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,
                                      layout.pos.col = matchidx$col))
    }
  }
}