Previous changeset 1:23671dd35026 (2018-09-19) Next changeset 3:e446b013c048 (2019-02-01) |
Commit message:
planemo upload commit bdd7e8a1f08c11db2a9f1b6db5535c6d32153b2b |
modified:
peptide_atlas.loc.sample |
added:
Get_ms-ms_observations.R Get_ms-ms_observations.xml |
removed:
proteore_ms_observation_pepatlas.R proteore_ms_observation_pepatlas.xml |
b |
diff -r 23671dd35026 -r 6ab9d2778f04 Get_ms-ms_observations.R --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Get_ms-ms_observations.R Tue Dec 18 09:50:50 2018 -0500 |
[ |
@@ -0,0 +1,106 @@ +# Read file and return file content as data.frame +read_file <- function(path,header){ + file <- try(read.csv(path,header=header, sep="\t",stringsAsFactors = FALSE, quote="\"", check.names = F),silent=TRUE) + if (inherits(file,"try-error")){ + stop("File not found !") + }else{ + return(file) + } +} + +str2bool <- function(x){ + if (any(is.element(c("t","true"),tolower(x)))){ + return (TRUE) + }else if (any(is.element(c("f","false"),tolower(x)))){ + return (FALSE) + }else{ + return(NULL) + } +} + +nb_obs_PeptideAtlas <- function(input, atlas_file) { + ## Calculate the sum of n_observations for each ID in input + atlas = read_file(atlas_file, T) + return(atlas$nb_obs[match(input,atlas$Uniprot_AC)]) +} + +main = function() { + args <- commandArgs(TRUE) + if(length(args)<1) { + args <- c("--help") + } + + # Help section + if("--help" %in% args) { + cat("Selection and Annotation HPA + Arguments: + --input_type: type of input (list of id or filename) + --input: input + --atlas: list of file(s) path to use + --output: text output filename \n") + q(save="no") + } + + # Parse arguments + parseArgs <- function(x) strsplit(sub("^--", "", x), "=") + argsDF <- as.data.frame(do.call("rbind", parseArgs(args))) + args <- as.list(as.character(argsDF$V2)) + names(args) <- argsDF$V1 + + #save(args,file="/home/dchristiany/proteore_project/ProteoRE/tools/retrieve_msbased_pepatlas/args.Rda") + #load("/home/dchristiany/proteore_project/ProteoRE/tools/retrieve_msbased_pepatlas/args.Rda") + + # Extract input + input_type = args$input_type + if (input_type == "list") { + input = strsplit(args$input, "[ \t\n]+")[[1]] + } else if (input_type == "file") { + filename = args$input + ncol = args$column + # Check ncol + if (! as.numeric(gsub("c", "", ncol)) %% 1 == 0) { + stop("Please enter an integer for level") + } else { + ncol = as.numeric(gsub("c", "", ncol)) + } + header = str2bool(args$header) + file = read_file(filename, header) + input = sapply(file[,ncol],function(x) strsplit(as.character(x),";")[[1]][1],USE.NAMES = F) + } + + output = args$output + + #function to create a list of infos from file path + extract_info_from_path <- function(path) { + file_name=strsplit(tail(strsplit(path,"/")[[1]],n=1),"\\.")[[1]][1] + date=tail(strsplit(file_name,"_")[[1]],n=1) + tissue=paste(strsplit(file_name,"_")[[1]][1:2],collapse="_") + return (c(date,tissue,file_name,path)) + } + + #data_frame building + paths=strsplit(args$atlas,",")[[1]] + tmp <- sapply(paths, extract_info_from_path,USE.NAMES = FALSE) + df <- as.data.frame(t(as.data.frame(tmp)),row.names = c(""),stringsAsFactors = FALSE) + names(df) <- c("date","tissue","filename","path") + + # Annotations + res = sapply(df$path, function(x) nb_obs_PeptideAtlas(input, x), USE.NAMES = FALSE) + + colnames(res)=df$filename + + # Write output + if (input_type == "list") { + res = cbind(as.matrix(input), res) + colnames(res)[1] = "Uniprot accession number" + } else if (input_type == "file") { + res = cbind(file, res) + } + res = as.data.frame(apply(res, c(1,2), function(x) gsub("^$|^ $", NA, x))) + write.table(res, output, row.names = FALSE, sep = "\t", quote = FALSE) + +} + +main() +#Rscript retrieve_peptideatlas.R --input_type="file" --input="test-data/FKW_Lacombe_et_al_2017_OK.txt" --atlas_brain="Human_Brain_201803_PeptideAtlas.txt" --column="c1" --header="true" --output="test-data/PeptideAtlas_output.txt" --atlas_urine="Human_Urine_201803_PeptideAtlas.txt" --atlas="brain,urine" + |
b |
diff -r 23671dd35026 -r 6ab9d2778f04 Get_ms-ms_observations.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Get_ms-ms_observations.xml Tue Dec 18 09:50:50 2018 -0500 |
[ |
@@ -0,0 +1,128 @@ +<tool id="retr_pepatlas1" name="Get MS/MS observations in tissue/fluid" version="2018.12.18"> + <description>[Peptide Atlas]</description> + <requirements> + <requirement type="package" version="3.4.1">R</requirement> + </requirements> + <stdio> + <exit_code range="1:" /> + </stdio> + <command interpreter="Rscript"> + $__tool_directory__/Get_ms-ms_observations.R + --input_type="$input.ids" + #if $input.ids == "list" + --input="$input.list" + #else + --input="$input.file" + --column_number="$input.ncol" + --header="$input.header" + #end if + --atlas=${",".join([$__tool_data_path__+"/"+str(ref) for ref in str($atlas).split(",")])} + --output="$output" + </command> + + <inputs> + <conditional name="input" > + <param name="ids" type="select" label="Enter your IDs (UniProt Accession number only)" help="Copy/paste or from a file (e.g. table)" > + <option value="list">Copy/paste your IDs</option> + <option value="file" selected="true">Input file containing your IDs </option> + </param> + <when value="list" > + <param name="list" type="text" label="Copy/paste your IDs" help='IDs must be separated by spaces into the form field, for example: P31946 P62258' > + <sanitizer> + <valid initial="string.printable"> + <remove value="'"/> + </valid> + <mapping initial="none"> + <add source="'" target="__sq__"/> + </mapping> + </sanitizer> + </param> + </when> + <when value="file" > + <param name="file" type="data" format="txt,tabular" label="Select your file" help="" /> + <param name="header" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Does file contain header?" /> + <param name="ncol" type="text" value="c1" label="Column of IDs" help='For example, fill in "c1" if it is the first column, "c2" if it is the second column and so on' /> + </when> + </conditional> + <!-- Peptide Atlas options --> + <param name="atlas" type="select" display="checkboxes" multiple="true" label="Proteomics dataset (biological sample)" optional="false" > + <options from_data_table="peptide_atlas"> + <filter type="sort_by" column="2"/> + </options> + </param> + </inputs> + + <outputs> + <data name="output" format="tsv" label=""/> + </outputs> + + <tests> + <test> + <conditional name="input"> + <param name="ids" value="file"/> + <param name="file" value="FKW_Lacombe_et_al_2017_OK.txt"/> + <param name="ncol" value="c1"/> + <param name="header" value="true"/> + </conditional> + + <!--${__HERE__} does not work here, you must put absolute path here and in test-data/peptide_atlas.loc --> + <param name="atlas" value="${__HERE__}/test-data/Human_Brain_18-07-2018.tsv"/> + + <output name="output" file="PeptideAtlas_output.txt"/> + </test> + </tests> + + <help><![CDATA[ + +**Description** + +Given a list of Uniprot accession number this tool indicates the number of times a protein has(ve) been observed in a given sample using LC-MS/MS proteomics approach. +Could be of interest for people who wants to know to what extent a protein is detectable (and to roughly estimate its level) in a given sample using MS-based proteomics. + +---- + +**Input** + +A list of Uniprot accession number (e.g. Q12860) provided either in a copy/paste mode or in the form of a file (in this case, it is necessary to specify the column where are your Uniprot accession number). + +If your input file or list contains other type of IDs, please use the ID_Converter tool. + +---- + +**Parameters** + +"Proteomics dataset (biological sample)": available human biological samples analyzed by MS/MS are the following: brain, heart, kidney, liver, plasma, urine and cerebrospinal fluid (CSF). + +---- + +**Output** + +Additional columns are created for each selected proteomics sample reporting the number of times all peptides corresponding to a protein have been observed by LC-MS/MS according to Peptide Atlas. + +a “NA” means that no information has been reported suggesting that this protein has not been observed in the sample of interest. + +---- + +**Data source (release date)** + +Data were retrieved from Peptide Atlas release (January 2018) through their query interface. + +---- + +.. class:: infomark + +**Authors** + +T.P. Lien Nguyen, David Christiany, Florence Combes, Yves Vandenbrouck CEA, INSERM, CNRS, Grenoble-Alpes University, BIG Institute, FR + +Sandra Dérozier, Olivier Rué, Christophe Caron, Valentin Loux INRA, Paris-Saclay University, MAIAGE Unit, Migale Bioinformatics platform, FR + +This work has been partially funded through the French National Agency for Research (ANR) IFB project. + +Contact support@proteore.org for any questions or concerns about the Galaxy implementation of this tool. + + ]]></help> + <citations> + </citations> +</tool> + |
b |
diff -r 23671dd35026 -r 6ab9d2778f04 peptide_atlas.loc.sample --- a/peptide_atlas.loc.sample Wed Sep 19 05:24:38 2018 -0400 +++ b/peptide_atlas.loc.sample Tue Dec 18 09:50:50 2018 -0500 |
b |
@@ -3,3 +3,10 @@ #This is a tab separated file (TAB, not 4 spaces !) # #<tissue> <name> <value> +Human_Brain Human Brain 25/07/2018 peptide_atlas/Human_Brain_25-07-2018.tsv +Human_CSF Human CSF 25/07/2018 peptide_atlas/Human_CSF_25-07-2018.tsv +Human_Heart Human Heart 25/07/2018 peptide_atlas/Human_Heart_25-07-2018.tsv +Human_Kidney Human Kidney 25/07/2018 peptide_atlas/Human_Kidney_25-07-2018.tsv +Human_Liver Human Liver 25/07/2018 peptide_atlas/Human_Liver_25-07-2018.tsv +Human_Plasma Human Plasma 25/07/2018 peptide_atlas/Human_Plasma_25-07-2018.tsv +Human_Urine Human Urine 25/07/2018 peptide_atlas/Human_Urine_25-07-2018.tsv |
b |
diff -r 23671dd35026 -r 6ab9d2778f04 proteore_ms_observation_pepatlas.R --- a/proteore_ms_observation_pepatlas.R Wed Sep 19 05:24:38 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,122 +0,0 @@ -# Read file and return file content as data.frame -readfile <- function(filename, header) { - if (header == "true") { - # Read only first line of the file as header: - headers <- try(read.table(filename, nrows = 1, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings=c("", "NA"), blank.lines.skip = TRUE, quote = ""),silent=TRUE) - if (!inherits(headers, 'try-error')){ - file - } else { - stop("Your file seems to be empty, 'number of MS/MS observations in a tissue' tool stopped !") - } - #Read the data of the files (skipping the first row) - file <- read.table(filename, skip = 1, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings=c("", "NA"), blank.lines.skip = TRUE, quote = "") - # Remove empty rows - file <- file[!apply(is.na(file) | file == "", 1, all), , drop=FALSE] - #And assign the header to the data - names(file) <- headers - } - else { - file <- try(read.table(filename, header = FALSE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE, na.strings=c("", "NA"), blank.lines.skip = TRUE, quote = ""),silent=TRUE) - if (!inherits(file, 'try-error')){ - file - } else { - stop("Your file seems to be empty, 'number of MS/MS observations in a tissue' tool stopped !") - } - # Remove empty rows - file <- file[!apply(is.na(file) | file == "", 1, all), , drop=FALSE] - } - return(file) -} - -nb_obs_PeptideAtlas <- function(input, atlas_file) { - ## Calculate the sum of n_observations for each ID in input - atlas = readfile(atlas_file, "true") - return(atlas$nb_obs[match(input,atlas$Uniprot_AC)]) -} - -main = function() { - args <- commandArgs(TRUE) - if(length(args)<1) { - args <- c("--help") - } - - # Help section - if("--help" %in% args) { - cat("Selection and Annotation HPA - Arguments: - --input_type: type of input (list of id or filename) - --input: input - --atlas: list of file(s) path to use - --output: text output filename \n") - q(save="no") - } - - # Parse arguments - parseArgs <- function(x) strsplit(sub("^--", "", x), "=") - argsDF <- as.data.frame(do.call("rbind", parseArgs(args))) - args <- as.list(as.character(argsDF$V2)) - names(args) <- argsDF$V1 - - #save(args,file="/home/dchristiany/proteore_project/ProteoRE/tools/retrieve_msbased_pepatlas/args.Rda") - #load("/home/dchristiany/proteore_project/ProteoRE/tools/retrieve_msbased_pepatlas/args.Rda") - - # Extract input - input_type = args$input_type - if (input_type == "list") { - input = strsplit(args$input, "[ \t\n]+")[[1]] - } else if (input_type == "file") { - filename = args$input - ncol = args$column - # Check ncol - if (! as.numeric(gsub("c", "", ncol)) %% 1 == 0) { - stop("Please enter an integer for level") - } else { - ncol = as.numeric(gsub("c", "", ncol)) - } - header = args$header - # Get file content - file = readfile(filename, header) - # Extract Protein IDs list - input = c() - for (row in as.character(file[,ncol])) { - input = c(input, strsplit(row, ";")[[1]][1]) - } - } - - output = args$output - - #function to create a list of infos from file path - extract_info_from_path <- function(path) { - file_name=strsplit(tail(strsplit(path,"/")[[1]],n=1),"\\.")[[1]][1] - date=tail(strsplit(file_name,"_")[[1]],n=1) - tissue=paste(strsplit(file_name,"_")[[1]][1:2],collapse="_") - return (c(date,tissue,file_name,path)) - } - - #data_frame building - paths=strsplit(args$atlas,",")[[1]] - tmp <- sapply(paths, extract_info_from_path,USE.NAMES = FALSE) - df <- as.data.frame(t(as.data.frame(tmp)),row.names = c(""),stringsAsFactors = FALSE) - names(df) <- c("date","tissue","filename","path") - - # Annotations - res = sapply(df$path, function(x) nb_obs_PeptideAtlas(input, x), USE.NAMES = FALSE) - names=df$filename - - # Write output - if (input_type == "list") { - res = cbind(as.matrix(input), res) - names = c("Uniprot accession number", names) - colnames(res) = names - write.table(res, output, row.names = FALSE, sep = "\t", quote = FALSE) - } else if (input_type == "file") { - names = c(names(file), names) - output_content = cbind(file, res) - colnames(output_content) = names - write.table(output_content, output, row.names = FALSE, sep = "\t", quote = FALSE) - } -} - -main() -#Rscript retrieve_peptideatlas.R --input_type="file" --input="test-data/FKW_Lacombe_et_al_2017_OK.txt" --atlas_brain="Human_Brain_201803_PeptideAtlas.txt" --column="c1" --header="true" --output="test-data/PeptideAtlas_output.txt" --atlas_urine="Human_Urine_201803_PeptideAtlas.txt" --atlas="brain,urine" - |
b |
diff -r 23671dd35026 -r 6ab9d2778f04 proteore_ms_observation_pepatlas.xml --- a/proteore_ms_observation_pepatlas.xml Wed Sep 19 05:24:38 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,118 +0,0 @@ -<tool id="retr_pepatlas1" name="Retrieve number of MS/MS observations in a tissue from Peptide Atlas" version="2018.09.19"> - <description> - – homo sapiens only - </description> - <requirements> - <requirement type="package" version="3.4.1">R</requirement> - </requirements> - <stdio> - <exit_code range="1:" /> - </stdio> - <command interpreter="Rscript"> - $__tool_directory__/proteore_ms_observation_pepatlas.R - --input_type="$input.ids" - #if $input.ids == "list" - --input="$input.list" - #else - --input="$input.file" - --column_number="$input.ncol" - --header="$input.header" - #end if - --atlas="$atlas" - --output="$output" - </command> - - <inputs> - <conditional name="input" > - <param name="ids" type="select" label="Provide your identifiers" help="Copy/paste or ID list from a file (e.g. table)" > - <option value="list">Copy/paste your identifiers</option> - <option value="file" selected="true">Input file containing your identifiers</option> - </param> - <when value="list" > - <param name="list" type="text" label="Copy/paste your identifiers" help='IDs must be separated by spaces into the form field, for example: P31946 P62258' > - <sanitizer> - <valid initial="string.printable"> - <remove value="'"/> - </valid> - <mapping initial="none"> - <add source="'" target="__sq__"/> - </mapping> - </sanitizer> - </param> - </when> - <when value="file" > - <param name="file" type="data" format="txt,tabular" label="Choose a file that contains your list of Uniprot IDs" help="" /> - <param name="header" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Does your input file contain header?" /> - <param name="ncol" type="text" value="c1" label="The column number of Uniprot IDs to map" help='For example, fill in "c1" if it is the first column, "c2" if it is the second column and so on' /> - </when> - </conditional> - <!-- Peptide Atlas options --> - <param name="atlas" type="select" display="checkboxes" multiple="true" label="Please select proteomics dataset (sample)"> - <options from_data_table="peptide_atlas"> - <filter type="sort_by" column="2"/> - </options> - </param> - </inputs> - - <outputs> - <data name="output" format="tabular" label=""/> - </outputs> - - <tests> - <test> - <conditional name="input"> - <param name="ids" value="file"/> - <param name="file" value="FKW_Lacombe_et_al_2017_OK.txt"/> - <param name="ncol" value="c1"/> - <param name="header" value="true"/> - </conditional> - - <!--${__HERE__} does not work here, you must put absolute path here and in test-data/peptide_atlas.loc --> - <param name="atlas" value="${__HERE__}/test-data/Human_Brain_18-07-2018.tsv"/> - - <output name="output" file="PeptideAtlas_output.txt"/> - </test> - </tests> - - <help><![CDATA[ - -Given a list of Uniprot accession number this tool indicates the number of times a protein -has(ve) been observed in a given sample using LC-MS/MS proteomics approach. -Could be of interest for people who wants to know to what extent a protein -is detectable (and to roughly estimate its level) in a given sample using proteomics. -Available human biological samples are the following: brain, heart, kidney, -liver, plasma, urine and cerebrospinal fluid (CSF). Data were retrieved from -Peptide Atlas release (Jan 2018). - -**Input required** - -A list of Uniprot accession number (e.g. Q12860) provided either in the form of -a file (if you choose a file, it is necessary to specify the column where -are your Uniprot accession number) or in a copy/paste mode. If your input file -or list contains other type of IDs, please use the ID_Converter tool to convert -yours into Uniprot accession number. - -**Output** - -Additional columns are created for each selected proteomics sample reporting -the number of times all peptides corresponding to a protein have been observed by -LC-MS/MS according to Peptide Atlas. “NA” means that no information has been reported -suggesting that this protein has not been observed in the sample of interest. - ----- - -**Authors** - -T.P. Lien Nguyen, David Christiany, Florence Combes, Yves Vandenbrouck CEA, INSERM, CNRS, Grenoble-Alpes University, BIG Institute, FR - -Sandra Dérozier, Olivier Rué, Christophe Caron, Valentin Loux INRA, Paris-Saclay University, MAIAGE Unit, Migale Bioinformatics platform - -This work has been partially funded through the French National Agency for Research (ANR) IFB project. - -Contact support@proteore.org for any questions or concerns about the Galaxy implementation of this tool. - - ]]></help> - <citations> - </citations> -</tool> - |