Mercurial > repos > bimib > cobraxy
changeset 313:e796d29076be draft
Uploaded
author | francesco_lapi |
---|---|
date | Mon, 26 May 2025 16:00:58 +0000 |
parents | a99667e35947 |
children | 70e6d07d9c2d |
files | COBRAxy/marea.py |
diffstat | 1 files changed, 14 insertions(+), 4 deletions(-) [+] |
line wrap: on
line diff
--- a/COBRAxy/marea.py Mon May 26 14:08:23 2025 +0000 +++ b/COBRAxy/marea.py Mon May 26 16:00:58 2025 +0000 @@ -771,17 +771,27 @@ None : mutates the comparisonResult dictionary in place with the p-values. """ + # pyDESeq2 needs at least 2 replicates per sample so I check this + if len(dataset1Data[0]) < 2 or len(dataset2Data[0]) < 2: + raise ValueError("Datasets must have at least 2 replicates each") + # pyDESeq2 is based on pandas, so we need to convert the data into a DataFrame and clean it from NaN values dataframe1 = pd.DataFrame(dataset1Data, index=ids) dataframe2 = pd.DataFrame(dataset2Data, index=ids) - + + # pyDESeq2 requires datasets to be samples x reactions and integer values dataframe1_clean = dataframe1.dropna(axis=0, how="any").T.astype(int) dataframe2_clean = dataframe2.dropna(axis=0, how="any").T.astype(int) + dataframe1_clean.index = [f"ds1_rep{i+1}" for i in range(dataframe1_clean.shape[0])] + dataframe2_clean.index = [f"ds2_rep{j+1}" for j in range(dataframe2_clean.shape[0])] - # pyDESeq2 works on a DataFrame with values and another with infos about samples and conditions + # pyDESeq2 works on a DataFrame with values and another with infos about how samples are split (like dataset class) dataframe = pd.concat([dataframe1_clean, dataframe2_clean], axis=0) - metadata = pd.DataFrame(np.concatenate([np.full(dataframe1_clean.shape[0], "dataset1"), np.full(dataframe2_clean.shape[0], "dataset2")]), columns=["dataset"]) - metadata.index = dataframe.index + metadata = pd.DataFrame({"dataset": (["dataset1"]*dataframe1_clean.shape[0] + ["dataset2"]*dataframe2_clean.shape[0])}, index=dataframe.index) + + # Ensure the index of the metadata matches the index of the dataframe + if not dataframe.index.equals(metadata.index): + raise ValueError("The index of the metadata DataFrame must match the index of the counts DataFrame.") # Prepare and run pyDESeq2 inference = DefaultInference()