Mercurial > repos > bimib > cobraxy
comparison COBRAxy/marea.py @ 313:e796d29076be draft
Uploaded
| author | francesco_lapi |
|---|---|
| date | Mon, 26 May 2025 16:00:58 +0000 |
| parents | 38c9a958ea78 |
| children | bfe98b0f04fc |
comparison
equal
deleted
inserted
replaced
| 312:a99667e35947 | 313:e796d29076be |
|---|---|
| 769 | 769 |
| 770 Returns: | 770 Returns: |
| 771 None : mutates the comparisonResult dictionary in place with the p-values. | 771 None : mutates the comparisonResult dictionary in place with the p-values. |
| 772 """ | 772 """ |
| 773 | 773 |
| 774 # pyDESeq2 needs at least 2 replicates per sample so I check this | |
| 775 if len(dataset1Data[0]) < 2 or len(dataset2Data[0]) < 2: | |
| 776 raise ValueError("Datasets must have at least 2 replicates each") | |
| 777 | |
| 774 # pyDESeq2 is based on pandas, so we need to convert the data into a DataFrame and clean it from NaN values | 778 # pyDESeq2 is based on pandas, so we need to convert the data into a DataFrame and clean it from NaN values |
| 775 dataframe1 = pd.DataFrame(dataset1Data, index=ids) | 779 dataframe1 = pd.DataFrame(dataset1Data, index=ids) |
| 776 dataframe2 = pd.DataFrame(dataset2Data, index=ids) | 780 dataframe2 = pd.DataFrame(dataset2Data, index=ids) |
| 777 | 781 |
| 782 # pyDESeq2 requires datasets to be samples x reactions and integer values | |
| 778 dataframe1_clean = dataframe1.dropna(axis=0, how="any").T.astype(int) | 783 dataframe1_clean = dataframe1.dropna(axis=0, how="any").T.astype(int) |
| 779 dataframe2_clean = dataframe2.dropna(axis=0, how="any").T.astype(int) | 784 dataframe2_clean = dataframe2.dropna(axis=0, how="any").T.astype(int) |
| 780 | 785 dataframe1_clean.index = [f"ds1_rep{i+1}" for i in range(dataframe1_clean.shape[0])] |
| 781 # pyDESeq2 works on a DataFrame with values and another with infos about samples and conditions | 786 dataframe2_clean.index = [f"ds2_rep{j+1}" for j in range(dataframe2_clean.shape[0])] |
| 787 | |
| 788 # pyDESeq2 works on a DataFrame with values and another with infos about how samples are split (like dataset class) | |
| 782 dataframe = pd.concat([dataframe1_clean, dataframe2_clean], axis=0) | 789 dataframe = pd.concat([dataframe1_clean, dataframe2_clean], axis=0) |
| 783 metadata = pd.DataFrame(np.concatenate([np.full(dataframe1_clean.shape[0], "dataset1"), np.full(dataframe2_clean.shape[0], "dataset2")]), columns=["dataset"]) | 790 metadata = pd.DataFrame({"dataset": (["dataset1"]*dataframe1_clean.shape[0] + ["dataset2"]*dataframe2_clean.shape[0])}, index=dataframe.index) |
| 784 metadata.index = dataframe.index | 791 |
| 792 # Ensure the index of the metadata matches the index of the dataframe | |
| 793 if not dataframe.index.equals(metadata.index): | |
| 794 raise ValueError("The index of the metadata DataFrame must match the index of the counts DataFrame.") | |
| 785 | 795 |
| 786 # Prepare and run pyDESeq2 | 796 # Prepare and run pyDESeq2 |
| 787 inference = DefaultInference() | 797 inference = DefaultInference() |
| 788 dds = DeseqDataSet(counts=dataframe, metadata=metadata, design="~dataset", inference=inference, quiet=True, low_memory=True) | 798 dds = DeseqDataSet(counts=dataframe, metadata=metadata, design="~dataset", inference=inference, quiet=True, low_memory=True) |
| 789 dds.deseq2() | 799 dds.deseq2() |
