This Jupyter notebook describes a typical metabolomics data analysis workflow for a study with a binary classification outcome. The main steps included are: data import, data cleaning and quality assessment (including PCA), univariate statistics, splitting the data into training and test sets, cross-validated PLS-DA modelling, model evaluation (including a permutation test and feature importance), and export of the results to Excel.
The study used in this tutorial has been previously published as an open access article (Chan et al., 2016) in the British Journal of Cancer, and the deconvolved and annotated data file is deposited at the Metabolomics Workbench data repository (Project ID PR000699). The data can be accessed directly via its project DOI: 10.21228/M8B10B. The 1H-NMR spectra were acquired at Canada's National High Field Nuclear Magnetic Resonance Centre (NANUC) using a 600 MHz Varian Inova spectrometer. Spectral deconvolution and metabolite annotation were performed using the Chenomx NMR Suite v7.6. Unfortunately, the raw NMR data is unavailable.
The first code cell of this tutorial (below this text box) imports packages and modules into the Jupyter environment. Packages and modules provide additional functions and tools that extend the basic functionality of the Python language. We will need the following tools to analyse the data in this tutorial:
- numpy: the fundamental package for scientific computing with Python, providing tools to work with arrays and linear algebra
- pandas: provides high-performance, easy-to-use data structures and data analysis tools
- sklearn: tools for machine learning in Python
- train_test_split: a method to split arrays into random test/training subsets for cross-validation
- cimcb_lite: a library of helpful functions provided by the authors

Run the cell by clicking anywhere in the cell (the cell will be surrounded by a blue box) and then clicking Run in the menu. When successfully executed, the cell will print All packages successfully loaded in the notebook below the cell.
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import cimcb_lite as cb
print('All packages successfully loaded')
All packages successfully loaded
This workflow requires data to be uploaded as a Microsoft Excel file, using the Tidy Data framework (i.e. each column is a variable, and each row is an observation). As such, the Excel file should contain a Data Sheet and Peak Sheet. The Data Sheet contains all the metabolite concentrations and metadata associated with each observation (requiring the inclusion of the columns: Idx, SampleID, and Class). The Peak Sheet contains all the metadata pertaining to each measured metabolite (requiring the inclusion of the columns: Idx, Name, and Label). Please inspect the Excel file used in this tutorial before proceeding.
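If you are preparing your own study in this format, a compliant two-sheet workbook can be assembled directly with pandas. The short sketch below is illustrative only: the values are invented and the filename my_study.xlsx is hypothetical.

# Illustrative sketch: build a minimal two-sheet Excel file in the required layout
# (invented values; 'my_study.xlsx' is a hypothetical filename)
data = pd.DataFrame({'Idx': [1, 2], 'SampleID': ['sample_1', 'sample_2'],
                     'Class': ['GC', 'HE'], 'M1': [90.1, 43.0]})
peak = pd.DataFrame({'Idx': [1], 'Name': ['M1'], 'Label': ['Citrate']})
with pd.ExcelWriter('my_study.xlsx') as writer:
    data.to_excel(writer, sheet_name='Data', index=False)
    peak.to_excel(writer, sheet_name='Peak', index=False)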
The code cell below loads the Data and Peak sheets from an Excel file, using the CIMCB helper function load_dataXL()
. When this is complete, you should see confirmation that Peak (stored in the Peak
worksheet in the Excel file) and Data (stored in the Data
worksheet in the Excel file) tables have been loaded:
Loadings PeakFile: Peak
Loadings DataFile: Data
Data Table & Peak Table is suitable.
TOTAL SAMPLES: 140 TOTAL PEAKS: 149
Done!
Once loaded, the data is available for use in variables called dataTable
and peakTable
.
# The path to the input file (Excel spreadsheet)
filename = 'GastricCancer_NMR.xlsx'
# Load Peak and Data tables into two variables
dataTable, peakTable = cb.utils.load_dataXL(filename, DataSheet='Data', PeakSheet='Peak')
Loadings PeakFile: Peak
Loadings DataFile: Data
Data Table & Peak Table is suitable.
TOTAL SAMPLES: 140 TOTAL PEAKS: 149
Done!
Data table
The dataTable
table can be displayed interactively so we can inspect and check the imported values. To do this, we use the display()
function. For this example the imported data consists of 140 samples and 149 metabolites.
Note that each row describes a single urine sample, where Idx and SampleID identify the sample, SampleType records whether it is a pooled quality control (QC) or a study sample, Class gives the clinical group (GC = gastric cancer, BN = benign, HE = healthy, or QC), and the columns M1...M149 hold the metabolite concentrations.
display(dataTable) # View and check the dataTable
 | Idx | SampleID | SampleType | Class | M1 | M2 | M3 | M4 | M5 | M6 | ... | M140 | M141 | M142 | M143 | M144 | M145 | M146 | M147 | M148 | M149 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---
1 | 1 | sample_1 | QC | QC | 90.1 | 491.6 | 202.9 | 35.0 | 164.2 | 19.7 | ... | 115.1 | 64.8 | 25.5 | 473.9 | 26.5 | NaN | 6.8 | 118.6 | 710.6 | 203.6 |
2 | 2 | sample_2 | Sample | GC | 43.0 | 525.7 | 130.2 | NaN | 694.5 | 114.5 | ... | 84.2 | 357.1 | 16.1 | 455.5 | 29.5 | 28.1 | 35.8 | 316.1 | 390.7 | 199.0 |
3 | 3 | sample_3 | Sample | BN | 214.3 | 10703.2 | 104.7 | 46.8 | 483.4 | 152.3 | ... | 993.5 | 1698.5 | 32.9 | 75.9 | 33.2 | 802.8 | 967.6 | 154.4 | 31.6 | 195.2 |
4 | 4 | sample_4 | Sample | HE | 31.6 | 59.7 | 86.4 | 14.0 | 88.6 | 10.3 | ... | 58.1 | 83.5 | 60.5 | 136.9 | 17.0 | 10.2 | 24.7 | 64.1 | 91.4 | 91.6 |
5 | 5 | sample_5 | Sample | GC | 81.9 | 258.7 | 315.1 | 8.7 | 243.2 | 18.4 | ... | 44.5 | 47.6 | 45.6 | 1441.7 | 35.2 | 0.1 | 22.8 | 135.0 | 322.3 | 254.3 |
6 | 6 | sample_6 | Sample | BN | 196.9 | 128.2 | 862.5 | 18.7 | 200.1 | 4.7 | ... | 143.8 | 157.2 | 10.4 | 182.1 | 32.6 | 435.1 | 325.3 | 162.4 | 129.7 | 207.2 |
7 | 7 | sample_7 | Sample | GC | 45.5 | 190.4 | 32.0 | NaN | 362.7 | 35.7 | ... | 2413.8 | 260.0 | 9.6 | 1004.5 | 39.2 | 1748.7 | 1360.0 | 240.9 | 326.5 | 229.9 |
8 | 8 | sample_8 | Sample | HE | 91.0 | 231.9 | 212.5 | 18.2 | 72.5 | 6.7 | ... | 71.6 | 56.5 | 7.1 | 384.7 | 29.7 | 0.9 | 1.7 | 70.4 | 18.0 | 81.6 |
9 | 9 | sample_9 | Sample | GC | 480.6 | 470.3 | 60.7 | 8.4 | 270.2 | 57.4 | ... | 71.6 | NaN | 54.0 | 1369.7 | 29.3 | 0.2 | 12.0 | 34.0 | 106.2 | 197.2 |
10 | 10 | sample_10 | QC | QC | 62.2 | 181.5 | 75.5 | 36.0 | 203.4 | 18.7 | ... | 107.2 | 111.6 | 20.8 | 516.5 | 23.2 | 5.4 | 5.1 | 197.2 | 261.9 | 232.4 |
11 | 11 | sample_11 | Sample | BN | 36.5 | 190.1 | 153.1 | 47.4 | 146.5 | 26.9 | ... | 66.3 | 74.0 | 6.1 | 180.7 | 23.6 | 2.8 | 18.3 | 132.0 | 123.2 | 178.3 |
12 | 12 | sample_12 | Sample | HE | 93.5 | 525.3 | 215.8 | 45.0 | 62.6 | 12.8 | ... | 87.6 | 97.4 | 22.8 | 236.9 | 25.9 | 80.0 | 43.0 | NaN | 159.4 | 185.6 |
13 | 13 | sample_13 | Sample | GC | NaN | 205.9 | 19.6 | 40.9 | 106.9 | 25.0 | ... | 26.0 | 167.7 | NaN | 128.6 | 25.4 | 148.5 | 64.8 | 79.1 | 48.8 | 190.4 |
14 | 14 | sample_14 | Sample | BN | 52.1 | 94.7 | 68.2 | 26.0 | 95.5 | 9.0 | ... | 43.6 | 136.2 | 282.9 | 2926.1 | 1251.4 | 2.7 | 18.5 | 98.9 | 220.1 | 113.4 |
15 | 15 | sample_15 | Sample | HE | NaN | 250.3 | 59.1 | 70.6 | 65.4 | 20.5 | ... | 103.2 | 172.3 | 3.7 | 1.6 | 26.5 | NaN | 7.6 | 406.3 | 404.7 | 129.5 |
16 | 16 | sample_16 | Sample | GC | NaN | 29.8 | 41.1 | NaN | 193.1 | 15.6 | ... | 22.4 | 164.8 | 3.1 | 81.4 | 22.6 | 4.5 | 3.8 | 80.7 | 280.6 | 81.8 |
17 | 17 | sample_17 | Sample | BN | 62.5 | 123.5 | 99.7 | 51.1 | 45.0 | 40.4 | ... | 50.2 | 208.6 | 8.3 | 259.5 | 23.7 | 5.8 | 10.4 | 246.6 | 206.0 | 199.9 |
18 | 18 | sample_18 | Sample | HE | 0.9 | 225.3 | 173.3 | 13.4 | 51.2 | 7.1 | ... | 90.3 | 104.7 | 4.6 | 102.5 | 25.6 | NaN | NaN | 60.8 | 1.0 | 47.9 |
19 | 19 | sample_19 | QC | QC | 154.3 | 306.9 | 191.9 | 40.4 | 180.1 | 18.6 | ... | 35.1 | 151.3 | 24.5 | 171.9 | 27.5 | NaN | 9.6 | 162.7 | 684.0 | 171.3 |
20 | 20 | sample_20 | Sample | GC | 216.3 | 422.2 | 96.7 | 23.7 | 13.4 | 39.6 | ... | 5.8 | 126.0 | 10.0 | 326.5 | 26.4 | 8.9 | 16.1 | 219.7 | 103.7 | 134.4 |
21 | 21 | sample_21 | Sample | BN | 420.3 | 183.1 | 106.0 | 28.1 | 353.8 | 57.9 | ... | 242.6 | 193.9 | 44.0 | 156.3 | 36.4 | 145.9 | 164.7 | 231.5 | 220.2 | 356.0 |
22 | 22 | sample_22 | Sample | HE | NaN | 532.7 | 108.8 | 51.4 | 228.6 | 75.7 | ... | 30.2 | 119.1 | 16.6 | 415.4 | 29.4 | NaN | 3.8 | 840.2 | 159.4 | 211.9 |
23 | 23 | sample_23 | Sample | GC | 0.5 | 26.8 | 7.8 | 8.0 | 38.3 | 0.6 | ... | 0.5 | 25.5 | 2.6 | NaN | 24.7 | NaN | 6.3 | 112.5 | 17.1 | 25.7 |
24 | 24 | sample_24 | Sample | BN | 410.8 | 4235.4 | 145.3 | 31.0 | 2014.7 | 144.1 | ... | 79.1 | 1722.6 | 77.3 | 779.9 | 58.8 | 53.1 | 148.0 | 607.0 | 358.5 | 263.6 |
25 | 25 | sample_25 | Sample | HE | 133.2 | 689.7 | 451.1 | 91.8 | 551.7 | 112.1 | ... | 399.7 | 358.2 | 75.3 | 1124.2 | 29.8 | 0.3 | 17.3 | 184.2 | 277.5 | 119.4 |
26 | 26 | sample_26 | Sample | GC | 355.0 | 854.9 | 172.2 | 6.0 | 305.5 | 27.7 | ... | 22.1 | 47.8 | 4.3 | 558.4 | 30.9 | 205.3 | 156.5 | 116.8 | 193.6 | 133.4 |
27 | 27 | sample_27 | Sample | BN | 80.2 | 178.1 | 126.3 | 27.8 | 253.3 | 16.0 | ... | 286.9 | 117.7 | 12.8 | 133.2 | 25.8 | 173.0 | 95.3 | 209.6 | 235.7 | 157.3 |
28 | 28 | sample_28 | QC | QC | NaN | 494.4 | 74.9 | 40.9 | 152.6 | 21.4 | ... | NaN | 60.1 | 27.4 | 602.9 | 25.5 | 7.0 | NaN | 197.4 | 718.8 | 190.6 |
29 | 29 | sample_29 | Sample | HE | 171.2 | 301.7 | 276.6 | 58.5 | 187.8 | 29.0 | ... | 96.0 | 89.1 | 8.0 | 814.4 | 25.3 | 7.4 | 5.6 | 270.1 | 2560.3 | 276.4 |
30 | 30 | sample_30 | Sample | GC | 135.8 | 279.2 | 126.3 | 38.8 | 221.0 | 24.3 | ... | 106.4 | 308.5 | 47.3 | 101.1 | 31.2 | 18.8 | 2.7 | 184.9 | 353.0 | 244.8 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
111 | 111 | sample_111 | Sample | BN | NaN | 570.8 | 397.8 | 203.6 | 2143.1 | 38.0 | ... | 182.9 | 179.1 | 118.1 | 7036.7 | 30.0 | 18.0 | 34.2 | 718.0 | 387.4 | 502.5 |
112 | 112 | sample_112 | Sample | HE | NaN | 119.5 | 29.7 | 39.9 | 54.0 | 61.0 | ... | 96.0 | 159.9 | 4.2 | 64.2 | 58.0 | 94.1 | 81.1 | 186.2 | 262.6 | 255.3 |
113 | 113 | sample_113 | Sample | GC | 0.9 | 97.9 | 58.9 | 17.3 | 52.2 | NaN | ... | 56.0 | 59.2 | 10.1 | 243.3 | 26.8 | 0.5 | 13.6 | 81.5 | 106.0 | 88.5 |
114 | 114 | sample_114 | Sample | BN | 51.4 | 147.0 | 162.8 | 51.3 | 165.6 | 26.9 | ... | 70.5 | 40.6 | 15.0 | 56.4 | 29.1 | 8.5 | 9.7 | 151.6 | 156.4 | 205.5 |
115 | 115 | sample_115 | Sample | HE | 41.3 | 408.4 | 91.7 | 15.5 | 72.8 | 37.3 | ... | 84.4 | 240.8 | 3.3 | 187.5 | 27.7 | 5.9 | NaN | 299.8 | 84.4 | 219.5 |
116 | 116 | sample_116 | Sample | GC | 86.7 | 362.6 | 278.1 | 35.9 | 256.6 | 19.0 | ... | 178.9 | 88.5 | 30.6 | 4319.7 | 28.6 | 180.4 | 43.7 | 118.7 | 719.5 | 204.8 |
117 | 117 | sample_117 | Sample | BN | 316.4 | 582.6 | 158.0 | 32.7 | 304.7 | 41.9 | ... | 27.4 | 187.2 | 11.0 | 469.9 | 29.3 | 3.3 | 0.2 | 302.2 | 80.2 | 161.4 |
118 | 118 | sample_118 | QC | QC | 152.3 | 260.0 | 198.8 | 32.6 | 156.4 | 6.5 | ... | 134.9 | 48.5 | 23.3 | 332.2 | 27.2 | 5.3 | 7.4 | 127.7 | 666.5 | 188.2 |
119 | 119 | sample_119 | Sample | HE | 215.8 | 357.8 | 110.5 | 49.0 | 77.5 | 129.9 | ... | 37.5 | 78.9 | 7.8 | 198.0 | 26.2 | NaN | 4.7 | 253.3 | 41.6 | 135.1 |
120 | 120 | sample_120 | Sample | GC | 205.1 | 394.3 | 274.5 | 22.4 | 150.0 | 32.2 | ... | 542.9 | 449.1 | 16.4 | 865.4 | 28.8 | 458.4 | 413.7 | 244.5 | 235.7 | 205.6 |
121 | 121 | sample_121 | Sample | BN | 48.1 | 263.9 | 170.3 | 0.1 | 117.8 | 34.0 | ... | 43.6 | 140.3 | 10.1 | 116.1 | 27.4 | 4.9 | 14.7 | 90.7 | 131.6 | 140.5 |
122 | 122 | sample_122 | Sample | HE | 7.1 | 394.2 | NaN | 19.4 | 79.2 | 18.7 | ... | NaN | 210.6 | 6.5 | 316.5 | 41.9 | 1.4 | 14.6 | 161.0 | 79.5 | 52.5 |
123 | 123 | sample_123 | Sample | GC | 27.2 | 133.4 | 43.4 | 1.8 | 160.3 | 9.7 | ... | 50.0 | 156.5 | 3.7 | 58.5 | 25.2 | 1.6 | 6.3 | 69.4 | 128.1 | 70.6 |
124 | 124 | sample_124 | Sample | BN | 113.4 | 82.4 | 163.5 | 39.7 | 131.9 | 22.9 | ... | 40.8 | 59.5 | 11.0 | 304.0 | 142.4 | 21.7 | 31.8 | 77.8 | 167.2 | 285.9 |
125 | 125 | sample_125 | Sample | HE | 50.8 | 772.5 | 116.3 | 86.4 | 153.5 | 256.8 | ... | 50.0 | 14.8 | 8.1 | 271.1 | 24.9 | NaN | 13.7 | 470.5 | 78.0 | 330.9 |
126 | 126 | sample_126 | Sample | GC | 8.6 | 4.8 | 12.8 | 32.8 | 94.4 | 18.9 | ... | 25.3 | 77.8 | 9.9 | 28.9 | 30.5 | 3.5 | 7.4 | 81.4 | 64.9 | 62.0 |
127 | 127 | sample_127 | QC | QC | NaN | 180.1 | 120.1 | 35.3 | 168.4 | NaN | ... | 158.8 | 74.0 | 23.0 | 488.6 | 25.4 | 2.8 | 0.4 | 32.5 | 660.6 | 194.5 |
128 | 128 | sample_128 | Sample | BN | 0.4 | 218.6 | 12.4 | 73.9 | 67.0 | 21.0 | ... | 23.5 | 94.3 | 1.2 | 272.9 | 25.8 | 6.2 | 10.3 | 292.8 | 194.9 | 114.8 |
129 | 129 | sample_129 | Sample | HE | NaN | 682.7 | 57.9 | 7.3 | 822.9 | 106.5 | ... | 4934.1 | 4679.7 | 9.8 | 309.5 | 28.4 | 4691.3 | 4791.5 | 184.2 | 275.3 | 365.6 |
130 | 130 | sample_130 | Sample | GC | 163.1 | 341.5 | 147.2 | 24.9 | 53.8 | 30.2 | ... | 41.7 | 212.7 | 4.3 | 54.8 | 28.0 | 6.0 | 5.2 | 12.0 | 199.5 | 223.8 |
131 | 131 | sample_131 | Sample | BN | 25.3 | 235.9 | 50.2 | 38.3 | 167.7 | 72.8 | ... | 14.3 | 79.7 | 4.6 | 134.9 | 28.4 | 9.1 | 15.4 | 162.3 | 12.2 | 163.7 |
132 | 132 | sample_132 | Sample | HE | 89.9 | 112.8 | 195.2 | 6.4 | 56.7 | 0.2 | ... | 76.0 | 73.9 | 0.1 | 98.8 | 26.9 | 407.1 | 298.0 | 71.3 | 74.0 | 53.9 |
133 | 133 | sample_133 | Sample | GC | 12.3 | 81.0 | 45.0 | 53.8 | 44.6 | 18.7 | ... | 99.9 | 33.3 | 6.3 | 84.8 | 35.0 | 0.1 | 12.6 | 43.9 | 57.6 | 156.0 |
134 | 134 | sample_134 | Sample | BN | 133.9 | 172.6 | 121.5 | 128.4 | 324.6 | 79.7 | ... | 69.3 | 268.4 | 7.1 | 62.7 | 27.4 | 2.8 | 32.5 | 249.6 | 1868.7 | 325.8 |
135 | 135 | sample_135 | Sample | HE | 7.5 | 390.5 | 67.9 | 38.4 | 58.9 | 26.5 | ... | 99.7 | 184.4 | 1.7 | 94.7 | 32.3 | NaN | 5.8 | 174.2 | NaN | 145.5 |
136 | 136 | sample_136 | QC | QC | 97.6 | 341.1 | 232.1 | 38.1 | 174.0 | 7.7 | ... | 79.7 | 101.8 | 23.9 | 296.0 | 25.0 | NaN | 7.5 | 141.5 | 675.7 | 200.8 |
137 | 137 | sample_137 | Sample | GC | 405.3 | 510.7 | 521.9 | 91.9 | 732.1 | 145.7 | ... | 434.8 | 84.8 | 182.3 | 110.7 | 123.9 | 0.4 | 36.3 | 60.1 | 317.3 | 401.7 |
138 | 138 | sample_138 | Sample | BN | 45.4 | 191.6 | 41.0 | 18.7 | 40.8 | 32.2 | ... | 45.3 | 44.5 | 14.5 | 83.8 | 27.9 | 0.3 | 0.5 | 47.3 | 47.8 | 46.5 |
139 | 139 | sample_139 | Sample | HE | 30.7 | 56.8 | 35.9 | 20.9 | 17.4 | NaN | ... | 28.0 | 38.7 | 1.3 | 130.9 | 24.6 | 0.7 | 5.9 | 20.7 | 124.1 | 28.9 |
140 | 140 | sample_140 | QC | QC | 99.8 | 342.7 | 247.4 | 28.0 | 166.6 | 11.1 | ... | 148.2 | 97.7 | 22.9 | 439.7 | 24.7 | NaN | 6.4 | 129.4 | 670.1 | 202.5 |
140 rows × 153 columns
Peak table

The peakTable table can also be displayed interactively so we can inspect and check the imported values. To do this, we again use the display() function. For this example the imported data consists of 149 metabolites (the same as in the dataTable data).
Each row describes a single metabolite, where:
- Name gives the unique identifier used for the metabolite in the dataTable table
- Label gives the metabolite annotation (unannotated metabolites are given a uNNN identifier)
- Perc_missing gives the percentage of samples in which that metabolite was not measured
- QC_RSD gives the relative standard deviation of that metabolite across the QC samples

display(peakTable) # View and check PeakTable
 | Idx | Name | Label | Perc_missing | QC_RSD |
---|---|---|---|---|---
1 | 1 | M1 | 1_3-Dimethylurate | 11.428571 | 32.208005 |
2 | 2 | M2 | 1_6-Anhydro-β-D-glucose | 0.714286 | 31.178028 |
3 | 3 | M3 | 1_7-Dimethylxanthine | 5.000000 | 34.990605 |
4 | 4 | M4 | 1-Methylnicotinamide | 8.571429 | 12.804201 |
5 | 5 | M5 | 2-Aminoadipate | 1.428571 | 9.372664 |
6 | 6 | M6 | 2-Aminobutyrate | 5.000000 | 46.977149 |
7 | 7 | M7 | 2-Furoylglycine | 2.857143 | 5.049156 |
8 | 8 | M8 | 2-Hydroxyisobutyrate | 0.000000 | 5.132340 |
9 | 9 | M9 | 2-Hydroxyphenylacetate | 19.285714 | 28.691165 |
10 | 10 | M10 | 2-Oxoglutarate | 7.857143 | 33.081213 |
11 | 11 | M11 | 3-Aminoisobutyrate | 5.000000 | 15.476165 |
12 | 12 | M12 | 3-Hydroxy-3-methylglutarate | 0.000000 | 26.604957 |
13 | 13 | M13 | 3-Hydroxybutyrate | 0.000000 | 39.465872 |
14 | 14 | M14 | 3-Hydroxyisobutyrate | 2.142857 | 8.905711 |
15 | 15 | M15 | 3-Hydroxyisovalerate | 0.000000 | 4.200837 |
16 | 16 | M16 | 3-Hydroxymandelate | 7.142857 | 22.961669 |
17 | 17 | M17 | 3-Hydroxyphenylacetate | 27.142857 | 133.785710 |
18 | 18 | M18 | 3-Indoxylsulfate | 1.428571 | 20.859142 |
19 | 19 | M19 | 4-Aminohippurate | 8.571429 | 42.305438 |
20 | 20 | M20 | 4-Hydroxy-3-methoxymandelate | 5.000000 | 37.790357 |
21 | 21 | M21 | 4-Hydroxyphenylacetate | 31.428571 | 65.443098 |
22 | 22 | M22 | 4-Hydroxyphenyllactate | 20.000000 | 75.436027 |
23 | 23 | M23 | 5-Aminolevulinate | 0.714286 | 31.630897 |
24 | 24 | M24 | 5-Hydroxytryptophan | 10.714286 | 23.694387 |
25 | 25 | M25 | 6-Hydroxynicotinate | 0.000000 | 18.149034 |
26 | 26 | M26 | ATP | 5.714286 | 16.420150 |
27 | 27 | M27 | Acetaminophen | 12.142857 | 69.127567 |
28 | 28 | M28 | Acetate | 15.000000 | 32.893514 |
29 | 29 | M29 | Acetoacetate | 2.142857 | 28.182375 |
30 | 30 | M30 | Acetone | 1.428571 | 21.799647 |
... | ... | ... | ... | ... | ... |
120 | 120 | M120 | Tyrosine | 3.571429 | 12.001142 |
121 | 121 | M121 | Urocanate | 19.285714 | 40.439011 |
122 | 122 | M122 | Valine | 2.142857 | 19.146486 |
123 | 123 | M123 | Xanthine | 5.000000 | 32.031527 |
124 | 124 | M124 | Xylose | 3.571429 | 32.823992 |
125 | 125 | M125 | cis-Aconitate | 0.000000 | 65.063379 |
126 | 126 | M126 | trans-Aconitate | 2.857143 | 3.813505 |
127 | 127 | M127 | u072 | 21.428571 | 35.369942 |
128 | 128 | M128 | u075 | 8.571429 | 44.069696 |
129 | 129 | M129 | u11 | 0.000000 | 4.348629 |
130 | 130 | M130 | u1125 | 7.142857 | 5.359377 |
131 | 131 | M131 | u122 | 0.000000 | 30.895475 |
132 | 132 | M132 | u122triplet | 0.000000 | 29.184750 |
133 | 133 | M133 | u14 | 0.000000 | 21.052906 |
134 | 134 | M134 | u144 | 1.428571 | 15.536401 |
135 | 135 | M135 | u14doublet | 0.000000 | 26.423975 |
136 | 136 | M136 | u185 | 22.857143 | 6.836917 |
137 | 137 | M137 | u217 | 2.857143 | 5.005221 |
138 | 138 | M138 | u233 | 0.714286 | 4.735334 |
139 | 139 | M139 | u361 | 10.000000 | 47.939803 |
140 | 140 | M140 | u362 | 3.571429 | 30.033727 |
141 | 141 | M141 | u380large | 1.428571 | 41.571960 |
142 | 142 | M142 | u43 | 0.714286 | 7.151166 |
143 | 143 | M143 | u433 | 1.428571 | 24.826144 |
144 | 144 | M144 | u87 | 0.000000 | 6.635486 |
145 | 145 | M145 | uarm1 | 23.571429 | 41.406985 |
146 | 146 | M146 | uarm2 | 4.285714 | 34.458172 |
147 | 147 | M147 | β-Alanine | 1.428571 | 27.623517 |
148 | 148 | M148 | π-Methylhistidine | 1.428571 | 16.561921 |
149 | 149 | M149 | τ-Methylhistidine | 0.000000 | 8.351801 |
149 rows × 5 columns
It is good practice to assess the quality of your data, and remove (clean out) any poorly measured metabolites, before performing any statistical or machine learning modelling (Broadhurst et al., 2018). For the Gastric Cancer NMR data set used in this example we have already calculated some basic statistics for each metabolite and stored them in the Peak table. In this notebook we keep only metabolites that meet the following criteria:
- QC_RSD < 20% (the metabolite was measured reproducibly across the QC samples)
- Perc_missing < 10% (the metabolite has fewer than 10% missing values)
When the data is cleaned, the number of remaining peaks will be reported.
# Create a clean peak table
rsd = peakTable['QC_RSD']
percMiss = peakTable['Perc_missing']
peakTableClean = peakTable[(rsd < 20) & (percMiss < 10)]
print("Number of peaks remaining: {}".format(len(peakTableClean)))
Number of peaks remaining: 52
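If you are curious which metabolites were excluded, the same boolean masks can be inverted to inspect the rejected peaks. This check is not part of the original tutorial, but uses only the columns already present in the Peak table:

# Optional check: count and inspect the peaks that fail each criterion
print("Peaks failing QC_RSD < 20: {}".format(sum(rsd >= 20)))
print("Peaks failing Perc_missing < 10: {}".format(sum(percMiss >= 10)))
removed = peakTable[(rsd >= 20) | (percMiss >= 10)]
display(removed[['Name', 'Label', 'QC_RSD', 'Perc_missing']].head())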
To provide a multivariate assessment of the quality of the cleaned data set it is good practice to perform a simple Principal Component Analysis (PCA), after suitable transformation and scaling. The PCA score plot is typically labelled by sample type (i.e. quality control (QC) or biological sample (Sample)). Data of high quality will have QCs that cluster tightly compared to the biological samples (Broadhurst et al., 2018).
First the metabolite data matrix is extracted from the dataTable, and transformed & scaled:
- peaklist is created to hold the names (M1...Mn) of the metabolites to be used in subsequent statistical analysis
- the corresponding concentrations are extracted from the dataTable table and placed in a matrix X
- the values in X are log-transformed (Xlog)
- cb.utils.scale() is used to scale the log-transformed data (Xscale)
- missing values are imputed with k-nearest neighbours, giving the final matrix Xknn

The transformed & scaled dataset Xknn is used as input to PCA, using the helper function cb.plot.pca(). This returns plots of PCA scores and PCA loadings, for interpretation and quality assessment.
# Extract and scale the metabolite data from the dataTable
peaklist = peakTableClean['Name'] # Set peaklist to the metabolite names in the peakTableClean
X = dataTable[peaklist].values # Extract X matrix from dataTable using peaklist
Xlog = np.log10(X) # Log scale (base-10)
Xscale = cb.utils.scale(Xlog, method='auto') # methods include auto, pareto, vast, and level
Xknn = cb.utils.knnimpute(Xscale, k=3) # missing value imputation (knn - 3 nearest neighbors)
print("Xknn: {} rows & {} columns".format(*Xknn.shape))
cb.plot.pca(Xknn,
pcx=1, # pc for x-axis
pcy=2, # pc for y-axis
group_label=dataTable['SampleType']) # labels for Hover in PCA loadings plot
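As an optional cross-check (an addition to the tutorial, not part of cimcb_lite), the same quality assessment can be reproduced with scikit-learn's PCA; tightly clustered QC scores should be visible regardless of the implementation used:

# Cross-check with scikit-learn's PCA on the same imputed matrix
from sklearn.decomposition import PCA
pca = PCA(n_components=2)
scores = pca.fit_transform(Xknn) # one row of scores per sample, in dataTable order
print("Explained variance: PC1 = {:.1f}%, PC2 = {:.1f}%".format(*(100 * pca.explained_variance_ratio_)))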
Gastric Cancer (GC) vs Healthy Controls (HE)

The data set uploaded into dataTable describes the 1H-NMR urine metabolite profiles of individuals classified into three distinct groups: GC (gastric cancer), BN (benign), and HE (healthy). For this workflow we are interested in comparing only the differences in profiles between individuals classified as GC or HE.
The helper function cb.utils.univariate_2class() will take as input a data table where the observations represent data from two groups, and a corresponding table of metabolite peak information, and produce as output summary statistics of univariate comparisons between the two groups. The output is returned as a pandas dataframe, describing output from statistical tests such as Student's t-test and the Shapiro-Wilk test, and summaries of data quality, like the number and percentage of missing values.
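To make the underlying calculation concrete, the sketch below runs a single Student's t-test "by hand" with scipy for one metabolite (M4, chosen arbitrarily); the helper function repeats this kind of comparison for every peak and collects the results:

# Illustrative sketch: one univariate GC-vs-HE comparison done manually with scipy
from scipy import stats
gc = dataTable.loc[dataTable['Class'] == 'GC', 'M4'].dropna()
he = dataTable.loc[dataTable['Class'] == 'HE', 'M4'].dropna()
tstat, pvalue = stats.ttest_ind(gc, he) # two-sample Student's t-test
print("M4: t = {:.3f}, p = {:.4f}".format(tstat, pvalue))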
First, we reduce the data in dataTable
to only those observations for GC
and HE
samples, and we define the GC
class to be a positive outcome, in the variable pos_outcome
. Next, we pass the reduced dataset and the cleaned peakTable
to cb.utils.univariate_2class()
, and store the returned dataframe in a new variable called statsTable
. This is then displayed as before for interactive inspection and interpretation.
# Select subset of Data for statistical comparison
dataTable2 = dataTable[(dataTable.Class == "GC") | (dataTable.Class == "HE")] # Reduce data table only to GC and HE class members
pos_outcome = "GC"
# Calculate basic statistics and create a statistics table.
statsTable = cb.utils.univariate_2class(dataTable2,
peakTableClean,
group='Class', # Column used to determine the groups
posclass=pos_outcome, # Value of posclass in the group column
parametric=True) # Set parametric = True or False
# View and check StatsTable
display(statsTable)
 | Idx | Name | Label | Grp0_Mean | Grp0_Mean-95CI | Grp1_Mean | Grp1_Mean-95CI | Sign | TTestStat | TTestPvalue | bhQvalue | TotalMissing | PercTotalMissing | Grp0_Missing | Grp1_Missing | ShapiroW | ShapiroPvalue | LeveneW | LevenePvalue |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---
1 | 4 | M4 | 1-Methylnicotinamide | 51.739474 | (39.35, 64.13) | 26.477778 | (19.98, 32.98) | 0 | 3.482846 | 0.000848 | 0.008816 | 9 | 10.843 | 5.0 | 16.279 | 0.861608 | 9.126937e-07 | 9.835944 | 0.002478 |
2 | 5 | M5 | 2-Aminoadipate | 169.915000 | (115.14, 224.69) | 265.118605 | (146.65, 383.59) | 1 | -1.395129 | 0.166791 | 0.420837 | 0 | 0.000 | 0.0 | 0.000 | 0.551547 | 1.591575e-14 | 1.714528 | 0.194101 |
3 | 7 | M7 | 2-Furoylglycine | 53.987179 | (31.17, 76.81) | 118.525581 | (78.5, 158.55) | 1 | -2.672784 | 0.009114 | 0.059243 | 1 | 1.205 | 2.5 | 0.000 | 0.696827 | 1.009690e-11 | 5.909098 | 0.017299 |
4 | 8 | M8 | 2-Hydroxyisobutyrate | 79.267500 | (59.69, 98.85) | 54.395349 | (42.53, 66.26) | 0 | 2.163519 | 0.033448 | 0.158119 | 0 | 0.000 | 0.0 | 0.000 | 0.817766 | 9.915664e-09 | 3.216053 | 0.076652 |
5 | 11 | M11 | 3-Aminoisobutyrate | 171.279487 | (104.01, 238.55) | 201.343902 | (107.59, 295.1) | 0 | -0.506244 | 0.614113 | 0.760330 | 3 | 3.614 | 2.5 | 4.651 | 0.629707 | 6.749377e-13 | 0.241249 | 0.624685 |
6 | 14 | M14 | 3-Hydroxyisobutyrate | 83.902500 | (58.8, 109.01) | 61.531707 | (45.75, 77.31) | 0 | 1.486528 | 0.141120 | 0.407680 | 2 | 2.410 | 0.0 | 4.651 | 0.730224 | 6.725048e-11 | 1.852628 | 0.177348 |
7 | 15 | M15 | 3-Hydroxyisovalerate | 62.300000 | (48.06, 76.54) | 58.472093 | (44.97, 71.98) | 0 | 0.382551 | 0.703054 | 0.812418 | 0 | 0.000 | 0.0 | 0.000 | 0.820793 | 1.225706e-08 | 0.110564 | 0.740362 |
8 | 25 | M25 | 6-Hydroxynicotinate | 20.640000 | (15.86, 25.42) | 30.293023 | (21.28, 39.31) | 1 | -1.814600 | 0.073288 | 0.238185 | 0 | 0.000 | 0.0 | 0.000 | 0.735144 | 6.198545e-11 | 1.828066 | 0.180119 |
9 | 26 | M26 | ATP | 22.813514 | (17.62, 28.0) | 39.944737 | (20.29, 59.6) | 1 | -1.632723 | 0.106834 | 0.326785 | 8 | 9.639 | 7.5 | 11.628 | 0.455527 | 3.358144e-15 | 2.474440 | 0.120035 |
10 | 31 | M31 | Adipate | 57.615000 | (31.6, 83.63) | 80.041860 | (18.98, 141.1) | 0 | -0.645264 | 0.520580 | 0.694410 | 0 | 0.000 | 0.0 | 0.000 | 0.359728 | 2.820873e-17 | 0.466944 | 0.496346 |
11 | 32 | M32 | Alanine | 124.887500 | (100.74, 149.04) | 237.700000 | (179.11, 296.29) | 1 | -3.397787 | 0.001056 | 0.009150 | 0 | 0.000 | 0.0 | 0.000 | 0.803849 | 3.846519e-09 | 10.550634 | 0.001691 |
12 | 33 | M33 | Anserine | 352.142500 | (276.11, 428.17) | 393.334884 | (313.11, 473.56) | 0 | -0.728053 | 0.468681 | 0.676983 | 0 | 0.000 | 0.0 | 0.000 | 0.886427 | 2.378072e-06 | 0.083149 | 0.773811 |
13 | 36 | M36 | Asparagine | 42.518750 | (26.09, 58.95) | 54.965854 | (35.56, 74.37) | 1 | -0.926155 | 0.357503 | 0.546769 | 10 | 12.048 | 20.0 | 4.651 | 0.723858 | 2.053862e-10 | 0.477575 | 0.491776 |
14 | 37 | M37 | Azelate | 74.415000 | (55.56, 93.27) | 97.976744 | (63.44, 132.52) | 0 | -1.149511 | 0.253728 | 0.509733 | 0 | 0.000 | 0.0 | 0.000 | 0.758025 | 2.244279e-10 | 2.658521 | 0.106879 |
15 | 45 | M45 | Citrate | 4629.902500 | (3458.7, 5801.1) | 2386.339535 | (1699.7, 3072.98) | 0 | 3.294034 | 0.001466 | 0.010890 | 0 | 0.000 | 0.0 | 0.000 | 0.807643 | 4.957869e-09 | 9.381234 | 0.002975 |
16 | 48 | M48 | Creatinine | 11747.385000 | (9101.76, 14393.01) | 8838.939535 | (7199.03, 10478.85) | 0 | 1.859463 | 0.066592 | 0.230853 | 0 | 0.000 | 0.0 | 0.000 | 0.886391 | 2.370112e-06 | 4.661800 | 0.033799 |
17 | 50 | M50 | Ethanol | 204.630000 | (154.57, 254.69) | 330.404651 | (146.01, 514.8) | 0 | -1.249709 | 0.215005 | 0.486098 | 0 | 0.000 | 0.0 | 0.000 | 0.385147 | 5.978543e-17 | 1.802594 | 0.183150 |
18 | 51 | M51 | Ethanolamine | 442.841176 | (319.66, 566.02) | 372.950000 | (290.71, 455.19) | 1 | 0.953964 | 0.343208 | 0.546769 | 7 | 8.434 | 15.0 | 2.326 | 0.876171 | 2.284189e-06 | 1.587239 | 0.211680 |
19 | 65 | M65 | Glycylproline | 467.182500 | (357.4, 576.96) | 583.195349 | (418.42, 747.97) | 0 | -1.131128 | 0.261339 | 0.509733 | 0 | 0.000 | 0.0 | 0.000 | 0.807818 | 5.016720e-09 | 1.822093 | 0.180825 |
20 | 66 | M66 | Hippurate | 1271.770000 | (763.17, 1780.37) | 2114.041860 | (1119.43, 3108.65) | 1 | -1.445247 | 0.152246 | 0.416674 | 0 | 0.000 | 0.0 | 0.000 | 0.556458 | 1.917547e-14 | 2.500776 | 0.117687 |
21 | 68 | M68 | Histidine | 223.959459 | (156.34, 291.58) | 182.490476 | (141.55, 223.43) | 1 | 1.055760 | 0.294379 | 0.528921 | 4 | 4.819 | 7.5 | 2.326 | 0.891451 | 6.079539e-06 | 3.382482 | 0.069748 |
22 | 71 | M71 | Ibuprofen | 57.492500 | (33.08, 81.9) | 48.197674 | (33.13, 63.26) | 0 | 0.644911 | 0.520808 | 0.694410 | 0 | 0.000 | 0.0 | 0.000 | 0.607780 | 1.477035e-13 | 0.248795 | 0.619277 |
23 | 73 | M73 | Indole-3-lactate | 76.497500 | (59.03, 93.96) | 103.772093 | (82.21, 125.33) | 1 | -1.909565 | 0.059730 | 0.225653 | 0 | 0.000 | 0.0 | 0.000 | 0.907167 | 1.793880e-05 | 1.846147 | 0.178004 |
24 | 74 | M74 | Isoleucine | 30.544737 | (22.45, 38.64) | 32.963415 | (21.19, 44.73) | 0 | -0.326890 | 0.744638 | 0.823855 | 4 | 4.819 | 5.0 | 4.651 | 0.753753 | 3.452411e-10 | 0.210325 | 0.647805 |
25 | 75 | M75 | Lactate | 142.290000 | (105.73, 178.85) | 173.300000 | (128.33, 218.27) | 1 | -1.039569 | 0.301633 | 0.528921 | 0 | 0.000 | 0.0 | 0.000 | 0.784160 | 1.081508e-09 | 1.050113 | 0.308532 |
26 | 88 | M88 | N-Acetylglutamine | 181.490000 | (143.31, 219.67) | 186.318605 | (143.54, 229.1) | 0 | -0.164148 | 0.870024 | 0.887083 | 0 | 0.000 | 0.0 | 0.000 | 0.892844 | 4.342188e-06 | 0.072094 | 0.788995 |
27 | 89 | M89 | N-AcetylglutamineDerivative | 378.405000 | (289.7, 467.11) | 966.325581 | (717.42, 1215.23) | 1 | -4.236836 | 0.000060 | 0.001549 | 0 | 0.000 | 0.0 | 0.000 | 0.758458 | 2.301412e-10 | 10.067315 | 0.002133 |
28 | 90 | M90 | N-Acetylornithine | 107.565000 | (73.2, 141.93) | 102.439535 | (69.89, 134.99) | 1 | 0.212388 | 0.832338 | 0.865631 | 0 | 0.000 | 0.0 | 0.000 | 0.771708 | 5.037613e-10 | 0.095591 | 0.757980 |
29 | 91 | M91 | N-Acetylserotonin | 63.058974 | (47.54, 78.58) | 89.076744 | (67.78, 110.37) | 1 | -1.902133 | 0.060753 | 0.225653 | 1 | 1.205 | 2.5 | 0.000 | 0.824217 | 1.807414e-08 | 0.882185 | 0.350431 |
30 | 93 | M93 | N-Methylhydantoin | 73.770000 | (58.34, 89.2) | 76.669767 | (60.34, 93.0) | 0 | -0.252163 | 0.801554 | 0.865631 | 0 | 0.000 | 0.0 | 0.000 | 0.882224 | 1.619430e-06 | 0.091236 | 0.763385 |
31 | 101 | M101 | Pantothenate | 50.497368 | (34.95, 66.04) | 40.944186 | (27.81, 54.08) | 0 | 0.926283 | 0.357120 | 0.546769 | 2 | 2.410 | 5.0 | 0.000 | 0.746400 | 1.624373e-10 | 0.342463 | 0.560079 |
32 | 104 | M104 | Proline | 277.610256 | (211.89, 343.33) | 359.125581 | (266.86, 451.4) | 1 | -1.384833 | 0.169953 | 0.420837 | 1 | 1.205 | 2.5 | 0.000 | 0.856649 | 2.083191e-07 | 1.935810 | 0.167981 |
33 | 105 | M105 | Propylene glycol | 252.143243 | (103.53, 400.75) | 161.411905 | (91.05, 231.77) | 0 | 1.123611 | 0.264669 | 0.509733 | 4 | 4.819 | 7.5 | 2.326 | 0.522186 | 1.271518e-14 | 1.272416 | 0.262816 |
34 | 106 | M106 | Pyridoxine | 71.635000 | (52.81, 90.46) | 71.811628 | (51.92, 91.71) | 0 | -0.012598 | 0.989980 | 0.989980 | 0 | 0.000 | 0.0 | 0.000 | 0.786728 | 1.270637e-09 | 0.021844 | 0.882871 |
35 | 107 | M107 | Pyroglutamate | 356.247500 | (267.5, 445.0) | 381.706977 | (288.12, 475.29) | 1 | -0.385645 | 0.700771 | 0.812418 | 0 | 0.000 | 0.0 | 0.000 | 0.892383 | 4.155605e-06 | 0.043377 | 0.835540 |
36 | 110 | M110 | Serotonin | 63.927273 | (38.67, 89.19) | 81.236585 | (63.35, 99.12) | 1 | -1.124323 | 0.264611 | 0.509733 | 9 | 10.843 | 17.5 | 4.651 | 0.815208 | 3.283894e-08 | 0.183965 | 0.669268 |
37 | 115 | M115 | Trigonelline | 170.687179 | (79.84, 261.53) | 246.972093 | (132.68, 361.26) | 1 | -1.010467 | 0.315318 | 0.528921 | 1 | 1.205 | 2.5 | 0.000 | 0.596260 | 1.123890e-13 | 0.803617 | 0.372704 |
38 | 116 | M116 | Trimethylamine | 28.195000 | (20.78, 35.61) | 37.116667 | (21.92, 52.31) | 0 | -1.018330 | 0.311591 | 0.528921 | 1 | 1.205 | 0.0 | 2.326 | 0.629688 | 4.547954e-13 | 1.953823 | 0.166039 |
39 | 118 | M118 | Tropate | 119.992500 | (91.12, 148.86) | 337.895238 | (233.03, 442.76) | 1 | -3.843517 | 0.000242 | 0.003146 | 1 | 1.205 | 0.0 | 2.326 | 0.713623 | 2.359147e-11 | 14.059792 | 0.000333 |
40 | 119 | M119 | Tryptophan | 88.732500 | (60.96, 116.51) | 96.062791 | (76.22, 115.91) | 1 | -0.425400 | 0.671673 | 0.812256 | 0 | 0.000 | 0.0 | 0.000 | 0.797285 | 2.497893e-09 | 0.009748 | 0.921594 |
41 | 120 | M120 | Tyrosine | 82.827778 | (32.06, 133.6) | 104.245238 | (79.56, 128.93) | 1 | -0.777287 | 0.439402 | 0.652826 | 5 | 6.024 | 10.0 | 2.326 | 0.591539 | 2.110843e-13 | 0.000126 | 0.991069 |
42 | 122 | M122 | Valine | 26.242500 | (20.59, 31.9) | 27.360465 | (19.39, 35.33) | 0 | -0.221194 | 0.825498 | 0.865631 | 0 | 0.000 | 0.0 | 0.000 | 0.772160 | 5.176929e-10 | 0.155136 | 0.694710 |
43 | 126 | M126 | trans-Aconitate | 36.242500 | (28.24, 44.25) | 76.670732 | (43.9, 109.44) | 1 | -2.323154 | 0.022744 | 0.118271 | 2 | 2.410 | 0.0 | 4.651 | 0.516279 | 6.681531e-15 | 3.621831 | 0.060669 |
44 | 129 | M129 | u11 | 1720.520000 | (1410.22, 2030.82) | 1921.216279 | (1410.3, 2432.13) | 0 | -0.646389 | 0.519855 | 0.694410 | 0 | 0.000 | 0.0 | 0.000 | 0.824849 | 1.634182e-08 | 1.487213 | 0.226189 |
45 | 130 | M130 | u1125 | 66.865714 | (22.72, 111.01) | 88.579487 | (29.31, 147.85) | 1 | -0.565405 | 0.573556 | 0.745623 | 9 | 10.843 | 12.5 | 9.302 | 0.419807 | 1.411129e-15 | 0.046402 | 0.830056 |
46 | 134 | M134 | u144 | 687.166667 | (512.67, 861.67) | 1993.974419 | (1384.78, 2603.16) | 1 | -3.873564 | 0.000218 | 0.003146 | 1 | 1.205 | 2.5 | 0.000 | 0.677765 | 4.003797e-12 | 11.731688 | 0.000972 |
47 | 137 | M137 | u217 | 1133.863158 | (396.93, 1870.8) | 984.004762 | (457.21, 1510.8) | 1 | 0.328846 | 0.743153 | 0.823855 | 3 | 3.614 | 5.0 | 2.326 | 0.455860 | 1.074063e-15 | 0.236779 | 0.627907 |
48 | 138 | M138 | u233 | 332.930769 | (167.71, 498.15) | 1521.000000 | (1118.39, 1923.61) | 1 | -5.159999 | 0.000002 | 0.000091 | 1 | 1.205 | 2.5 | 0.000 | 0.770728 | 5.589887e-10 | 18.192045 | 0.000054 |
49 | 142 | M142 | u43 | 11.632500 | (6.46, 16.81) | 27.695238 | (16.57, 38.82) | 1 | -2.524698 | 0.013557 | 0.078329 | 1 | 1.205 | 0.0 | 2.326 | 0.615945 | 2.533461e-13 | 4.844210 | 0.030622 |
50 | 144 | M144 | u87 | 28.280000 | (26.31, 30.25) | 39.946512 | (28.79, 51.11) | 1 | -1.949558 | 0.054689 | 0.225653 | 0 | 0.000 | 0.0 | 0.000 | 0.376480 | 4.616048e-17 | 3.812503 | 0.054326 |
51 | 148 | M148 | π-Methylhistidine | 235.997368 | (100.34, 371.66) | 344.723256 | (247.94, 441.51) | 1 | -1.300479 | 0.197218 | 0.466151 | 2 | 2.410 | 5.0 | 0.000 | 0.678142 | 4.912866e-12 | 0.819846 | 0.367978 |
52 | 149 | M149 | τ-Methylhistidine | 173.617500 | (138.01, 209.23) | 162.123256 | (135.12, 189.13) | 1 | 0.508472 | 0.612504 | 0.760330 | 0 | 0.000 | 0.0 | 0.000 | 0.946665 | 1.774491e-03 | 2.427142 | 0.123149 |
It is useful to have this interactive view, but the output will disappear when we close the notebook. To store the output in a more persistent format, such as an Excel spreadsheet, we can use the methods that are built into pandas
dataframes.
To save a pandas
dataframe to an Excel spreadsheet file as a single sheet, we use the dataframe's .to_excel()
method, and provide the name of the file we want to write to (and optionally a name for the sheet). We do not want to keep the dataframe's own index column, so we also set index=False
.
The code in the cell below will write the contents of statsTable
to the file stats.xlsx
.
# Save StatsTable to Excel
statsTable.to_excel("stats.xlsx", sheet_name='StatsTable', index=False)
print("done!")
done!
The remainder of this tutorial will describe the use of a 2-class Partial Least Squares-Discriminant Analysis (PLS-DA) model to identify metabolites which, when combined in a linear equation, are able to classify unknown samples as either GC
or HE
with a measurable degree of certainty.
Multivariate predictive models are prone to overfitting. In order to provide some level of independent evaluation it is common practice to split the source data set into two parts: a training set and a test set. The model is then optimised using the training data and independently evaluated using the test data. The true effectiveness of a model can only be assessed using the test data (Westerhuis et al. 2008, Xia et al. 2012). It is vitally important that both the training and test data are equally representative of the sample population (in our example, the urine metabotype of Gastric Cancer and the urine metabotype of Healthy Control). It is typical to split the data using a ratio of 2:1 (⅔ training, ⅓ test) using stratified random selection. If the purpose of model-building is exploratory, or sample numbers are small, this step is often ignored; however, care must be taken in interpreting a model that has not been tested on a dataset that is independent of the data it was trained on.
We use the dataTable2
dataframe created above, which contains a subset of the complete data suitable for a 2-class comparison (GC
vs HE
). Our goal is to split this dataframe into a training subset (dataTrain
) which will be used to train our model, and a test set (dataTest
), which will be used to evaluate the trained model. We will split the data such that the number of test-set samples is 25% of the total. To do this, we will use the scikit-learn
module's train_test_split()
function.
First, we need to ensure that the sample split - though random - is stratified so that the class membership is balanced to the same proportions in both the test and training sets. In order to do this, we need to supply a binary vector indicating stratification group membership.
The train_test_split()
function expects a binary (1
/0
) list of positive/negative outcome indicators, not the GC
/HE
classes that we have. We convert the class information for each sample in dataTable2
into Y
, a list of 1
/0
values, in the code cell below.
# Create a Binary Y vector for stratifiying the samples
outcomes = dataTable2['Class'] # Column that corresponds to Y class (should be 2 groups)
Y = [1 if outcome == 'GC' else 0 for outcome in outcomes] # Change Y into binary (GC = 1, HE = 0)
Y = np.array(Y) # convert the binary list into a numpy array
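As an aside, the same binary vector can be produced in a single vectorised step with pandas; this is an equivalent alternative, not a required change:

# Equivalent one-liner: boolean comparison cast to integers
Y = (dataTable2['Class'] == 'GC').astype(int).values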
Now that we have the dataset (dataTable2
) and the list of binary outcomes (Y
) for stratification, we can use the train_test_split()
function in the code cell below.
Once the training and test sets have been created, summary output will be printed:
DataTrain = 62 samples with 32 positive cases.
DataTest = 21 samples with 11 positive cases.
Two new dataframes and two new lists are created:
- dataTrain: training dataset (dataframe)
- dataTest: test dataset (dataframe)
- Ytrain: known outcomes for the training dataset
- Ytest: known outcomes for the test dataset

# Split dataTable2 and Y into train and test (with stratification)
dataTrain, dataTest, Ytrain, Ytest = train_test_split(dataTable2, Y, test_size=0.25, stratify=Y, random_state=10)
print("DataTrain = {} samples with {} positive cases.".format(len(Ytrain),sum(Ytrain)))
print("DataTest = {} samples with {} positive cases.".format(len(Ytest),sum(Ytest)))
DataTrain = 62 samples with 32 positive cases.
DataTest = 21 samples with 11 positive cases.
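A quick sanity check (an optional addition) confirms that stratification has kept the proportion of positive (GC) cases almost identical in the two subsets:

# Optional check: class balance in the training and test subsets
print("Train: {:.1f}% positive cases".format(100 * sum(Ytrain) / len(Ytrain)))
print("Test: {:.1f}% positive cases".format(100 * sum(Ytest) / len(Ytest)))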
The most common method to determine the optimal PLS-DA model configuration without overfitting is to use k-fold cross-validation. For PLS-DA, this will be a linear search of models having $1$ to $N$ latent variables (components).
First, each PLS-DA configuration is trained using all the available data (XTknn
and Ytrain
). The generalised predictive ability of that model is then evaluated using the same data - typically by calculating the coefficient of determination $R^2$. This will generate $N$ evaluation scores ($R^2_1,R^2_2 ... R^2_N$).
The training data is then split into $k$ equally sized subsets (folds). For each of the PLS-DA configurations, $k$ models are built, such that each model is trained using $k-1$ folds and the remaining fold is passed to the model so that its predictions can be recorded. The modelling process is arranged so that after $k$ models each fold has been *'held-out'* exactly once.
The generalised predictive ability of the model is then evaluated by comparing the *'held-out'* model predictions to the expected classification (the cross-validated coefficient of determination, $Q^2$). This will generate $N$ cross-validated evaluation scores ($Q^2_1, Q^2_2 \ldots Q^2_N$). If the values for $R^2$ and $Q^2$ are plotted against model complexity (number of latent variables), typically the value of $Q^2$ will be seen to rise and then fall. The point at which the $Q^2$ value begins to diverge from the $R^2$ value is considered the point at which the optimal number of components has been reached without overfitting.
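To make the fold mechanics concrete, the sketch below uses scikit-learn's StratifiedKFold purely to illustrate how each of the $k=5$ folds is held out exactly once; the actual $R^2$/$Q^2$ bookkeeping is handled by the helper function used later in this section:

# Illustration only: show how stratified 5-fold splitting holds out each fold once
from sklearn.model_selection import StratifiedKFold
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=10)
placeholder = np.zeros((len(Ytrain), 1)) # the split depends only on Ytrain, not the features
for i, (fit_idx, holdout_idx) in enumerate(skf.split(placeholder, Ytrain), 1):
    print("Fold {}: fit on {} samples, hold out {}".format(i, len(fit_idx), len(holdout_idx)))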
In this section, we perform 5-fold cross-validation using the training set we created above (dataTrain
) to determine the optimal number of components to use in our PLS-DA model.
First, in the cell below we extract and scale the training data in dataTrain
the same way as we did for PCA quality assessment in section 4 (log-transformation, scaling, and k-nearest-neighbour imputation of missing values).
# Extract and scale the metabolite data from the dataTrain table
peaklist = peakTableClean['Name'] # Set peaklist to the metabolite names in the peakTableClean
XT = dataTrain[peaklist] # Extract X matrix from dataTrain using peaklist
XTlog = np.log10(XT) # Log scale (base-10), as in section 4 (np.log would be the natural log)
XTscale = cb.utils.scale(XTlog, method='auto') # methods include auto, pareto, vast, and level
XTknn = cb.utils.knnimpute(XTscale, k=3) # missing value imputation (knn - 3 nearest neighbors)
Now we use the cb.cross_val.kfold()
helper function to carry out 5-fold cross-validation of a set of PLS-DA models configured with different numbers of latent variables (1 to 6). This helper function is generally applicable, and the values being passed here are:
- model: the Python class describing the statistical model to train and validate. Here, this is cb.model.PLS_SIMPLS, a PLS model using the SIMPLS algorithm.
- X: the training data set (XTknn)
- Y: the known outcomes corresponding to the dataset in X (Ytrain)
- param_dict: a dictionary of key:value pairs, where the key is a parameter that is passed to the model and the value is a collection of individual values to be passed to that parameter
- folds: the number of folds in the cross-validation
- bootnum: the number of bootstrap samples used in computing confidence intervals

The cb.cross_val.kfold() function returns an object that we store in the cv variable. To actually run the cross-validation, we call the cv.run() method of this object. When the cell is run, a progress bar will appear:
Kfold: 100%|██████████| 100/100 [00:02<00:00, 33.71it/s]
# initialise cross_val kfold (stratified)
cv = cb.cross_val.kfold(model=cb.model.PLS_SIMPLS, # model; we are using the PLS_SIMPLS model
X=XTknn,
Y=Ytrain,
param_dict={'n_components': [1,2,3,4,5,6]}, # The numbers of latent variables to search
folds=5, # folds; for the number of splits (k-fold)
bootnum=100) # num bootstraps for the Confidence Intervals
# run the cross validation
cv.run()
Kfold: 100%|██████████| 100/100 [00:07<00:00, 14.26it/s]
The object stored in the cv
variable also has a .plot()
method, which renders two views of $R^2$ and $Q^2$ statistics: difference ($R^2 - Q^2$), and absolute values of both metrics against the number of components, to aid in selecting the optimal number of components.
The point at which the $Q^2$ value begins to diverge from the $R^2$ value is considered to be the point at which the optimal number of components has been met without overfitting. In this case, the plots clearly indicate that the optimal number of latent variables in our model is two.
Now that we have determined that the optimal number of components for this example data set is 2, we create a PLS-DA model with 2 latent variables, and evaluate its predictive ability. The implementation of PLS we use is the PLS_SIMPLS
class from the CIMCB helper module. We first create a PLS model object with two components, in the variable modelPLS
:
modelPLS = cb.model.PLS_SIMPLS(n_components=2) # Initalise the model with n_components = 2
Next we fit the model on the XTknn
training dataset, with the values in Ytrain
as the known response variables. We do this by calling the model's .train()
method, with the predictor and response variables.
This returns a list of values that are the predicted response variables, after model fitting.
Ypred = modelPLS.train(XTknn, Ytrain) # Train the model
Finally, we call the trained model's .evaluate()
method, passing a classification cutoff score from which a standard set of model evaluations will be calculated from the model predictions ($R^2$, Mann-Whitney p-value, Area under ROC curve, Accuracy, Precision, Sensitivity, Specificity). The model perfomance is also visualised using the following plots:
From these plots and the table we find that the trained classifier performs acceptably well.
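If you want to cross-check these statistics independently of the CIMCB plotting code, standard scikit-learn metrics can be computed from the same predictions; the sketch below assumes Ypred can be flattened to a one-dimensional score vector:

# Optional cross-check of two of the reported statistics with scikit-learn
from sklearn.metrics import roc_auc_score, accuracy_score
scores = np.array(Ypred).flatten() # predicted scores for the training samples
print("AUC = {:.3f}".format(roc_auc_score(Ytrain, scores)))
print("Accuracy = {:.3f}".format(accuracy_score(Ytrain, scores > 0.5))) # 0.5 cutoff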
The reliability of our trained model can be assessed using a permutation test. In this test, the original data is randomised (permuted or 'shuffled') so that the predictor variables and response variables are mixed, and a new model is then trained and tested on the shuffled data. This is repeated many times so that the behaviour of models constructed from "random" data can be fairly assessed.
We can be confident that our model is being trained on relevant and meaningful features of the original dataset if the $R^2$ and $Q^2$ values generated from these models (with randomised data) are much lower than those found for our model trained on the original data.
The PLS model we are using from the CIMCB module has a .permutation_test()
method that can perform this analysis for us. It returns a pair of graphs that can be used to interpret model performance.
We see that the models trained on randomised/shuffled data have much lower $R^2$ and $Q^2$ values than the models trained on the original data, so we can be confident that the model represents meaningful features in the original dataset.
modelPLS.permutation_test(nperm=100) #nperm refers to the number of permutations
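The idea behind the permutation test can also be sketched by hand: shuffle the response labels so any real association with the metabolite data is destroyed, refit, and compare the resulting $R^2$ with that of the genuine model. The sketch below shows a single permutation round, assuming (as earlier in this tutorial) that .train() returns the predicted Y scores:

# Illustration of one permutation round (the helper method repeats this nperm times)
rng = np.random.RandomState(0)
Yshuffled = rng.permutation(Ytrain) # break the X-to-Y relationship
permModel = cb.model.PLS_SIMPLS(n_components=2)
YpredPerm = np.array(permModel.train(XTknn, Yshuffled)).flatten()
ss_res = np.sum((Yshuffled - YpredPerm) ** 2)
ss_tot = np.sum((Yshuffled - np.mean(Yshuffled)) ** 2)
print("Permuted R2 = {:.3f} (expect this to be much lower than the real R2)".format(1 - ss_res / ss_tot))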
The PLS model also provides a .plot_projections()
method, so we can visually inspect characteristics of the fitted latent variables. This returns a grid of plots:
(Where only one latent variable is fitted, a similar plot to that from the .evaluate() method is produced, with the addition of a scatterplot of the latent variable scores.)
modelPLS.plot_projections(label=dataTrain[['Idx','SampleID']], size=12) # size changes circle size
Now that we have built a model and established that it represents meaningful features of the dataset, we determine the importance of specific peaks to the model's discriminatory power.
To do this, in the cell below we use the PLS model's plot_featureimportance()
method to render scatterplots of the PLS regression coefficient values for each metabolite, and Variable Importance in Projection (VIP) plots. The coefficient values provide information about the contribution of the peak to either a negative or positive classification for the sample, and peaks with VIP greater than unity (1) are considered to be "important" in the model.
We could generate these plots for the model as it was trained, but we would prefer to have an estimate of the robustness of these values, so we generate bootstrapped confidence intervals with the model's .calc_bootci()
method. Any metabolite coefficient with a confidence interval crossing the zero line is considered non-significant, and thus not "important" to the model.
The .plot_featureimportance()
method renders the two scatterplots, and also returns a new dataframe reporting these values, and their confidence intervals, which we capture in the variable peakSheet
.
# Calculate the bootstrapped confidence intervals
modelPLS.calc_bootci(type='bca', bootnum=200) # decrease bootnum if this takes too long on your machine
# Plot the feature importance plots, and return a new Peaksheet
peakSheet = modelPLS.plot_featureimportance(peakTableClean,
peaklist,
ylabel='Label', # annotate peaks using the 'Label' column ('Name' is an alternative)
sort=False) # keep the peaks in their original order rather than sorting by importance
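As a follow-up, the returned peakSheet dataframe can be filtered to list only the "important" metabolites. The exact column names depend on the cimcb_lite version, so the 'VIP' name used below is an assumption; inspect peakSheet.columns first:

# Inspect the available columns, then filter (the 'VIP' column name is assumed)
print(peakSheet.columns.tolist())
important = peakSheet[peakSheet['VIP'] > 1] # VIP > 1 is the usual importance threshold
display(important)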
So far, we have trained and tested our PLS classifier on a single training dataset. This risks overfitting as we could be optimising the performance of the model on this dataset such that it cannot generalise, in the sense that it may not perform as well on a dataset that it has not already seen.
To see if the model can generalise, we must test our trained model using a new dataset that it has not already encountered. In section 6.1 we divided our original complete dataset into four components: dataTrain
, Ytrain
, dataTest
and Ytest
. Our trained model has not seen the dataTest
and Ytest
values that we have held out, so these can be used to evaluate model performance on new data.
We begin by transforming and scaling this holdout dataset in the same way as we did for the training data. To do this, we first obtain the column means (mu) and standard deviations (sigma) of our transformed training data set XTlog with the cb.utils.scale() function, so that we can use these values to scale the holdout data.
# Get mu and sigma from the training dataset to use for the Xtest scaling
mu, sigma = cb.utils.scale(XTlog, return_mu_sigma=True)
Next, we extract the peak data for our holdout dataTest
set, and put this in the variable XV
. As before, we take the log transform (XVlog
), scale the data in the same way as the training data (XVscale
; note that we specify mu
and sigma
as calculated above), and impute missing values to give the final holdout test set XVknn
.
# Pull out Xtest from dataTest using peaklist ('Name' column in PeakTable)
peaklist = peakTableClean.Name
XV = dataTest[peaklist].values
# Log transform, unit-scale and knn-impute missing values for Xtest
XVlog = np.log10(XV) # Log transform (base-10), matching the training data
XVscale = cb.utils.scale(XVlog, method='auto', mu=mu, sigma=sigma)
XVknn = cb.utils.knnimpute(XVscale, k=3)
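For clarity, scaling with a stored mu and sigma is just the usual autoscaling formula, (x - mu) / sigma, applied column-wise with the training-set statistics. Assuming mu and sigma are one-value-per-column arrays, the helper's result can be verified manually:

# Optional check: the helper's output matches the manual autoscaling formula
# (equal_nan=True because missing values are still NaN at this point)
assert np.allclose((XVlog - mu) / sigma, XVscale, equal_nan=True)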
Now we predict a new set of response variables from XVknn
as input, using our trained model and its .test()
method, and then evaluate the performance of model prediction against the known values in Ytest
using the .evaluate()
method (as in section 6.3).
Three plots are generated, showing comparisons of the performance of the model on training and holdout test datasets.
A table of performance metrics for both datasets is shown below the figures.
# Calculate Ypredicted score using modelPLS.test
YVpred = modelPLS.test(XVknn)
# Evaluate Ypred against Ytest
evals = [Ytest, YVpred] # alternative formats: (Ytest, Ypred) or np.array([Ytest, Ypred])
#modelPLS.evaluate(evals, specificity=0.9)
modelPLS.evaluate(evals, cutoffscore=0.5)
Finally, we will save our results in a persistent Excel spreadsheet.
Unlike section 5, we want to save two sheets in a single Excel workbook called modelPLS.xlsx
. We want to save one sheet showing the holdout test data (with results from YVpred
), and a separate sheet showing the peaks with their residual coefficients and VIP scores.
Firstly, we generate a dataframe containing the test dataset and the model's predictions. This will have columns for:
- Idx: sample index
- SampleID: sample ID
- Class: sample class (GC or HE)
- Ypred: predicted response variable from the trained model

# Save DataSheet as 'Idx', 'SampleID', and 'Class' from DataTest
dataSheet = dataTest[["Idx", "SampleID", "Class"]].copy()
# Add 'Ypred' to Datasheet
dataSheet['Ypred'] = YVpred
display(dataSheet) # View and check the dataSheet
 | Idx | SampleID | Class | Ypred |
---|---|---|---|---
4 | 4 | sample_4 | HE | 0.596320 |
78 | 78 | sample_78 | GC | 0.762340 |
90 | 90 | sample_90 | HE | 0.190719 |
71 | 71 | sample_71 | GC | 0.799896 |
92 | 92 | sample_92 | GC | 1.046995 |
119 | 119 | sample_119 | HE | 0.122116 |
56 | 56 | sample_56 | HE | 0.166406 |
104 | 104 | sample_104 | HE | -0.130498 |
98 | 98 | sample_98 | GC | 0.230983 |
36 | 36 | sample_36 | GC | 0.401337 |
35 | 35 | sample_35 | HE | 0.050801 |
95 | 95 | sample_95 | GC | 1.135492 |
94 | 94 | sample_94 | HE | -0.136688 |
129 | 129 | sample_129 | HE | 0.727619 |
58 | 58 | sample_58 | GC | 1.069837 |
75 | 75 | sample_75 | GC | 1.165530 |
63 | 63 | sample_63 | HE | 0.180942 |
88 | 88 | sample_88 | GC | 1.014826 |
101 | 101 | sample_101 | HE | 0.174986 |
43 | 43 | sample_43 | GC | 0.392416 |
33 | 33 | sample_33 | GC | 0.979422 |
In section 5 we saved a single dataframe to an Excel workbook, as a single worksheet. Here, we want to save two worksheets. This means we can't use the .to_excel()
method of a dataframe directly to write twice to the same file. Instead, we must create a pd.ExcelWriter
object, and add each dataframe in turn to this object. When we are finished adding dataframes, we can use the object's .save()
method to write the Excel workbook with several worksheets (one per dataframe) to a single file.
# Create an empty excel workbook
writer = pd.ExcelWriter("modelPLS.xlsx") # provide the filename for the Excel file
# Add each dataframe to the workbook in turn, as a separate worksheet
dataSheet.to_excel(writer, sheet_name='Datasheet', index=False)
peakSheet.to_excel(writer, sheet_name='Peaksheet', index=False)
# Write the Excel workbook to disk
writer.save()
print("Done!")
Done!
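Note that in more recent versions of pandas the .save() method has been deprecated in favour of .close(), and the idiomatic pattern is a context manager, which closes the workbook automatically:

# Equivalent pattern for newer pandas versions: a context manager closes the file
with pd.ExcelWriter("modelPLS.xlsx") as writer:
    dataSheet.to_excel(writer, sheet_name='Datasheet', index=False)
    peakSheet.to_excel(writer, sheet_name='Peaksheet', index=False)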
Congratulations! You have reached the end of tutorial 1.