Tutorials_paired_modality
[1]:
import scanpy as sc
import torch
import os
import pandas as pd
os.chdir("/public/home/off_liukunpeng/project/11_cluster_problem/AgaeSMO")
[2]:
import sys
sys.path.append("/public/home/off_liukunpeng/project/11_cluster_problem/AgaeSMO")
[3]:
import community as louvain
import AgaeSMO as AgaeSMO_v1
[4]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
[5]:
os.environ['R_HOME'] = '/public/home/off_liukunpeng/software/anaconda3/envs/pyg1/lib/R'
[6]:
adata_omics1 = sc.read_h5ad("data/Human_Lymph_Node_A1/adata_RNA.h5ad")
adata_omics2 = sc.read_h5ad("data/Human_Lymph_Node_A1/adata_ADT.h5ad")
adata_omics1.var_names_make_unique()
adata_omics2.var_names_make_unique()
/public/home/off_liukunpeng/software/anaconda3/envs/pyg1/lib/python3.9/site-packages/anndata/_core/anndata.py:1820: UserWarning: Variable names are not unique. To make them unique, call `.var_names_make_unique`.
utils.warn_names_duplicates("var")
/public/home/off_liukunpeng/software/anaconda3/envs/pyg1/lib/python3.9/site-packages/anndata/_core/anndata.py:1820: UserWarning: Variable names are not unique. To make them unique, call `.var_names_make_unique`.
utils.warn_names_duplicates("var")
[7]:
sc.pp.filter_genes(adata_omics1, min_cells=10)
sc.pp.highly_variable_genes(adata_omics1, flavor="seurat_v3", n_top_genes=3000)
sc.pp.normalize_total(adata_omics1, target_sum=1e4)
sc.pp.log1p(adata_omics1)
sc.pp.scale(adata_omics1)
[8]:
adata_omics1_high = adata_omics1[:, adata_omics1.var['highly_variable']]
adata_omics1.obsm['feat'] = AgaeSMO_v1.pca(adata_omics1_high, n_comps=adata_omics2.n_vars-1)
adata_omics1.obsm['tensor']=adata_omics1[:, adata_omics1.var['highly_variable']].X
# adata_omics1.obsm['tensor']=AgaeSMO_v1.pca(adata_omics1_high, n_comps=100)
adata_omics2 = AgaeSMO_v1.clr_normalize_each_cell(adata_omics2)
sc.pp.scale(adata_omics2)
adata_omics2.obsm['feat'] = AgaeSMO_v1.pca(adata_omics2, n_comps=adata_omics2.n_vars-1)
# adata_omics2.obsm['tensor']=AgaeSMO_v1.pca(adata_omics2, n_comps=adata_omics2.n_vars-1)
adata_omics2.obsm['tensor']=adata_omics2.X
[9]:
data = AgaeSMO_v1.construct_neighbor_graph(adata_omics1, adata_omics2,6,6,k=6)
------Calculating spatial graph...
The graph contains 20902 edges, 3484 cells.
5.9994 neighbors per cell on average.
/public/home/off_liukunpeng/project/11_cluster_problem/AgaeSMO/AgaeSMO/utils.py:529: FutureWarning: pandas.value_counts is deprecated and will be removed in a future version. Use pd.Series(obj).value_counts() instead.
plot_df = pd.value_counts(pd.value_counts(adata.uns['adj_spatial_']['Cell1']))
/public/home/off_liukunpeng/project/11_cluster_problem/AgaeSMO/AgaeSMO/utils.py:529: FutureWarning: pandas.value_counts is deprecated and will be removed in a future version. Use pd.Series(obj).value_counts() instead.
plot_df = pd.value_counts(pd.value_counts(adata.uns['adj_spatial_']['Cell1']))
------Calculating spatial graph...
The graph contains 20902 edges, 3484 cells.
5.9994 neighbors per cell on average.
/public/home/off_liukunpeng/project/11_cluster_problem/AgaeSMO/AgaeSMO/utils.py:529: FutureWarning: pandas.value_counts is deprecated and will be removed in a future version. Use pd.Series(obj).value_counts() instead.
plot_df = pd.value_counts(pd.value_counts(adata.uns['adj_spatial_']['Cell1']))
/public/home/off_liukunpeng/project/11_cluster_problem/AgaeSMO/AgaeSMO/utils.py:529: FutureWarning: pandas.value_counts is deprecated and will be removed in a future version. Use pd.Series(obj).value_counts() instead.
plot_df = pd.value_counts(pd.value_counts(adata.uns['adj_spatial_']['Cell1']))
[10]:
model = AgaeSMO_v1.Train_AgaeSMO(data, device=device,learning_rate=0.001,epochs=600,loss_weight=[1,1,1,1])
output = model.train()
torch.Size([3484, 3000]) torch.Size([3484, 31])
dim_input1 3000 dim_input2 31
dim_output1 30 dim_output2 30
0%| | 0/600 [00:00<?, ?it/s]/public/home/off_liukunpeng/software/anaconda3/envs/pyg1/lib/python3.9/site-packages/torch/nn/functional.py:1956: UserWarning: nn.functional.tanh is deprecated. Use torch.tanh instead.
warnings.warn("nn.functional.tanh is deprecated. Use torch.tanh instead.")
/public/home/off_liukunpeng/project/11_cluster_problem/AgaeSMO/AgaeSMO/layer.py:250: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
self.alpha = F.softmax(torch.squeeze(self.vu) + 1e-6)
0%| | 1/600 [00:03<34:10, 3.42s/it]
epoch:0 3.523435115814209
2%|▏ | 11/600 [00:04<01:31, 6.40it/s]
epoch:10 1.109488844871521
4%|▎ | 21/600 [00:05<01:07, 8.53it/s]
epoch:20 0.9199943542480469
5%|▌ | 31/600 [00:06<01:05, 8.62it/s]
epoch:30 0.8487171530723572
7%|▋ | 41/600 [00:07<01:04, 8.60it/s]
epoch:40 0.8076435327529907
8%|▊ | 51/600 [00:08<01:03, 8.65it/s]
epoch:50 0.7793477177619934
10%|█ | 61/600 [00:09<01:02, 8.63it/s]
epoch:60 0.7591758370399475
12%|█▏ | 71/600 [00:11<01:01, 8.63it/s]
epoch:70 0.7457008361816406
14%|█▎ | 81/600 [00:12<00:59, 8.65it/s]
epoch:80 0.7350626587867737
15%|█▌ | 91/600 [00:13<00:58, 8.65it/s]
epoch:90 0.7239288687705994
17%|█▋ | 101/600 [00:14<00:57, 8.65it/s]
epoch:100 0.7157127261161804
18%|█▊ | 111/600 [00:15<00:56, 8.65it/s]
epoch:110 0.7050766944885254
20%|██ | 121/600 [00:16<00:55, 8.65it/s]
epoch:120 0.7002685070037842
22%|██▏ | 131/600 [00:17<00:54, 8.65it/s]
epoch:130 0.690095841884613
24%|██▎ | 141/600 [00:18<00:53, 8.66it/s]
epoch:140 0.6826991438865662
25%|██▌ | 151/600 [00:19<00:52, 8.63it/s]
epoch:150 0.6762441396713257
27%|██▋ | 161/600 [00:20<00:50, 8.64it/s]
epoch:160 0.6688088178634644
28%|██▊ | 171/600 [00:21<00:49, 8.64it/s]
epoch:170 0.6634473204612732
30%|███ | 181/600 [00:23<00:48, 8.64it/s]
epoch:180 0.6571555137634277
32%|███▏ | 191/600 [00:24<00:47, 8.64it/s]
epoch:190 0.6515916585922241
34%|███▎ | 201/600 [00:25<00:46, 8.64it/s]
epoch:200 0.6466171741485596
35%|███▌ | 211/600 [00:26<00:45, 8.64it/s]
epoch:210 0.6423236131668091
37%|███▋ | 221/600 [00:27<00:43, 8.64it/s]
epoch:220 0.6366432905197144
38%|███▊ | 231/600 [00:28<00:42, 8.63it/s]
epoch:230 0.6339633464813232
40%|████ | 241/600 [00:29<00:41, 8.64it/s]
epoch:240 0.6290221214294434
42%|████▏ | 251/600 [00:30<00:40, 8.63it/s]
epoch:250 0.6252649426460266
44%|████▎ | 261/600 [00:31<00:39, 8.63it/s]
epoch:260 0.6232700347900391
45%|████▌ | 271/600 [00:32<00:38, 8.63it/s]
epoch:270 0.6188769340515137
47%|████▋ | 281/600 [00:33<00:36, 8.63it/s]
epoch:280 0.6156838536262512
48%|████▊ | 291/600 [00:35<00:35, 8.62it/s]
epoch:290 0.6128601431846619
50%|█████ | 301/600 [00:36<00:34, 8.64it/s]
epoch:300 0.6108685731887817
52%|█████▏ | 311/600 [00:37<00:33, 8.62it/s]
epoch:310 0.6093709468841553
54%|█████▎ | 321/600 [00:38<00:32, 8.63it/s]
epoch:320 0.6053789258003235
55%|█████▌ | 331/600 [00:39<00:31, 8.64it/s]
epoch:330 0.6036992073059082
57%|█████▋ | 341/600 [00:40<00:30, 8.62it/s]
epoch:340 0.5986937880516052
58%|█████▊ | 351/600 [00:41<00:28, 8.62it/s]
epoch:350 0.5950220823287964
60%|██████ | 361/600 [00:42<00:27, 8.64it/s]
epoch:360 0.5954763293266296
62%|██████▏ | 371/600 [00:43<00:26, 8.61it/s]
epoch:370 0.5899379253387451
64%|██████▎ | 381/600 [00:44<00:25, 8.61it/s]
epoch:380 0.5875986218452454
65%|██████▌ | 391/600 [00:45<00:24, 8.62it/s]
epoch:390 0.5869743227958679
67%|██████▋ | 401/600 [00:47<00:23, 8.63it/s]
epoch:400 0.58343505859375
68%|██████▊ | 411/600 [00:48<00:21, 8.63it/s]
epoch:410 0.5789999961853027
70%|███████ | 421/600 [00:49<00:20, 8.62it/s]
epoch:420 0.576606035232544
72%|███████▏ | 431/600 [00:50<00:19, 8.64it/s]
epoch:430 0.5726597309112549
73%|███████▎ | 440/600 [00:51<00:17, 9.18it/s]
epoch:440 0.5699533820152283
epoch:450 0.5658155083656311
epoch:460 0.5609947443008423
epoch:470 0.5592743158340454
epoch:480 0.5550069212913513
epoch:490 0.5514665842056274
83%|████████▎ | 496/600 [00:51<00:00, 135.45it/s]
epoch:500 0.5470584630966187
84%|████████▍ | 507/600 [00:52<00:02, 34.47it/s]
epoch:510 0.5442636013031006
87%|████████▋ | 521/600 [00:54<00:04, 18.25it/s]
epoch:520 0.5398437976837158
88%|████████▊ | 530/600 [00:55<00:04, 14.26it/s]
epoch:530 0.5369762778282166
90%|█████████ | 542/600 [00:56<00:05, 10.72it/s]
epoch:540 0.5338019132614136
92%|█████████▏| 551/600 [00:57<00:05, 9.21it/s]
epoch:550 0.5289199948310852
94%|█████████▎| 561/600 [00:58<00:04, 8.66it/s]
epoch:560 0.5248134732246399
95%|█████████▌| 571/600 [00:59<00:03, 8.63it/s]
epoch:570 0.5206929445266724
97%|█████████▋| 581/600 [01:00<00:02, 8.61it/s]
epoch:580 0.5169838666915894
98%|█████████▊| 591/600 [01:01<00:01, 8.62it/s]
epoch:590 0.5164998769760132
100%|██████████| 600/600 [01:02<00:00, 9.54it/s]
Model training finished!
[11]:
adata = adata_omics1.copy()
adata.obsm['emb_latent_omics1'] = output['emb_latent_omics1'].copy()
adata.obsm['emb_latent_omics2'] = output['emb_latent_omics2'].copy()
adata.obsm['AgaeSMO'] = output['AgaeSMO'].copy()
adata.obsm['alpha'] = output['alpha']
[21]:
adata=sc.read_h5ad("../AgaeSMO/result/Dataset11_Human_Lymph_Node_A1.h5ad")
adata
[21]:
AnnData object with n_obs × n_vars = 3484 × 17954
obs: 'mclust', 'AgaeSMO', 'Ground Truth'
var: 'gene_ids', 'feature_types', 'genome', 'n_cells', 'highly_variable', 'highly_variable_rank', 'means', 'variances', 'variances_norm', 'mean', 'std'
uns: 'AgaeSMO_colors', 'adj_spatial', 'adj_spatial_', 'hvg', 'log1p', 'neighbors', 'umap'
obsm: 'AgaeSMO', 'AgaeSMO_pca', 'X_umap', 'adj_feature', 'alpha', 'emb_latent_omics1', 'emb_latent_omics2', 'feat', 'spatial', 'tensor'
obsp: 'connectivities', 'distances'
[22]:
n_cluster=6
tool = 'mclust' # mclust, leiden, and louvain
AgaeSMO_v1.clustering(adata,refine_=False, key='AgaeSMO', add_key='AgaeSMO', n_clusters=n_cluster, method=tool, use_pca=True)# visualization
annotation=pd.read_csv('data/Human_Lymph_Node_A1/annotation.csv')
adata.obs['Ground Truth']=annotation.loc[:,'manual-anno'].to_list()
indexs=AgaeSMO_v1.supervise_index(adata,"AgaeSMO",'Ground Truth')
print(indexs)
ARI=indexs["ARI"]
import matplotlib.pyplot as plt
fig, ax_list = plt.subplots(1, 2, figsize=(12, 4))
sc.pp.neighbors(adata, use_rep='AgaeSMO', n_neighbors=10)
sc.tl.umap(adata)
sc.pl.umap(adata, color='AgaeSMO', ax=ax_list[0], title='AgaeSMO', s=30, show=False)
sc.pl.embedding(adata, basis='spatial', color='AgaeSMO', ax=ax_list[1], title=f'AgaeSMO:{ARI}', s=50, show=False)
plt.tight_layout(w_pad=0.3)
# plt.show()
fitting ...
|======================================================================| 100%
{'ARI': 0.37791564193036475, 'NMI': 0.428201084832614, 'FMS': 0.5287651775711655, 'AMI': 0.4258045792302591, 'HMG': 0.4091611296985109, 'VMS': 0.428201084832614, 'MIS': 0.6902564599166731}
/public/home/off_liukunpeng/software/anaconda3/envs/pyg1/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:1234: FutureWarning: The default value of 'ignore' for the `na_action` parameter in pandas.Categorical.map is deprecated and will be changed to 'None' in a future version. Please set na_action to the desired value to avoid seeing this warning
color_vector = pd.Categorical(values.map(color_map))
/public/home/off_liukunpeng/software/anaconda3/envs/pyg1/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:394: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
cax = scatter(
/public/home/off_liukunpeng/software/anaconda3/envs/pyg1/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:1234: FutureWarning: The default value of 'ignore' for the `na_action` parameter in pandas.Categorical.map is deprecated and will be changed to 'None' in a future version. Please set na_action to the desired value to avoid seeing this warning
color_vector = pd.Categorical(values.map(color_map))
/public/home/off_liukunpeng/software/anaconda3/envs/pyg1/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:394: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
cax = scatter(
[ ]: