Tutorials_paired_modality

[1]:

import scanpy as sc import torch import os import pandas as pd os.chdir("/public/home/off_liukunpeng/project/11_cluster_problem/AgaeSMO")
[2]:
import sys
sys.path.append("/public/home/off_liukunpeng/project/11_cluster_problem/AgaeSMO")
[3]:
import community as louvain
import AgaeSMO as AgaeSMO_v1
[4]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

[5]:
os.environ['R_HOME'] = '/public/home/off_liukunpeng/software/anaconda3/envs/pyg1/lib/R'

[6]:

adata_omics1 = sc.read_h5ad("data/Human_Lymph_Node_A1/adata_RNA.h5ad") adata_omics2 = sc.read_h5ad("data/Human_Lymph_Node_A1/adata_ADT.h5ad") adata_omics1.var_names_make_unique() adata_omics2.var_names_make_unique()
/public/home/off_liukunpeng/software/anaconda3/envs/pyg1/lib/python3.9/site-packages/anndata/_core/anndata.py:1820: UserWarning: Variable names are not unique. To make them unique, call `.var_names_make_unique`.
  utils.warn_names_duplicates("var")
/public/home/off_liukunpeng/software/anaconda3/envs/pyg1/lib/python3.9/site-packages/anndata/_core/anndata.py:1820: UserWarning: Variable names are not unique. To make them unique, call `.var_names_make_unique`.
  utils.warn_names_duplicates("var")
[7]:
sc.pp.filter_genes(adata_omics1, min_cells=10)
sc.pp.highly_variable_genes(adata_omics1, flavor="seurat_v3", n_top_genes=3000)
sc.pp.normalize_total(adata_omics1, target_sum=1e4)
sc.pp.log1p(adata_omics1)
sc.pp.scale(adata_omics1)
[8]:
adata_omics1_high =  adata_omics1[:, adata_omics1.var['highly_variable']]
adata_omics1.obsm['feat'] = AgaeSMO_v1.pca(adata_omics1_high, n_comps=adata_omics2.n_vars-1)
adata_omics1.obsm['tensor']=adata_omics1[:, adata_omics1.var['highly_variable']].X
# adata_omics1.obsm['tensor']=AgaeSMO_v1.pca(adata_omics1_high, n_comps=100)

adata_omics2 = AgaeSMO_v1.clr_normalize_each_cell(adata_omics2)
sc.pp.scale(adata_omics2)
adata_omics2.obsm['feat'] = AgaeSMO_v1.pca(adata_omics2, n_comps=adata_omics2.n_vars-1)
# adata_omics2.obsm['tensor']=AgaeSMO_v1.pca(adata_omics2, n_comps=adata_omics2.n_vars-1)
adata_omics2.obsm['tensor']=adata_omics2.X

[9]:
data = AgaeSMO_v1.construct_neighbor_graph(adata_omics1, adata_omics2,6,6,k=6)

------Calculating spatial graph...
The graph contains 20902 edges, 3484 cells.
5.9994 neighbors per cell on average.
/public/home/off_liukunpeng/project/11_cluster_problem/AgaeSMO/AgaeSMO/utils.py:529: FutureWarning: pandas.value_counts is deprecated and will be removed in a future version. Use pd.Series(obj).value_counts() instead.
  plot_df = pd.value_counts(pd.value_counts(adata.uns['adj_spatial_']['Cell1']))
/public/home/off_liukunpeng/project/11_cluster_problem/AgaeSMO/AgaeSMO/utils.py:529: FutureWarning: pandas.value_counts is deprecated and will be removed in a future version. Use pd.Series(obj).value_counts() instead.
  plot_df = pd.value_counts(pd.value_counts(adata.uns['adj_spatial_']['Cell1']))
------Calculating spatial graph...
The graph contains 20902 edges, 3484 cells.
5.9994 neighbors per cell on average.
/public/home/off_liukunpeng/project/11_cluster_problem/AgaeSMO/AgaeSMO/utils.py:529: FutureWarning: pandas.value_counts is deprecated and will be removed in a future version. Use pd.Series(obj).value_counts() instead.
  plot_df = pd.value_counts(pd.value_counts(adata.uns['adj_spatial_']['Cell1']))
/public/home/off_liukunpeng/project/11_cluster_problem/AgaeSMO/AgaeSMO/utils.py:529: FutureWarning: pandas.value_counts is deprecated and will be removed in a future version. Use pd.Series(obj).value_counts() instead.
  plot_df = pd.value_counts(pd.value_counts(adata.uns['adj_spatial_']['Cell1']))
_images/Tutorials_paired_modality_9_4.png
_images/Tutorials_paired_modality_9_5.png
[10]:
model = AgaeSMO_v1.Train_AgaeSMO(data, device=device,learning_rate=0.001,epochs=600,loss_weight=[1,1,1,1])
output = model.train()
torch.Size([3484, 3000]) torch.Size([3484, 31])
dim_input1 3000 dim_input2 31
 dim_output1 30 dim_output2 30
  0%|          | 0/600 [00:00<?, ?it/s]/public/home/off_liukunpeng/software/anaconda3/envs/pyg1/lib/python3.9/site-packages/torch/nn/functional.py:1956: UserWarning: nn.functional.tanh is deprecated. Use torch.tanh instead.
  warnings.warn("nn.functional.tanh is deprecated. Use torch.tanh instead.")
/public/home/off_liukunpeng/project/11_cluster_problem/AgaeSMO/AgaeSMO/layer.py:250: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
  self.alpha = F.softmax(torch.squeeze(self.vu) + 1e-6)
  0%|          | 1/600 [00:03<34:10,  3.42s/it]
epoch:0 3.523435115814209
  2%|▏         | 11/600 [00:04<01:31,  6.40it/s]
epoch:10 1.109488844871521
  4%|▎         | 21/600 [00:05<01:07,  8.53it/s]
epoch:20 0.9199943542480469
  5%|▌         | 31/600 [00:06<01:05,  8.62it/s]
epoch:30 0.8487171530723572
  7%|▋         | 41/600 [00:07<01:04,  8.60it/s]
epoch:40 0.8076435327529907
  8%|▊         | 51/600 [00:08<01:03,  8.65it/s]
epoch:50 0.7793477177619934
 10%|█         | 61/600 [00:09<01:02,  8.63it/s]
epoch:60 0.7591758370399475
 12%|█▏        | 71/600 [00:11<01:01,  8.63it/s]
epoch:70 0.7457008361816406
 14%|█▎        | 81/600 [00:12<00:59,  8.65it/s]
epoch:80 0.7350626587867737
 15%|█▌        | 91/600 [00:13<00:58,  8.65it/s]
epoch:90 0.7239288687705994
 17%|█▋        | 101/600 [00:14<00:57,  8.65it/s]
epoch:100 0.7157127261161804
 18%|█▊        | 111/600 [00:15<00:56,  8.65it/s]
epoch:110 0.7050766944885254
 20%|██        | 121/600 [00:16<00:55,  8.65it/s]
epoch:120 0.7002685070037842
 22%|██▏       | 131/600 [00:17<00:54,  8.65it/s]
epoch:130 0.690095841884613
 24%|██▎       | 141/600 [00:18<00:53,  8.66it/s]
epoch:140 0.6826991438865662
 25%|██▌       | 151/600 [00:19<00:52,  8.63it/s]
epoch:150 0.6762441396713257
 27%|██▋       | 161/600 [00:20<00:50,  8.64it/s]
epoch:160 0.6688088178634644
 28%|██▊       | 171/600 [00:21<00:49,  8.64it/s]
epoch:170 0.6634473204612732
 30%|███       | 181/600 [00:23<00:48,  8.64it/s]
epoch:180 0.6571555137634277
 32%|███▏      | 191/600 [00:24<00:47,  8.64it/s]
epoch:190 0.6515916585922241
 34%|███▎      | 201/600 [00:25<00:46,  8.64it/s]
epoch:200 0.6466171741485596
 35%|███▌      | 211/600 [00:26<00:45,  8.64it/s]
epoch:210 0.6423236131668091
 37%|███▋      | 221/600 [00:27<00:43,  8.64it/s]
epoch:220 0.6366432905197144
 38%|███▊      | 231/600 [00:28<00:42,  8.63it/s]
epoch:230 0.6339633464813232
 40%|████      | 241/600 [00:29<00:41,  8.64it/s]
epoch:240 0.6290221214294434
 42%|████▏     | 251/600 [00:30<00:40,  8.63it/s]
epoch:250 0.6252649426460266
 44%|████▎     | 261/600 [00:31<00:39,  8.63it/s]
epoch:260 0.6232700347900391
 45%|████▌     | 271/600 [00:32<00:38,  8.63it/s]
epoch:270 0.6188769340515137
 47%|████▋     | 281/600 [00:33<00:36,  8.63it/s]
epoch:280 0.6156838536262512
 48%|████▊     | 291/600 [00:35<00:35,  8.62it/s]
epoch:290 0.6128601431846619
 50%|█████     | 301/600 [00:36<00:34,  8.64it/s]
epoch:300 0.6108685731887817
 52%|█████▏    | 311/600 [00:37<00:33,  8.62it/s]
epoch:310 0.6093709468841553
 54%|█████▎    | 321/600 [00:38<00:32,  8.63it/s]
epoch:320 0.6053789258003235
 55%|█████▌    | 331/600 [00:39<00:31,  8.64it/s]
epoch:330 0.6036992073059082
 57%|█████▋    | 341/600 [00:40<00:30,  8.62it/s]
epoch:340 0.5986937880516052
 58%|█████▊    | 351/600 [00:41<00:28,  8.62it/s]
epoch:350 0.5950220823287964
 60%|██████    | 361/600 [00:42<00:27,  8.64it/s]
epoch:360 0.5954763293266296
 62%|██████▏   | 371/600 [00:43<00:26,  8.61it/s]
epoch:370 0.5899379253387451
 64%|██████▎   | 381/600 [00:44<00:25,  8.61it/s]
epoch:380 0.5875986218452454
 65%|██████▌   | 391/600 [00:45<00:24,  8.62it/s]
epoch:390 0.5869743227958679
 67%|██████▋   | 401/600 [00:47<00:23,  8.63it/s]
epoch:400 0.58343505859375
 68%|██████▊   | 411/600 [00:48<00:21,  8.63it/s]
epoch:410 0.5789999961853027
 70%|███████   | 421/600 [00:49<00:20,  8.62it/s]
epoch:420 0.576606035232544
 72%|███████▏  | 431/600 [00:50<00:19,  8.64it/s]
epoch:430 0.5726597309112549
 73%|███████▎  | 440/600 [00:51<00:17,  9.18it/s]
epoch:440 0.5699533820152283
epoch:450 0.5658155083656311
epoch:460 0.5609947443008423
epoch:470 0.5592743158340454
epoch:480 0.5550069212913513
epoch:490 0.5514665842056274
 83%|████████▎ | 496/600 [00:51<00:00, 135.45it/s]
epoch:500 0.5470584630966187
 84%|████████▍ | 507/600 [00:52<00:02, 34.47it/s]
epoch:510 0.5442636013031006
 87%|████████▋ | 521/600 [00:54<00:04, 18.25it/s]
epoch:520 0.5398437976837158
 88%|████████▊ | 530/600 [00:55<00:04, 14.26it/s]
epoch:530 0.5369762778282166
 90%|█████████ | 542/600 [00:56<00:05, 10.72it/s]
epoch:540 0.5338019132614136
 92%|█████████▏| 551/600 [00:57<00:05,  9.21it/s]
epoch:550 0.5289199948310852
 94%|█████████▎| 561/600 [00:58<00:04,  8.66it/s]
epoch:560 0.5248134732246399
 95%|█████████▌| 571/600 [00:59<00:03,  8.63it/s]
epoch:570 0.5206929445266724
 97%|█████████▋| 581/600 [01:00<00:02,  8.61it/s]
epoch:580 0.5169838666915894
 98%|█████████▊| 591/600 [01:01<00:01,  8.62it/s]
epoch:590 0.5164998769760132
100%|██████████| 600/600 [01:02<00:00,  9.54it/s]
Model training finished!

[11]:
adata = adata_omics1.copy()
adata.obsm['emb_latent_omics1'] = output['emb_latent_omics1'].copy()
adata.obsm['emb_latent_omics2'] = output['emb_latent_omics2'].copy()
adata.obsm['AgaeSMO'] = output['AgaeSMO'].copy()
adata.obsm['alpha'] = output['alpha']
[21]:
adata=sc.read_h5ad("../AgaeSMO/result/Dataset11_Human_Lymph_Node_A1.h5ad")
adata
[21]:
AnnData object with n_obs × n_vars = 3484 × 17954
    obs: 'mclust', 'AgaeSMO', 'Ground Truth'
    var: 'gene_ids', 'feature_types', 'genome', 'n_cells', 'highly_variable', 'highly_variable_rank', 'means', 'variances', 'variances_norm', 'mean', 'std'
    uns: 'AgaeSMO_colors', 'adj_spatial', 'adj_spatial_', 'hvg', 'log1p', 'neighbors', 'umap'
    obsm: 'AgaeSMO', 'AgaeSMO_pca', 'X_umap', 'adj_feature', 'alpha', 'emb_latent_omics1', 'emb_latent_omics2', 'feat', 'spatial', 'tensor'
    obsp: 'connectivities', 'distances'
[22]:

n_cluster=6 tool = 'mclust' # mclust, leiden, and louvain AgaeSMO_v1.clustering(adata,refine_=False, key='AgaeSMO', add_key='AgaeSMO', n_clusters=n_cluster, method=tool, use_pca=True)# visualization annotation=pd.read_csv('data/Human_Lymph_Node_A1/annotation.csv') adata.obs['Ground Truth']=annotation.loc[:,'manual-anno'].to_list() indexs=AgaeSMO_v1.supervise_index(adata,"AgaeSMO",'Ground Truth') print(indexs) ARI=indexs["ARI"] import matplotlib.pyplot as plt fig, ax_list = plt.subplots(1, 2, figsize=(12, 4)) sc.pp.neighbors(adata, use_rep='AgaeSMO', n_neighbors=10) sc.tl.umap(adata) sc.pl.umap(adata, color='AgaeSMO', ax=ax_list[0], title='AgaeSMO', s=30, show=False) sc.pl.embedding(adata, basis='spatial', color='AgaeSMO', ax=ax_list[1], title=f'AgaeSMO:{ARI}', s=50, show=False) plt.tight_layout(w_pad=0.3) # plt.show()


fitting ...
  |======================================================================| 100%
{'ARI': 0.37791564193036475, 'NMI': 0.428201084832614, 'FMS': 0.5287651775711655, 'AMI': 0.4258045792302591, 'HMG': 0.4091611296985109, 'VMS': 0.428201084832614, 'MIS': 0.6902564599166731}
/public/home/off_liukunpeng/software/anaconda3/envs/pyg1/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:1234: FutureWarning: The default value of 'ignore' for the `na_action` parameter in pandas.Categorical.map is deprecated and will be changed to 'None' in a future version. Please set na_action to the desired value to avoid seeing this warning
  color_vector = pd.Categorical(values.map(color_map))
/public/home/off_liukunpeng/software/anaconda3/envs/pyg1/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:394: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
  cax = scatter(
/public/home/off_liukunpeng/software/anaconda3/envs/pyg1/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:1234: FutureWarning: The default value of 'ignore' for the `na_action` parameter in pandas.Categorical.map is deprecated and will be changed to 'None' in a future version. Please set na_action to the desired value to avoid seeing this warning
  color_vector = pd.Categorical(values.map(color_map))
/public/home/off_liukunpeng/software/anaconda3/envs/pyg1/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:394: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
  cax = scatter(
_images/Tutorials_paired_modality_13_2.png
[ ]: