Tutorials_ST_SM
[1]:
import scanpy as sc
import torch
import os
import pandas as pd
[2]:
import community as louvain
import AgaeSMO as AgaeSMO_v1
[3]:
random_seed = 2022
AgaeSMO_v1.fix_seed(random_seed)
# In[4]:
device = torch.device('cuda:2' if torch.cuda.is_available() else 'cpu')
# the location of R, which is required for the 'mclust' algorithm. Please replace the path below with local R installation path
os.environ['R_HOME'] = '/public/home/off_liukunpeng/software/anaconda3/envs/pyg1/lib/R'
data of demo deposited in https://drive.google.com/drive/folders/1eK5zSKmSV2eaQ9jhIEcKPXwuiWaGfldH?usp=sharing
[4]:
adata_omics1 = sc.read_h5ad("data/SMA/st.h5ad")
adata_omics2 = sc.read_h5ad("data/SMA/ms.h5ad")
adata_omics1.var_names_make_unique()
adata_omics2.var_names_make_unique()
print(adata_omics1)
print(adata_omics2)
AnnData object with n_obs × n_vars = 2675 × 32285
obs: 'in_tissue', 'array_row', 'array_col'
var: 'gene_ids', 'feature_types', 'genome'
uns: 'spatial'
obsm: 'spatial'
AnnData object with n_obs × n_vars = 2438 × 1538
obs: 'array_row', 'array_col'
uns: 'spatial'
obsm: 'spatial'
/public/home/off_liukunpeng/software/anaconda3/envs/pyg1/lib/python3.9/site-packages/anndata/_core/anndata.py:1820: UserWarning: Variable names are not unique. To make them unique, call `.var_names_make_unique`.
utils.warn_names_duplicates("var")
[5]:
st_keys=list(adata_omics1.uns["spatial"].keys())[0]
adata_omics1.obsm["HE_domain_coor"]=adata_omics1.obsm["spatial"]*adata_omics1.uns["spatial"][st_keys]["scalefactors"]["tissue_hires_scalef"]
adata_omics2.obsm["HE_domain_coor"]=adata_omics2.obsm["spatial"]
import matplotlib.pyplot as plt
plt.scatter(adata_omics1.obsm["HE_domain_coor"][:,0],adata_omics1.obsm["HE_domain_coor"][:,1],s=1,c="red")
plt.scatter(adata_omics2.obsm["HE_domain_coor"][:,0],adata_omics2.obsm["HE_domain_coor"][:,1],s=1,c="blue")
[5]:
<matplotlib.collections.PathCollection at 0x2ab60fce04c0>
[6]:
sc.pp.filter_genes(adata_omics1, min_cells=10)
sc.pp.highly_variable_genes(adata_omics1, flavor="seurat_v3", n_top_genes=2000)
sc.pp.normalize_total(adata_omics1, target_sum=1e4)
sc.pp.log1p(adata_omics1)
# sc.pp.scale(adata_omics1)
# sc.pp.filter_genes(adata_omics2, min_cells=10)
sc.pp.highly_variable_genes(adata_omics2, flavor="seurat_v3", n_top_genes=1000)
# sc.pp.normalize_total(adata_omics2, target_sum=1e4)
sc.pp.log1p(adata_omics2)
# sc.pp.scale(adata_omics2)
/public/home/off_liukunpeng/software/anaconda3/envs/pyg1/lib/python3.9/site-packages/scanpy/preprocessing/_highly_variable_genes.py:64: UserWarning: `flavor='seurat_v3'` expects raw count data, but non-integers were found.
warnings.warn(
[7]:
adata_omics1_high = adata_omics1[:, adata_omics1.var['highly_variable']]
adata_omics1.obsm['feat'] = AgaeSMO_v1.pca(adata_omics1_high, n_comps=30)
# adata_omics1.obsm['tensor']=adata_omics1[:, adata_omics1.var['highly_variable']].X.toarray()
adata_omics1.obsm['tensor']=AgaeSMO_v1.pca(adata_omics1_high, n_comps=100)
# Protein
# adata_omics2 = AgaeSMO_v1.clr_normalize_each_cell(adata_omics2)
# sc.pp.scale(adata_omics2)
# adata_omics2_high = adata_omics2[:, adata_omics2.var['highly_variable']]
adata_omics2.obsm['feat'] = AgaeSMO_v1.pca(adata_omics2, n_comps=30)
# adata_omics2.obsm['tensor']=adata_omics2[:, adata_omics2.var['highly_variable']].X.toarray()
# adata_omics2.obsm['tensor']=adata_omics2.X.toarray()
adata_omics2.obsm['tensor']=AgaeSMO_v1.pca(adata_omics2, n_comps=100)
[8]:
data = AgaeSMO_v1.construct_neighbor_graph(adata_omics1, adata_omics2,6,4,k=6)
------Calculating spatial graph...
The graph contains 15568 edges, 2675 cells.
5.8198 neighbors per cell on average.
------Calculating spatial graph...
/public/home/off_liukunpeng/project/11_cluster_problem/AgaeSMO/AgaeSMO/utils.py:529: FutureWarning: pandas.value_counts is deprecated and will be removed in a future version. Use pd.Series(obj).value_counts() instead.
plot_df = pd.value_counts(pd.value_counts(adata.uns['adj_spatial_']['Cell1']))
/public/home/off_liukunpeng/project/11_cluster_problem/AgaeSMO/AgaeSMO/utils.py:529: FutureWarning: pandas.value_counts is deprecated and will be removed in a future version. Use pd.Series(obj).value_counts() instead.
plot_df = pd.value_counts(pd.value_counts(adata.uns['adj_spatial_']['Cell1']))
The graph contains 9510 edges, 2438 cells.
3.9007 neighbors per cell on average.
/public/home/off_liukunpeng/project/11_cluster_problem/AgaeSMO/AgaeSMO/utils.py:529: FutureWarning: pandas.value_counts is deprecated and will be removed in a future version. Use pd.Series(obj).value_counts() instead.
plot_df = pd.value_counts(pd.value_counts(adata.uns['adj_spatial_']['Cell1']))
/public/home/off_liukunpeng/project/11_cluster_problem/AgaeSMO/AgaeSMO/utils.py:529: FutureWarning: pandas.value_counts is deprecated and will be removed in a future version. Use pd.Series(obj).value_counts() instead.
plot_df = pd.value_counts(pd.value_counts(adata.uns['adj_spatial_']['Cell1']))
[9]:
model = AgaeSMO_v1.Train_AgaeSMO(data, device=device,epochs=600,learning_rate=0.001,weight_decay=0.005,trans_ngb=12,loss_weight=[1,1,1,1])
# train model
output = model.train()
# In[13]:
adata = adata_omics1.copy()
adata.obsm['emb_latent_omics1'] = output['emb_latent_omics1'].copy()
adata.obsm['emb_latent_omics2'] = output['emb_latent_omics2'].copy()
adata.obsm['AgaeSMO'] = output['AgaeSMO'].copy()
adata.obsm['alpha'] = output['alpha']
spot number is not equal,adding graph transfer
torch.Size([2675, 100]) torch.Size([2438, 100])
dim_input1 100 dim_input2 100
dim_output1 30 dim_output2 30
0%| | 0/600 [00:00<?, ?it/s]/public/home/off_liukunpeng/software/anaconda3/envs/pyg1/lib/python3.9/site-packages/torch/nn/functional.py:1956: UserWarning: nn.functional.tanh is deprecated. Use torch.tanh instead.
warnings.warn("nn.functional.tanh is deprecated. Use torch.tanh instead.")
/public/home/off_liukunpeng/project/11_cluster_problem/AgaeSMO/AgaeSMO/layer.py:250: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
self.alpha = F.softmax(torch.squeeze(self.vu) + 1e-6)
0%| | 1/600 [00:04<46:57, 4.70s/it]
epoch:0 2.4103517532348633
2%|▏ | 12/600 [00:05<01:42, 5.73it/s]
epoch:10 1.178350567817688
4%|▎ | 21/600 [00:07<01:16, 7.60it/s]
epoch:20 0.8841872811317444
5%|▌ | 32/600 [00:08<01:08, 8.23it/s]
epoch:30 0.7334849834442139
7%|▋ | 42/600 [00:09<01:07, 8.29it/s]
epoch:40 0.6432726383209229
9%|▊ | 52/600 [00:10<01:06, 8.26it/s]
epoch:50 0.5906811952590942
10%|█ | 62/600 [00:12<01:05, 8.27it/s]
epoch:60 0.5499380826950073
12%|█▏ | 72/600 [00:13<01:04, 8.25it/s]
epoch:70 0.5235077142715454
14%|█▎ | 82/600 [00:14<01:02, 8.24it/s]
epoch:80 0.49905869364738464
15%|█▌ | 92/600 [00:15<01:01, 8.28it/s]
epoch:90 0.48042938113212585
17%|█▋ | 102/600 [00:16<01:00, 8.23it/s]
epoch:100 0.4779009521007538
18%|█▊ | 111/600 [00:18<01:03, 7.76it/s]
epoch:110 0.4646218717098236
20%|██ | 122/600 [00:19<00:58, 8.24it/s]
epoch:120 0.45805737376213074
22%|██▏ | 132/600 [00:20<00:56, 8.30it/s]
epoch:130 0.448346883058548
24%|██▎ | 142/600 [00:21<00:55, 8.26it/s]
epoch:140 0.4533771574497223
25%|██▌ | 152/600 [00:23<00:54, 8.25it/s]
epoch:150 0.44102200865745544
27%|██▋ | 162/600 [00:24<00:53, 8.24it/s]
epoch:160 0.43565624952316284
29%|██▊ | 172/600 [00:25<00:52, 8.23it/s]
epoch:170 0.42828622460365295
30%|███ | 182/600 [00:26<00:50, 8.26it/s]
epoch:180 0.42275291681289673
32%|███▏ | 192/600 [00:28<00:49, 8.24it/s]
epoch:190 0.43138617277145386
34%|███▎ | 202/600 [00:29<00:47, 8.32it/s]
epoch:200 0.4205150902271271
35%|███▌ | 212/600 [00:30<00:47, 8.24it/s]
epoch:210 0.4160120487213135
37%|███▋ | 222/600 [00:31<00:45, 8.32it/s]
epoch:220 0.4219150245189667
39%|███▊ | 232/600 [00:32<00:44, 8.25it/s]
epoch:230 0.4204656779766083
40%|████ | 242/600 [00:34<00:43, 8.25it/s]
epoch:240 0.41811487078666687
42%|████▏ | 252/600 [00:35<00:42, 8.27it/s]
epoch:250 0.406606525182724
44%|████▎ | 262/600 [00:36<00:41, 8.23it/s]
epoch:260 0.41063404083251953
45%|████▌ | 272/600 [00:37<00:39, 8.28it/s]
epoch:270 0.40716657042503357
47%|████▋ | 282/600 [00:39<00:38, 8.27it/s]
epoch:280 0.40920937061309814
49%|████▊ | 292/600 [00:40<00:37, 8.31it/s]
epoch:290 0.4018547236919403
50%|█████ | 302/600 [00:41<00:36, 8.25it/s]
epoch:300 0.4001483619213104
52%|█████▏ | 312/600 [00:42<00:34, 8.28it/s]
epoch:310 0.4155341684818268
54%|█████▎ | 322/600 [00:43<00:33, 8.25it/s]
epoch:320 0.4047088623046875
55%|█████▌ | 332/600 [00:45<00:32, 8.24it/s]
epoch:330 0.39967647194862366
57%|█████▋ | 342/600 [00:46<00:31, 8.27it/s]
epoch:340 0.40037432312965393
59%|█████▊ | 352/600 [00:47<00:29, 8.30it/s]
epoch:350 0.3993385136127472
60%|██████ | 362/600 [00:48<00:28, 8.29it/s]
epoch:360 0.3955089747905731
62%|██████▏ | 372/600 [00:50<00:27, 8.23it/s]
epoch:370 0.3988928496837616
64%|██████▎ | 382/600 [00:51<00:26, 8.29it/s]
epoch:380 0.3988484740257263
65%|██████▌ | 392/600 [00:52<00:25, 8.26it/s]
epoch:390 0.395942747592926
67%|██████▋ | 402/600 [00:53<00:23, 8.32it/s]
epoch:400 0.4072328507900238
69%|██████▊ | 412/600 [00:54<00:22, 8.20it/s]
epoch:410 0.39476102590560913
70%|███████ | 422/600 [00:56<00:21, 8.28it/s]
epoch:420 0.39197808504104614
72%|███████▏ | 432/600 [00:57<00:20, 8.28it/s]
epoch:430 0.38943982124328613
74%|███████▎ | 442/600 [00:58<00:19, 8.23it/s]
epoch:440 0.39226868748664856
75%|███████▌ | 451/600 [00:59<00:19, 7.79it/s]
epoch:450 0.3968330919742584
epoch:460 0.4069817364215851
epoch:470 0.3957488536834717
epoch:480 0.3923015594482422
epoch:490 0.39537328481674194
epoch:500 0.3884434103965759
84%|████████▍ | 506/600 [00:59<00:00, 119.02it/s]
epoch:510 0.39142510294914246
86%|████████▌ | 516/600 [01:01<00:02, 31.97it/s]
epoch:520 0.3930674195289612
88%|████████▊ | 529/600 [01:02<00:04, 16.90it/s]
epoch:530 0.3895781934261322
90%|█████████ | 541/600 [01:04<00:05, 11.24it/s]
epoch:540 0.39307472109794617
92%|█████████▏| 552/600 [01:05<00:05, 8.85it/s]
epoch:550 0.3945847451686859
94%|█████████▎| 562/600 [01:06<00:04, 8.30it/s]
epoch:560 0.38469061255455017
95%|█████████▌| 572/600 [01:08<00:03, 8.26it/s]
epoch:570 0.3904060125350952
97%|█████████▋| 582/600 [01:09<00:02, 8.26it/s]
epoch:580 0.38975200057029724
99%|█████████▊| 592/600 [01:10<00:00, 8.25it/s]
epoch:590 0.39128002524375916
100%|██████████| 600/600 [01:11<00:00, 8.40it/s]
Model training finished!
[10]:
n_cluster=14
tool = 'mclust' # mclust, leiden, and louvain
AgaeSMO_v1.clustering(adata,refine_=True, key='AgaeSMO', add_key='AgaeSMO', n_clusters=n_cluster, method=tool, use_pca=True)# visualization
import matplotlib.pyplot as plt
fig, ax_list = plt.subplots(1, 2, figsize=(12, 4))
sc.pp.neighbors(adata, use_rep='AgaeSMO', n_neighbors=10)
sc.tl.umap(adata)
sc.pl.umap(adata, color='AgaeSMO', ax=ax_list[0], title='AgaeSMO', s=30, show=False)
sc.pl.embedding(adata, basis='spatial', color='AgaeSMO', ax=ax_list[1], title='AgaeSMO', s=80, show=False)
plt.tight_layout(w_pad=0.3)
plt.show()
R[write to console]: __ __
____ ___ _____/ /_ _______/ /_
/ __ `__ \/ ___/ / / / / ___/ __/
/ / / / / / /__/ / /_/ (__ ) /_
/_/ /_/ /_/\___/_/\__,_/____/\__/ version 6.1.1
Type 'citation("mclust")' for citing this R package in publications.
fitting ...
|======================================================================| 100%
/public/home/off_liukunpeng/software/anaconda3/envs/pyg1/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:1234: FutureWarning: The default value of 'ignore' for the `na_action` parameter in pandas.Categorical.map is deprecated and will be changed to 'None' in a future version. Please set na_action to the desired value to avoid seeing this warning
color_vector = pd.Categorical(values.map(color_map))
/public/home/off_liukunpeng/software/anaconda3/envs/pyg1/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:394: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
cax = scatter(
/public/home/off_liukunpeng/software/anaconda3/envs/pyg1/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:1234: FutureWarning: The default value of 'ignore' for the `na_action` parameter in pandas.Categorical.map is deprecated and will be changed to 'None' in a future version. Please set na_action to the desired value to avoid seeing this warning
color_vector = pd.Categorical(values.map(color_map))
/public/home/off_liukunpeng/software/anaconda3/envs/pyg1/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:394: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
cax = scatter(
[ ]: