Tutorials_ST_SM

[1]:
import scanpy as sc
import torch
import os
import pandas as pd

[2]:
import community as louvain
import AgaeSMO as AgaeSMO_v1
[3]:
random_seed = 2022
AgaeSMO_v1.fix_seed(random_seed)
# In[4]:


device = torch.device('cuda:2' if torch.cuda.is_available() else 'cpu')

# the location of R, which is required for the 'mclust' algorithm. Please replace the path below with local R installation path
os.environ['R_HOME'] = '/public/home/off_liukunpeng/software/anaconda3/envs/pyg1/lib/R'

data of demo deposited in https://drive.google.com/drive/folders/1eK5zSKmSV2eaQ9jhIEcKPXwuiWaGfldH?usp=sharing

[4]:

adata_omics1 = sc.read_h5ad("data/SMA/st.h5ad") adata_omics2 = sc.read_h5ad("data/SMA/ms.h5ad") adata_omics1.var_names_make_unique() adata_omics2.var_names_make_unique() print(adata_omics1) print(adata_omics2)
AnnData object with n_obs × n_vars = 2675 × 32285
    obs: 'in_tissue', 'array_row', 'array_col'
    var: 'gene_ids', 'feature_types', 'genome'
    uns: 'spatial'
    obsm: 'spatial'
AnnData object with n_obs × n_vars = 2438 × 1538
    obs: 'array_row', 'array_col'
    uns: 'spatial'
    obsm: 'spatial'
/public/home/off_liukunpeng/software/anaconda3/envs/pyg1/lib/python3.9/site-packages/anndata/_core/anndata.py:1820: UserWarning: Variable names are not unique. To make them unique, call `.var_names_make_unique`.
  utils.warn_names_duplicates("var")
[5]:

st_keys=list(adata_omics1.uns["spatial"].keys())[0] adata_omics1.obsm["HE_domain_coor"]=adata_omics1.obsm["spatial"]*adata_omics1.uns["spatial"][st_keys]["scalefactors"]["tissue_hires_scalef"] adata_omics2.obsm["HE_domain_coor"]=adata_omics2.obsm["spatial"] import matplotlib.pyplot as plt plt.scatter(adata_omics1.obsm["HE_domain_coor"][:,0],adata_omics1.obsm["HE_domain_coor"][:,1],s=1,c="red") plt.scatter(adata_omics2.obsm["HE_domain_coor"][:,0],adata_omics2.obsm["HE_domain_coor"][:,1],s=1,c="blue")

[5]:
<matplotlib.collections.PathCollection at 0x2ab60fce04c0>
_images/Tutorials_ST_SM_6_1.png
[6]:
sc.pp.filter_genes(adata_omics1, min_cells=10)
sc.pp.highly_variable_genes(adata_omics1, flavor="seurat_v3", n_top_genes=2000)
sc.pp.normalize_total(adata_omics1, target_sum=1e4)
sc.pp.log1p(adata_omics1)
# sc.pp.scale(adata_omics1)

# sc.pp.filter_genes(adata_omics2, min_cells=10)
sc.pp.highly_variable_genes(adata_omics2, flavor="seurat_v3", n_top_genes=1000)
# sc.pp.normalize_total(adata_omics2, target_sum=1e4)
sc.pp.log1p(adata_omics2)
# sc.pp.scale(adata_omics2)
/public/home/off_liukunpeng/software/anaconda3/envs/pyg1/lib/python3.9/site-packages/scanpy/preprocessing/_highly_variable_genes.py:64: UserWarning: `flavor='seurat_v3'` expects raw count data, but non-integers were found.
  warnings.warn(
[7]:
adata_omics1_high =  adata_omics1[:, adata_omics1.var['highly_variable']]
adata_omics1.obsm['feat'] = AgaeSMO_v1.pca(adata_omics1_high, n_comps=30)
# adata_omics1.obsm['tensor']=adata_omics1[:, adata_omics1.var['highly_variable']].X.toarray()
adata_omics1.obsm['tensor']=AgaeSMO_v1.pca(adata_omics1_high, n_comps=100)
# Protein
# adata_omics2 = AgaeSMO_v1.clr_normalize_each_cell(adata_omics2)
# sc.pp.scale(adata_omics2)
# adata_omics2_high =  adata_omics2[:, adata_omics2.var['highly_variable']]
adata_omics2.obsm['feat'] = AgaeSMO_v1.pca(adata_omics2, n_comps=30)
# adata_omics2.obsm['tensor']=adata_omics2[:, adata_omics2.var['highly_variable']].X.toarray()
# adata_omics2.obsm['tensor']=adata_omics2.X.toarray()
adata_omics2.obsm['tensor']=AgaeSMO_v1.pca(adata_omics2, n_comps=100)
[8]:
data = AgaeSMO_v1.construct_neighbor_graph(adata_omics1, adata_omics2,6,4,k=6)
------Calculating spatial graph...
The graph contains 15568 edges, 2675 cells.
5.8198 neighbors per cell on average.
------Calculating spatial graph...
/public/home/off_liukunpeng/project/11_cluster_problem/AgaeSMO/AgaeSMO/utils.py:529: FutureWarning: pandas.value_counts is deprecated and will be removed in a future version. Use pd.Series(obj).value_counts() instead.
  plot_df = pd.value_counts(pd.value_counts(adata.uns['adj_spatial_']['Cell1']))
/public/home/off_liukunpeng/project/11_cluster_problem/AgaeSMO/AgaeSMO/utils.py:529: FutureWarning: pandas.value_counts is deprecated and will be removed in a future version. Use pd.Series(obj).value_counts() instead.
  plot_df = pd.value_counts(pd.value_counts(adata.uns['adj_spatial_']['Cell1']))
The graph contains 9510 edges, 2438 cells.
3.9007 neighbors per cell on average.
/public/home/off_liukunpeng/project/11_cluster_problem/AgaeSMO/AgaeSMO/utils.py:529: FutureWarning: pandas.value_counts is deprecated and will be removed in a future version. Use pd.Series(obj).value_counts() instead.
  plot_df = pd.value_counts(pd.value_counts(adata.uns['adj_spatial_']['Cell1']))
/public/home/off_liukunpeng/project/11_cluster_problem/AgaeSMO/AgaeSMO/utils.py:529: FutureWarning: pandas.value_counts is deprecated and will be removed in a future version. Use pd.Series(obj).value_counts() instead.
  plot_df = pd.value_counts(pd.value_counts(adata.uns['adj_spatial_']['Cell1']))
_images/Tutorials_ST_SM_9_4.png
_images/Tutorials_ST_SM_9_5.png
[9]:
model = AgaeSMO_v1.Train_AgaeSMO(data, device=device,epochs=600,learning_rate=0.001,weight_decay=0.005,trans_ngb=12,loss_weight=[1,1,1,1])

# train model
output = model.train()


# In[13]:


adata = adata_omics1.copy()
adata.obsm['emb_latent_omics1'] = output['emb_latent_omics1'].copy()
adata.obsm['emb_latent_omics2'] = output['emb_latent_omics2'].copy()
adata.obsm['AgaeSMO'] = output['AgaeSMO'].copy()
adata.obsm['alpha'] = output['alpha']
spot number is not equal,adding graph transfer
torch.Size([2675, 100]) torch.Size([2438, 100])
dim_input1 100 dim_input2 100
 dim_output1 30 dim_output2 30
  0%|          | 0/600 [00:00<?, ?it/s]/public/home/off_liukunpeng/software/anaconda3/envs/pyg1/lib/python3.9/site-packages/torch/nn/functional.py:1956: UserWarning: nn.functional.tanh is deprecated. Use torch.tanh instead.
  warnings.warn("nn.functional.tanh is deprecated. Use torch.tanh instead.")
/public/home/off_liukunpeng/project/11_cluster_problem/AgaeSMO/AgaeSMO/layer.py:250: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
  self.alpha = F.softmax(torch.squeeze(self.vu) + 1e-6)
  0%|          | 1/600 [00:04<46:57,  4.70s/it]
epoch:0 2.4103517532348633
  2%|▏         | 12/600 [00:05<01:42,  5.73it/s]
epoch:10 1.178350567817688
  4%|▎         | 21/600 [00:07<01:16,  7.60it/s]
epoch:20 0.8841872811317444
  5%|▌         | 32/600 [00:08<01:08,  8.23it/s]
epoch:30 0.7334849834442139
  7%|▋         | 42/600 [00:09<01:07,  8.29it/s]
epoch:40 0.6432726383209229
  9%|▊         | 52/600 [00:10<01:06,  8.26it/s]
epoch:50 0.5906811952590942
 10%|█         | 62/600 [00:12<01:05,  8.27it/s]
epoch:60 0.5499380826950073
 12%|█▏        | 72/600 [00:13<01:04,  8.25it/s]
epoch:70 0.5235077142715454
 14%|█▎        | 82/600 [00:14<01:02,  8.24it/s]
epoch:80 0.49905869364738464
 15%|█▌        | 92/600 [00:15<01:01,  8.28it/s]
epoch:90 0.48042938113212585
 17%|█▋        | 102/600 [00:16<01:00,  8.23it/s]
epoch:100 0.4779009521007538
 18%|█▊        | 111/600 [00:18<01:03,  7.76it/s]
epoch:110 0.4646218717098236
 20%|██        | 122/600 [00:19<00:58,  8.24it/s]
epoch:120 0.45805737376213074
 22%|██▏       | 132/600 [00:20<00:56,  8.30it/s]
epoch:130 0.448346883058548
 24%|██▎       | 142/600 [00:21<00:55,  8.26it/s]
epoch:140 0.4533771574497223
 25%|██▌       | 152/600 [00:23<00:54,  8.25it/s]
epoch:150 0.44102200865745544
 27%|██▋       | 162/600 [00:24<00:53,  8.24it/s]
epoch:160 0.43565624952316284
 29%|██▊       | 172/600 [00:25<00:52,  8.23it/s]
epoch:170 0.42828622460365295
 30%|███       | 182/600 [00:26<00:50,  8.26it/s]
epoch:180 0.42275291681289673
 32%|███▏      | 192/600 [00:28<00:49,  8.24it/s]
epoch:190 0.43138617277145386
 34%|███▎      | 202/600 [00:29<00:47,  8.32it/s]
epoch:200 0.4205150902271271
 35%|███▌      | 212/600 [00:30<00:47,  8.24it/s]
epoch:210 0.4160120487213135
 37%|███▋      | 222/600 [00:31<00:45,  8.32it/s]
epoch:220 0.4219150245189667
 39%|███▊      | 232/600 [00:32<00:44,  8.25it/s]
epoch:230 0.4204656779766083
 40%|████      | 242/600 [00:34<00:43,  8.25it/s]
epoch:240 0.41811487078666687
 42%|████▏     | 252/600 [00:35<00:42,  8.27it/s]
epoch:250 0.406606525182724
 44%|████▎     | 262/600 [00:36<00:41,  8.23it/s]
epoch:260 0.41063404083251953
 45%|████▌     | 272/600 [00:37<00:39,  8.28it/s]
epoch:270 0.40716657042503357
 47%|████▋     | 282/600 [00:39<00:38,  8.27it/s]
epoch:280 0.40920937061309814
 49%|████▊     | 292/600 [00:40<00:37,  8.31it/s]
epoch:290 0.4018547236919403
 50%|█████     | 302/600 [00:41<00:36,  8.25it/s]
epoch:300 0.4001483619213104
 52%|█████▏    | 312/600 [00:42<00:34,  8.28it/s]
epoch:310 0.4155341684818268
 54%|█████▎    | 322/600 [00:43<00:33,  8.25it/s]
epoch:320 0.4047088623046875
 55%|█████▌    | 332/600 [00:45<00:32,  8.24it/s]
epoch:330 0.39967647194862366
 57%|█████▋    | 342/600 [00:46<00:31,  8.27it/s]
epoch:340 0.40037432312965393
 59%|█████▊    | 352/600 [00:47<00:29,  8.30it/s]
epoch:350 0.3993385136127472
 60%|██████    | 362/600 [00:48<00:28,  8.29it/s]
epoch:360 0.3955089747905731
 62%|██████▏   | 372/600 [00:50<00:27,  8.23it/s]
epoch:370 0.3988928496837616
 64%|██████▎   | 382/600 [00:51<00:26,  8.29it/s]
epoch:380 0.3988484740257263
 65%|██████▌   | 392/600 [00:52<00:25,  8.26it/s]
epoch:390 0.395942747592926
 67%|██████▋   | 402/600 [00:53<00:23,  8.32it/s]
epoch:400 0.4072328507900238
 69%|██████▊   | 412/600 [00:54<00:22,  8.20it/s]
epoch:410 0.39476102590560913
 70%|███████   | 422/600 [00:56<00:21,  8.28it/s]
epoch:420 0.39197808504104614
 72%|███████▏  | 432/600 [00:57<00:20,  8.28it/s]
epoch:430 0.38943982124328613
 74%|███████▎  | 442/600 [00:58<00:19,  8.23it/s]
epoch:440 0.39226868748664856
 75%|███████▌  | 451/600 [00:59<00:19,  7.79it/s]
epoch:450 0.3968330919742584
epoch:460 0.4069817364215851
epoch:470 0.3957488536834717
epoch:480 0.3923015594482422
epoch:490 0.39537328481674194
epoch:500 0.3884434103965759
 84%|████████▍ | 506/600 [00:59<00:00, 119.02it/s]
epoch:510 0.39142510294914246
 86%|████████▌ | 516/600 [01:01<00:02, 31.97it/s]
epoch:520 0.3930674195289612
 88%|████████▊ | 529/600 [01:02<00:04, 16.90it/s]
epoch:530 0.3895781934261322
 90%|█████████ | 541/600 [01:04<00:05, 11.24it/s]
epoch:540 0.39307472109794617
 92%|█████████▏| 552/600 [01:05<00:05,  8.85it/s]
epoch:550 0.3945847451686859
 94%|█████████▎| 562/600 [01:06<00:04,  8.30it/s]
epoch:560 0.38469061255455017
 95%|█████████▌| 572/600 [01:08<00:03,  8.26it/s]
epoch:570 0.3904060125350952
 97%|█████████▋| 582/600 [01:09<00:02,  8.26it/s]
epoch:580 0.38975200057029724
 99%|█████████▊| 592/600 [01:10<00:00,  8.25it/s]
epoch:590 0.39128002524375916
100%|██████████| 600/600 [01:11<00:00,  8.40it/s]
Model training finished!

[10]:

n_cluster=14 tool = 'mclust' # mclust, leiden, and louvain AgaeSMO_v1.clustering(adata,refine_=True, key='AgaeSMO', add_key='AgaeSMO', n_clusters=n_cluster, method=tool, use_pca=True)# visualization import matplotlib.pyplot as plt fig, ax_list = plt.subplots(1, 2, figsize=(12, 4)) sc.pp.neighbors(adata, use_rep='AgaeSMO', n_neighbors=10) sc.tl.umap(adata) sc.pl.umap(adata, color='AgaeSMO', ax=ax_list[0], title='AgaeSMO', s=30, show=False) sc.pl.embedding(adata, basis='spatial', color='AgaeSMO', ax=ax_list[1], title='AgaeSMO', s=80, show=False) plt.tight_layout(w_pad=0.3) plt.show()

R[write to console]:                    __           __
   ____ ___  _____/ /_  _______/ /_
  / __ `__ \/ ___/ / / / / ___/ __/
 / / / / / / /__/ / /_/ (__  ) /_
/_/ /_/ /_/\___/_/\__,_/____/\__/   version 6.1.1
Type 'citation("mclust")' for citing this R package in publications.

fitting ...
  |======================================================================| 100%
/public/home/off_liukunpeng/software/anaconda3/envs/pyg1/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:1234: FutureWarning: The default value of 'ignore' for the `na_action` parameter in pandas.Categorical.map is deprecated and will be changed to 'None' in a future version. Please set na_action to the desired value to avoid seeing this warning
  color_vector = pd.Categorical(values.map(color_map))
/public/home/off_liukunpeng/software/anaconda3/envs/pyg1/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:394: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
  cax = scatter(
/public/home/off_liukunpeng/software/anaconda3/envs/pyg1/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:1234: FutureWarning: The default value of 'ignore' for the `na_action` parameter in pandas.Categorical.map is deprecated and will be changed to 'None' in a future version. Please set na_action to the desired value to avoid seeing this warning
  color_vector = pd.Categorical(values.map(color_map))
/public/home/off_liukunpeng/software/anaconda3/envs/pyg1/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:394: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
  cax = scatter(
_images/Tutorials_ST_SM_11_3.png
[ ]: