300字范文 > 单细胞转录组实战06: pySCENIC转录因子分析（docker）

单细胞转录组实战06: pySCENIC转录因子分析（docker）

时间：2021-07-11 00:57:06

![生信交流与合作请关注公众号@生信探索]

准备环境

pyscenic

micromambaactivateSC

pipinstallpyscenic-i/pypi/simple/

安装docker

需要有root权限或者在docker的用户组

#1.UpdatetheaptpackageindexandinstallpackagestoallowapttousearepositoryoverHTTPS

sudoapt-getupdate

sudoapt-getinstall\

ca-certificates\

curl\

gnupg\

lsb-release

#2.AddDocker’sofficialGPGkey

sudomkdir-m0755-p/etc/apt/keyrings

curl-fsSL/linux/ubuntu/gpg|sudogpg--dearmor-o/etc/apt/keyrings/docker.gpg

#3.Usethefollowingcommandtosetuptherepository

echo\

"deb[arch=$(dpkg--print-architecture)signed-by=/etc/apt/keyrings/docker.gpg]/linux/ubuntu\

$(lsb_release-cs)stable"|sudotee/etc/apt/sources.list.d/docker.list>/dev/null

#4.installdocker

sudoapt-getupdate

sudoapt-getinstalldocker-cedocker-ce-clicontainerd.iodocker-buildx-plugindocker-compose-plugin

#5.chmod

sudogroupadddocker

sudousermod-aGdocker${USER}#把非root用户添加到用户组

sudosystemctlrestartdocker

docker pyscenic

dockerpullaertslab/pyscenic:0.12.1

准备数据库文件

#>>>downlod_SCENIC.sh>>>

#准备数据库（人）

mkdir-p~/DataHub/SCENIC

cd~/DataHub/SCENIC

#getrankingdatabase

wget/cistarget/databases/homo_sapiens/hg38/refseq_r80/mc_v10_clust/gene_based/hg38_10kbp_up_10kbp_down_full_tx_v10_clust.genes_vs_motifs.rankings.feather

wget/cistarget/databases/homo_sapiens/hg38/refseq_r80/mc_v10_clust/gene_based/hg38_10kbp_up_10kbp_down_full_tx_v10_clust.genes_vs_motifs.rankings.feather.sha1sum.txt

sha1sum-chg38_10kbp_up_10kbp_down_full_tx_v10_clust.genes_vs_motifs.rankings.feather.sha1sum.txthg38_10kbp_up_10kbp_down_full_tx_v10_clust.genes_vs_motifs.rankings.feather

#getmotifdatabase

wget/cistarget/motif2tf/motifs-v10nr_clust-nr.hgnc-m0.001-o0.0.tbl

#TFlist#查看文本文件是不是只有一列基因

wget/aertslab/pySCENIC/blob/master/resources/hs_hgnc_tfs.txt

#>>>downlod_SCENIC.sh>>>

nohupzshdownlod_SCENIC.sh&>downlod_SCENIC.sh.log&

准备count数据

frompathlibimportPath

importnumpyasnp

importpandasaspd

importscanpyassc

importmatplotlib.pyplotasplt

importloompy

OUTPUT_DIR="output/05.SCENIC"

Path(OUTPUT_DIR).mkdir(parents=True,exist_ok=True)

adata=sc.read_h5ad('output/03.inferCNV/adata.h5')

adata_raw=adata.raw.to_adata()

rownames=dict(Gene=adata_raw.var_names.tolist())

colnames=dict(CellID=adata_raw.obs_names.tolist())

loompy.create(filename=OUTPUT_DIR+"/X.loom",layers=adata_raw.X.transpose(),row_attrs=rownames,col_attrs=colnames)

pyscenic CLI

建议使用下边的docker运行这两个步骤

#human

n_jobs=12

mtx_path=X.loom

dir=/home/victor/DataHub/SCENIC

tfs=$dir/hs_hgnc_tfs.txt

feather=$dir/hg38_10kbp_up_10kbp_down_full_tx_v10_clust.genes_vs_motifs.rankings.feather

tbl=$dir/motifs-v10nr_clust-nr.hgnc-m0.001-o0.0.tbl

#STEP1/3:Generegulatorynetworkinference,andgenerationofco-expressionmodules

pyscenicgrn\

--seed1314\

--num_workers${n_jobs}\

--methodgrnboost2\

--outputgrn.csv\

--sparse\

${mtx_path}\

${tfs}

#STEP2/3:Regulonprediction(cisTarget)

pyscenicctx\

--num_workers${n_jobs}\

--modedask_multiprocessing\

--mask_dropouts\

--sparse\

--outputctx.csv\

--expression_mtx_fname${mtx_path}\

--annotations_fname${tbl}\

grn.csv\

${feather}

运行pySCENIC

cdoutput/05.SCENIC

micromambaactivateSC

nohupzsh~/Project/SC10X/src/run_SCENIC.sh&>run_SCENIC.sh.log&

docker运行脚本

docker脚本

#>>>scenic_docker.sh>>>

n_jobs=20

input_dir=/home/victor/DataHub/SCENIC

output_dir=/home/victor/Project/SC10X/output/05.SCENIC

#STEP1/3:Generegulatorynetworkinference,andgenerationofco-expressionmodules

dockerrun--rm\

-v${input_dir}:/input_data\

-v${output_dir}:/output_data\

aertslab/pyscenic:0.12.1pyscenicgrn\

--num_workers${n_jobs}\

--methodgrnboost2\

--output/output_data/grn.csv\

--sparse\

/input_data/X.loom\

/input_data/hs_hgnc_tfs.txt

#STEP2/3:Regulonprediction(cisTarget)

dockerrun--rm\

-v${input_dir}:/input_data\

-v${output_dir}:/output_data\

aertslab/pyscenic:0.12.1pyscenicctx\

/output_data/grn.csv\

/input_data/hg38_10kbp_up_10kbp_down_full_tx_v10_clust.genes_vs_motifs.rankings.feather\

--mask_dropouts\

--sparse\

--annotations_fname/input_data/motifs-v10nr_clust-nr.hgnc-m0.001-o0.0.tbl\

--expression_mtx_fname/output_data/X.loom\

--mode"custom_multiprocessing"\

--output/output_data/ctx.csv\

--num_workers${n_jobs}

#<<<scenic_docker.sh<<<

运行

cdoutput/05.SCENIC

nohupzsh~/Project/SC10X/src/scenic_docker.sh&>scenic_docker.sh.log&

在Jupyter中运行

frompathlibimportPath

importoperator

importcytoolz

importnumpyasnp

importpandasaspd

importmatplotlib.pyplotasplt

importseabornassns

importscanpyassc

frompyscenic.utilsimportload_motifs

frompyscenic.aucellimportaucell

frompyscenic.binarizationimportbinarize

frompyscenic.plottingimportplot_binarization,plot_rss

frompyscenic.transformimportdf2regulons

importbioquestasbq#/BioQuest/bioquest

OUTPUT_DIR='output/05.SCENIC'

Path(OUTPUT_DIR).mkdir(parents=True,exist_ok=True)

adata=sc.read_h5ad('output/03.inferCNV/adata.h5')

adata_raw=adata.raw.to_adata()

自定义函数

deffilter_regulons(motifs,db_names=("hg38_10kbp_up_10kbp_down_full_tx_v10_clust.genes_vs_motifs.rankings",)):

"""

从ctx.csv中筛选重要的regulons，之后再运行AUCell

"""

motifs.columns=motifs.columns.droplevel(0)

defcontains(*elems):

deff(context):

returnany(elemincontextforeleminelems)

returnf

#Forthecreationofregulonsweonlykeepthe10-speciesdatabasesandtheactivatingmodules.Wealsoremovethe

#enrichedmotifsforthemodulesthatwerecreatedusingthemethod'weight>50.0%'(becausethesemodulesarenotpart

#ofthedefaultsettingsofmodules_from_adjacenciesanymore.

lg=np.fromiter(map(pose(operator.not_,contains('weight>50.0%')),motifs.Context),dtype=np.bool)&\

np.fromiter(map(contains(*db_names),motifs.Context),dtype=np.bool)&\

np.fromiter(map(contains('activating'),motifs.Context),dtype=np.bool)

motifs=motifs.loc[lg,:]

#WebuildregulonsonlyusingenrichedmotifswithaNESof3.0orhigher;wetakeonlydirectlyannotatedTFsorTFannotated

#foranorthologousgeneintoaccount;andweonlykeepregulonswithatleast10genes.

regulons=list(filter(lambdar:len(r)>=10,

df2regulons(motifs[(motifs.NES>=3.0)

&((motifs['Annotation']=='geneisdirectlyannotated')

|(motifs['Annotation'].str.startswith('geneisorthologousto')

&motifs['Annotation'].str.endswith('whichisdirectlyannotatedformotif')))

])))

#Renameregulons,i.e.removesuffix.

returnlist(map(lambdar:r.rename(r.transcription_factor),regulons))

STEP 3/3: Cellular enrichment (AUCell)

从ctx.csv中筛选重要的regulons，之后再运行AUCell

df_motifs=load_motifs("output/05.SCENIC/ctx.csv")

regulons=filter_regulons(df_motifs)

count数据

exp_mtx=pd.DataFrame(adata_raw.X.toarray(),columns=adata_raw.var_names,index=adata_raw.obs_names)

运行aucell

auc_mtx=aucell(exp_mtx=exp_mtx,signatures=regulons,seed=1314,num_workers=12)

auc_mtx.to_csv(OUTPUT_DIR+"/aucell.csv")

STEP 4/3: Regulon activity binarization (optional)

auc_mtx=pd.read_csv(OUTPUT_DIR+"/aucell.csv",index_col=0)

bin_mtx,thresholds=binarize(auc_mtx,seed=1314,num_workers=12)

bin_mtx.to_csv("bin_mtx.csv")

thresholds.to_frame().rename(columns={0:'threshold'}).to_csv("thresholds.csv")

输出文件

pyscenic grn

Input: raw count或者log后的count

Output:List of adjacencies between a TF and its targets stored ingrn.csv

pyscenic ctx

Output:List of adjacencies between a TF and its targets stored inctx.csv

pyscenic aucell

Output:aucell.csv

本文由 mdnice 多平台发布

本内容不代表本网观点和政治立场，如有侵犯你的权益请联系我们处理。

网友评论

网友评论仅供其表达个人看法，并不表明网站立场。