![生信交流与合作请关注公众号@生信探索]
准备环境
pyscenic
pipinstallpyscenic-i/pypi/simple/micromambaactivateSC
安装docker
需要有root权限或者在docker的用户组
sudoapt-getupdate sudoapt-getinstall\ ca-certificates\ curl\ gnupg\ lsb-release #2.AddDocker’sofficialGPGkey sudomkdir-m0755-p/etc/apt/keyrings curl-fsSL/linux/ubuntu/gpg|sudogpg--dearmor-o/etc/apt/keyrings/docker.gpg #3.Usethefollowingcommandtosetuptherepository echo\ "deb[arch=$(dpkg--print-architecture)signed-by=/etc/apt/keyrings/docker.gpg]/linux/ubuntu\ $(lsb_release-cs)stable"|sudotee/etc/apt/sources.list.d/docker.list>/dev/null #4.installdocker sudoapt-getupdate sudoapt-getinstalldocker-cedocker-ce-clicontainerd.iodocker-buildx-plugindocker-compose-plugin #5.chmod sudogroupadddocker sudousermod-aGdocker${USER}#把非root用户添加到用户组 sudosystemctlrestartdocker#1.UpdatetheaptpackageindexandinstallpackagestoallowapttousearepositoryoverHTTPS
docker pyscenic
dockerpullaertslab/pyscenic:0.12.1
准备数据库文件
#准备数据库(人) mkdir-p~/DataHub/SCENIC cd~/DataHub/SCENIC #getrankingdatabase wget/cistarget/databases/homo_sapiens/hg38/refseq_r80/mc_v10_clust/gene_based/hg38_10kbp_up_10kbp_down_full_tx_v10_clust.genes_vs_motifs.rankings.feather wget/cistarget/databases/homo_sapiens/hg38/refseq_r80/mc_v10_clust/gene_based/hg38_10kbp_up_10kbp_down_full_tx_v10_clust.genes_vs_motifs.rankings.feather.sha1sum.txt sha1sum-chg38_10kbp_up_10kbp_down_full_tx_v10_clust.genes_vs_motifs.rankings.feather.sha1sum.txthg38_10kbp_up_10kbp_down_full_tx_v10_clust.genes_vs_motifs.rankings.feather #getmotifdatabase wget/cistarget/motif2tf/motifs-v10nr_clust-nr.hgnc-m0.001-o0.0.tbl #TFlist#查看文本文件是不是只有一列基因 wget/aertslab/pySCENIC/blob/master/resources/hs_hgnc_tfs.txt #>>>downlod_SCENIC.sh>>> nohupzshdownlod_SCENIC.sh&>downlod_SCENIC.sh.log&#>>>downlod_SCENIC.sh>>>
准备count数据
importnumpyasnp importpandasaspd importscanpyassc importmatplotlib.pyplotasplt importloompy OUTPUT_DIR="output/05.SCENIC" Path(OUTPUT_DIR).mkdir(parents=True,exist_ok=True) adata=sc.read_h5ad('output/03.inferCNV/adata.h5') adata_raw=adata.raw.to_adata() rownames=dict(Gene=adata_raw.var_names.tolist()) colnames=dict(CellID=adata_raw.obs_names.tolist()) loompy.create(filename=OUTPUT_DIR+"/X.loom",layers=adata_raw.X.transpose(),row_attrs=rownames,col_attrs=colnames)frompathlibimportPath
pyscenic CLI
建议使用下边的docker运行这两个步骤
n_jobs=12 mtx_path=X.loom dir=/home/victor/DataHub/SCENIC tfs=$dir/hs_hgnc_tfs.txt feather=$dir/hg38_10kbp_up_10kbp_down_full_tx_v10_clust.genes_vs_motifs.rankings.feather tbl=$dir/motifs-v10nr_clust-nr.hgnc-m0.001-o0.0.tbl #STEP1/3:Generegulatorynetworkinference,andgenerationofco-expressionmodules pyscenicgrn\ --seed1314\ --num_workers${n_jobs}\ --methodgrnboost2\ --outputgrn.csv\ --sparse\ ${mtx_path}\ ${tfs} #STEP2/3:Regulonprediction(cisTarget) pyscenicctx\ --num_workers${n_jobs}\ --modedask_multiprocessing\ --mask_dropouts\ --sparse\ --outputctx.csv\ --expression_mtx_fname${mtx_path}\ --annotations_fname${tbl}\ grn.csv\ ${feather}#human
运行pySCENIC
micromambaactivateSC nohupzsh~/Project/SC10X/src/run_SCENIC.sh&>run_SCENIC.sh.log&cdoutput/05.SCENIC
docker运行脚本
docker脚本
n_jobs=20 input_dir=/home/victor/DataHub/SCENIC output_dir=/home/victor/Project/SC10X/output/05.SCENIC #STEP1/3:Generegulatorynetworkinference,andgenerationofco-expressionmodules dockerrun--rm\ -v${input_dir}:/input_data\ -v${output_dir}:/output_data\ aertslab/pyscenic:0.12.1pyscenicgrn\ --num_workers${n_jobs}\ --methodgrnboost2\ --output/output_data/grn.csv\ --sparse\ /input_data/X.loom\ /input_data/hs_hgnc_tfs.txt #STEP2/3:Regulonprediction(cisTarget) dockerrun--rm\ -v${input_dir}:/input_data\ -v${output_dir}:/output_data\ aertslab/pyscenic:0.12.1pyscenicctx\ /output_data/grn.csv\ /input_data/hg38_10kbp_up_10kbp_down_full_tx_v10_clust.genes_vs_motifs.rankings.feather\ --mask_dropouts\ --sparse\ --annotations_fname/input_data/motifs-v10nr_clust-nr.hgnc-m0.001-o0.0.tbl\ --expression_mtx_fname/output_data/X.loom\ --mode"custom_multiprocessing"\ --output/output_data/ctx.csv\ --num_workers${n_jobs} #<<<scenic_docker.sh<<<#>>>scenic_docker.sh>>>
运行
nohupzsh~/Project/SC10X/src/scenic_docker.sh&>scenic_docker.sh.log&cdoutput/05.SCENIC
在Jupyter中运行
importoperator importcytoolz importnumpyasnp importpandasaspd importmatplotlib.pyplotasplt importseabornassns importscanpyassc frompyscenic.utilsimportload_motifs frompyscenic.aucellimportaucell frompyscenic.binarizationimportbinarize frompyscenic.plottingimportplot_binarization,plot_rss frompyscenic.transformimportdf2regulons importbioquestasbq#/BioQuest/bioquestfrompathlibimportPath
Path(OUTPUT_DIR).mkdir(parents=True,exist_ok=True)OUTPUT_DIR='output/05.SCENIC'
adata_raw=adata.raw.to_adata()adata=sc.read_h5ad('output/03.inferCNV/adata.h5')
自定义函数
""" 从ctx.csv中筛选重要的regulons,之后再运行AUCell """ motifs.columns=motifs.columns.droplevel(0) defcontains(*elems): deff(context): returnany(elemincontextforeleminelems) returnf #Forthecreationofregulonsweonlykeepthe10-speciesdatabasesandtheactivatingmodules.Wealsoremovethe #enrichedmotifsforthemodulesthatwerecreatedusingthemethod'weight>50.0%'(becausethesemodulesarenotpart #ofthedefaultsettingsofmodules_from_adjacenciesanymore. lg=np.fromiter(map(pose(operator.not_,contains('weight>50.0%')),motifs.Context),dtype=np.bool)&\ np.fromiter(map(contains(*db_names),motifs.Context),dtype=np.bool)&\ np.fromiter(map(contains('activating'),motifs.Context),dtype=np.bool) motifs=motifs.loc[lg,:] #WebuildregulonsonlyusingenrichedmotifswithaNESof3.0orhigher;wetakeonlydirectlyannotatedTFsorTFannotated #foranorthologousgeneintoaccount;andweonlykeepregulonswithatleast10genes. regulons=list(filter(lambdar:len(r)>=10, df2regulons(motifs[(motifs.NES>=3.0) &((motifs['Annotation']=='geneisdirectlyannotated') |(motifs['Annotation'].str.startswith('geneisorthologousto') &motifs['Annotation'].str.endswith('whichisdirectlyannotatedformotif'))) ]))) #Renameregulons,i.e.removesuffix. returnlist(map(lambdar:r.rename(r.transcription_factor),regulons))deffilter_regulons(motifs,db_names=("hg38_10kbp_up_10kbp_down_full_tx_v10_clust.genes_vs_motifs.rankings",)):
STEP 3/3: Cellular enrichment (AUCell)
从ctx.csv中筛选重要的regulons,之后再运行AUCell
regulons=filter_regulons(df_motifs)df_motifs=load_motifs("output/05.SCENIC/ctx.csv")
count数据
exp_mtx=pd.DataFrame(adata_raw.X.toarray(),columns=adata_raw.var_names,index=adata_raw.obs_names)
运行aucell
auc_mtx.to_csv(OUTPUT_DIR+"/aucell.csv")auc_mtx=aucell(exp_mtx=exp_mtx,signatures=regulons,seed=1314,num_workers=12)
STEP 4/3: Regulon activity binarization (optional)
bin_mtx,thresholds=binarize(auc_mtx,seed=1314,num_workers=12) bin_mtx.to_csv("bin_mtx.csv") thresholds.to_frame().rename(columns={0:'threshold'}).to_csv("thresholds.csv")auc_mtx=pd.read_csv(OUTPUT_DIR+"/aucell.csv",index_col=0)
输出文件
pyscenic grn
Input: raw count或者log后的count
Output:List of adjacencies between a TF and its targets stored ingrn.csv
pyscenic ctx
Output:List of adjacencies between a TF and its targets stored inctx.csv
pyscenic aucell
Output:aucell.csv
本文由 mdnice 多平台发布