
Hidden Pitfalls of Expanding Oracle String Columns

Posted: 2023-12-18 10:26:27


A colleague passed along a case that can easily be overlooked during design and development. In the test table below, column c1 uses char length semantics and column c2 uses byte length semantics:

create table test(c1 varchar2(10 char), c2 varchar2(10 byte));

Now both columns need to be enlarged. What is the difference between the following two operations?

alter table test modify c1 varchar2(20);
alter table test modify c2 varchar2(30);

The earlier article《NLS_LENGTH_SEMANTICS参数引申的问题》described the char and byte length semantics of string columns; the default is byte. In the test table above one column uses char and the other byte, so the question can be rephrased as: of the two enlargements, one is char->byte and the other is byte->byte. What is the difference between them?
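
Before comparing, it can help to confirm which semantics each column actually uses. A minimal sketch against USER_TAB_COLUMNS (CHAR_USED is 'C' for char semantics and 'B' for byte semantics); the show parameter line is a SQL*Plus convenience for checking the default that an unqualified varchar2(n) will follow:

select column_name, data_type, data_length, char_length, char_used
  from user_tab_columns
 where table_name = 'TEST';     -- expect c1: char_used = 'C', c2: char_used = 'B'

show parameter nls_length_semantics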

Semantically, take UTF-8, where one Chinese character occupies three bytes, and GBK, where one Chinese character occupies two bytes. Measured in Chinese characters:

1. UTF-8 character set

Before: c1 varchar2(10 char) can hold 10 Chinese characters, i.e. up to 30 bytes.

c2 varchar2(10 byte) can hold 10/3 = 3 Chinese characters, i.e. 9 bytes.

After: c1 varchar2(20) is 20 bytes, which holds 20/3 = 6 Chinese characters.

c2 varchar2(30) is 30 bytes, which holds 30/3 = 10 Chinese characters.

2. GBK character set

Before: c1 varchar2(10 char) can hold 10 Chinese characters, i.e. 20 bytes.

c2 varchar2(10 byte) can hold 10/2 = 5 Chinese characters, i.e. 10 bytes.

After: c1 varchar2(20) is 20 bytes, which holds 20/2 = 10 Chinese characters.

c2 varchar2(30) is 30 bytes, which holds 30/2 = 15 Chinese characters.

So, depending on the character set, converting between char and byte may let a column hold more Chinese characters, fewer, or the same number; it comes down to the char-to-byte ratio and the new length. If the column only stores single-byte (e.g. English) characters, the effect on capacity is negligible.
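
For instance, assuming an AL32UTF8 database character set (one Chinese character = 3 bytes), the original capacities above can be verified with a quick sketch; the literals are just sample data:

insert into test(c1) values ('数据库数据库数据库数');  -- 10 characters / 30 bytes, fits varchar2(10 char)
insert into test(c2) values ('数据库');                 -- 3 characters / 9 bytes, fits varchar2(10 byte)
insert into test(c2) values ('数据库数');               -- 4 characters / 12 bytes, expected to fail with ORA-12899
select lengthb(c1), lengthb(c2) from test;               -- byte lengths actually stored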

In actual execution, however, char->byte and byte->byte do behave differently, mainly in their impact on performance.

If the change is char->byte, then besides enlarging the length it also involves a change of length semantics. Logically, the database has to check whether any value currently stored in the column would exceed the new limit after the conversion:

alter table test modify c1 varchar2(20);
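
For reference, a trace like the one below can be captured roughly as follows (a sketch; level 12 and the tracefile_identifier are just convenient choices, not part of the original test):

alter session set tracefile_identifier = 'modify_c1';
alter session set events '10046 trace name context forever, level 12';
alter table test modify c1 varchar2(20);
alter session set events '10046 trace name context off';
-- the raw trace file can then be formatted with tkprof to list the recursive SQL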

The 10046 trace of this modify shows that, just to enlarge the column, 35 recursive SQL statements were executed at the data dictionary level:

select count(*) from ind$ i where i.bo#=:1
select /*+ rule */ bucket_cnt, row_cnt, cache_cnt, null_cnt, timestamp#, sample_size, minimum, maximum, distcnt, lowval, hival, density, col#, spare1, spare2, avgcln from hist_head$ where obj#=:1 and intcol#=:2
select /*+ rule */ bucket, endpoint, col#, epvalue from histgrm$ where obj#=:1 and intcol#=:2 and row#=:3 order by bucket
select obj#,type#,ctime,mtime,stime, status, dataobj#, flags, oid$, spare1, spare2 from obj$ where owner#=:1 and name=:2 and namespace=:3 and remoteowner is null and linkname is null and subname is null
select t.ts#,t.file#,t.block#,nvl(t.bobj#,0),nvl(t.tab#,0),t.intcols,nvl(t.clucols,0),t.audit$,t.flags,t.pctfree$,t.pctused$,t.initrans,t.maxtrans,t.rowcnt,t.blkcnt,t.empcnt,t.avgspc,t.chncnt,t.avgrln,t.analyzetime,t.samplesize,t.cols,t.property,nvl(t.degree,1),nvl(t.instances,1),t.avgspc_flb,t.flbcnt,t.kernelcols,nvl(t.trigflag, 0),nvl(t.spare1,0),nvl(t.spare2,0),t.spare4,t.spare6,ts.cachedblk,ts.cachehit,ts.logicalread from tab$ t, tab_stats$ ts where t.obj#= :1 and t.obj# = ts.obj# (+)
select o.owner#,o.name,o.namespace,o.remoteowner,o.linkname,o.subname from obj$ o where o.obj#=:1
select col#, grantee#, privilege#,max(mod(nvl(option$,0),2)) from objauth$ where obj#=:1 and col# is not null group by privilege#, col#, grantee# order by col#, grantee#
select grantee#,privilege#,nvl(col#,0),max(mod(nvl(option$,0),2)) from objauth$ where obj#=:1 group by grantee#,privilege#,nvl(col#,0) order by grantee#
select con#,obj#,rcon#,enabled,nvl(defer,0),spare2,spare3 from cdef$ where robj#=:1
select con#,type#,condlength,intcols,robj#,rcon#,match#,refact,nvl(enabled,0),rowid,cols,nvl(defer,0),mtime,nvl(spare1,0),spare2,spare3 from cdef$ where obj#=:1
select intcol#,nvl(pos#,0),col#,nvl(spare1,0) from ccol$ where con#=:1
select count(*) from ( select 1 from ind$ i, jijoin$ j where i.bo# = tab1obj# and j.obj#=i.obj# and j.tab2obj#=:1 union all select 1 from ind$ i, jijoin$ j where i.bo# = tab2obj# and j.obj#=i.obj# and j.tab1obj#=:1)
select nvl((decode(i.type#, 2, :1, 0) + sum(c.length + decode(sign(c.length - 128), -1, 1, 2))),0), i.obj#, i.initrans, i.type#, decode(i.type#, 4, MOD(i.pctthres$, 256), 0) from icol$ ic,col$ c,ind$ i where i.bo# = :2 and i.obj# in (select i.obj# from ind$ i,icol$ ic where i.bo#=:2 and i.obj#=ic.obj# and ic.intcol#=:3) and i.obj# = ic.obj# and ic.bo# = :2 and ic.intcol# = c.intcol# and c.obj# = :2 group by i.obj#, i.type#, i.initrans, i.pctthres$
select nvl((decode(i.type#, 2, :1, 0) + sum(c.length + decode(sign(c.length - 128), -1, 1, 2))),0), i.obj#, i.initrans, i.type#, decode(i.type#, 4, MOD(i.pctthres$, 256), 0), i.bo# from ind$ i, jijoin$ j, icol$ ic, col$ c where i.obj# = j.obj# and i.bo# = j.tab1obj# and i.obj# = ic.obj# and c.obj# = ic.bo# and c.col# = ic.col# and j.tab2obj# = :2 group by i.obj#, i.type#, i.initrans, i.pctthres$, i.bo#
select 1 from icol$ c,ind$ i where i.bo#=:1 and i.type# = 9 and i.obj#=c.obj# and c.intcol# in (select c1.intcol# from col$ c1 where c1.obj#=:1 and c1.col#=:2)
SELECT /* OPT_DYN_SAMP */ /*+ ALL_ROWS IGNORE_WHERE_CLAUSE NO_PARALLEL(SAMPLESUB) opt_param('parallel_execution_enabled', 'false') NO_PARALLEL_INDEX(SAMPLESUB) NO_SQL_TUNE */ NVL(SUM(C1),0), NVL(SUM(C2),0) FROM (SELECT /*+ IGNORE_WHERE_CLAUSE NO_PARALLEL("TEST") FULL("TEST") NO_PARALLEL_INDEX("TEST") */ 1 AS C1, CASE WHEN LENGTHB("TEST"."C1")>20 THEN 1 ELSE 0 END AS C2 FROM "BISAL"."TEST" "TEST") SAMPLESUB
select /*+ first_rows */ 1 from "BISAL"."TEST" where LENGTHB("C1") > 20
select 1 from icoldep$ i where i.bo#= :1 and i.intcol#= :2 and i.obj# NOT IN (select j.obj# from jijoin$ j)
select 1 from partcol$ c,ind$ i where i.bo#=:1 and i.obj#=c.obj# and c.intcol#=:2
select 1 from subpartcol$ c,ind$ i where i.bo#=:1 and i.obj#=c.obj# and c.intcol#=:2
delete from idl_ub1$ where obj#=:1
delete from idl_char$ where obj#=:1
delete from idl_ub2$ where obj#=:1
delete from idl_sb4$ where obj#=:1
delete from error$ where obj#=:1
select o.owner#, o.name, o.namespace, o.obj#, d.d_timestamp, nvl(d.property,0), o.type#, o.subname, d.d_attrs from dependency$ d, obj$ o where d.p_obj#=:1 and (d.p_timestamp=nvl(:2,d.p_timestamp) or d.property=2) and o.owner#=nvl(:3,o.owner#) and d.d_obj#=o.obj# order by o.obj#
update dependency$ set p_timestamp=:1, p_obj#=:2 where d_obj#=:3 and p_obj#=:4
update dependency$ set d_reason = :1 where d_obj# = :2 and p_obj# = :3
update dependency$ set d_attrs = :1 where d_obj# = :2 and p_obj# = :3
delete from superobj$ where subobj# = :1
update tab$ set ts#=:2,file#=:3,block#=:4,bobj#=decode(:5,0,null,:5),tab#=decode(:6,0,null,:6),intcols=:7,kernelcols=:8,clucols=decode(:9,0,null,:9),audit$=:10,flags=:11,pctfree$=:12,pctused$=:13,initrans=:14,maxtrans=:15,rowcnt=:16,blkcnt=:17,empcnt=:18,avgspc=:19,chncnt=:20,avgrln=:21,analyzetime=:22,samplesize=:23,cols=:24,property=:25,degree=decode(:26,1,null,:26),instances=decode(:27,1,null,:27),dataobj#=:28,avgspc_flb=:29,flbcnt=:30,trigflag=:31,spare1=:32,spare2=decode(:33,0,null,:33),spare4=:34,spare6=:35 where obj#=:1
update col$ set intcol#=:3,segcol#=:4,type#=:5,length=:6,precision#=decode(:5,182/*DTYIYM*/,:7,183/*DTYIDS*/,:7,decode(:7,0,null,:7)),scale=decode(:5,2,decode(:8,-127/*MAXSB1MINAL*/,null,:8),178,:8,179,:8,180,:8,181,:8,182,:8,183,:8,231,:8,null),null$=:9,fixedstorage=:10,segcollength=:11,col#=:12,property=:13,charsetid=:14,charsetform=:15,spare1=:16,spare2=:17,spare3=:18,deflength=decode(:19,0,null,:19),default$=:20 where obj#=:1 and name=:2
update obj$ set obj#=:4, type#=:5,ctime=:6,mtime=:7,stime=:8,status=:9,dataobj#=:10,flags=:11,oid$=:12,spare1=:13, spare2=:14 where owner#=:1 and name=:2 and namespace=:3 and remoteowner is null and linkname is null and subname is null
select count(FA#) from SYS_FBA_TRACKEDTABLES where OBJ# = :1 and bitand(FLAGS, :2)=0
LOCK TABLE "TEST" IN EXCLUSIVE MODE NOWAIT

Several of these are worth a closer look. This statement flags each row with 1 or 0 depending on whether the byte length of c1 exceeds 20:

SELECT /* OPT_DYN_SAMP */ /*+ ALL_ROWS IGNORE_WHERE_CLAUSE NO_PARALLEL(SAMPLESUB) opt_param('parallel_execution_enabled', 'false') NO_PARALLEL_INDEX(SAMPLESUB) NO_SQL_TUNE */ NVL(SUM(C1),0), NVL(SUM(C2),0) FROM (SELECT /*+ IGNORE_WHERE_CLAUSE NO_PARALLEL("TEST") FULL("TEST") NO_PARALLEL_INDEX("TEST") */ 1 AS C1, CASE WHEN LENGTHB("TEST"."C1")>20 THEN 1 ELSE 0 END AS C2 FROM "BISAL"."TEST" "TEST") SAMPLESUB

And this statement apparently looks for any row whose c1 byte length exceeds 20:

select /*+ first_rows */ 1 from "BISAL"."TEST" where LENGTHB("C1") > 20

Both of these statements are executed as full table scans, and the table is also explicitly LOCKed:

LOCK TABLE "TEST" IN EXCLUSIVE MODE NOWAIT
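
Incidentally, the same validation can be approximated by hand before issuing the DDL. A hypothetical pre-check: if it returns rows, the char->byte change is expected to be rejected because some existing values would no longer fit in 20 bytes:

select rowid, lengthb(c1) from test where lengthb(c1) > 20;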

In addition, the data dictionary tables tab$, col$ and obj$ need to be updated:

update tab$ set ts#=:2,file#=:3,block#=:4,bobj#=decode(:5,0,null,:5),tab#=decode(:6,0,null,:6),intcols=:7,kernelcols=:8,clucols=decode(:9,0,null,:9),audit$=:10,flags=:11,pctfree$=:12,pctused$=:13,initrans=:14,maxtrans=:15,rowcnt=:16,blkcnt=:17,empcnt=:18,avgspc=:19,chncnt=:20,avgrln=:21,analyzetime=:22,samplesize=:23,cols=:24,property=:25,degree=decode(:26,1,null,:26),instances=decode(:27,1,null,:27),dataobj#=:28,avgspc_flb=:29,flbcnt=:30,trigflag=:31,spare1=:32,spare2=decode(:33,0,null,:33),spare4=:34,spare6=:35 where obj#=:1
update col$ set intcol#=:3,segcol#=:4,type#=:5,length=:6,precision#=decode(:5,182/*DTYIYM*/,:7,183/*DTYIDS*/,:7,decode(:7,0,null,:7)),scale=decode(:5,2,decode(:8,-127/*MAXSB1MINAL*/,null,:8),178,:8,179,:8,180,:8,181,:8,182,:8,183,:8,231,:8,null),null$=:9,fixedstorage=:10,segcollength=:11,col#=:12,property=:13,charsetid=:14,charsetform=:15,spare1=:16,spare2=:17,spare3=:18,deflength=decode(:19,0,null,:19),default$=:20 where obj#=:1 and name=:2
update obj$ set obj#=:4, type#=:5,ctime=:6,mtime=:7,stime=:8,status=:9,dataobj#=:10,flags=:11,oid$=:12,spare1=:13, spare2=:14 where owner#=:1 and name=:2 and namespace=:3 and remoteowner is null and linkname is null and subname is null

Now look at the same-semantics case, byte->byte (column c2). Logically this is only a length increase; no change of semantics is involved:

alter table test modify c2 varchar2(30);

Its 10046 trace contains noticeably fewer SQL statements than the char->byte case: there is no access to the base table at all, no updates to tab$, col$ or obj$, and no LOCK:

select count(*) from ind$ i where i.bo#=:1
select count(*) from ( select 1 from ind$ i, jijoin$ j where i.bo# = tab1obj# and j.obj#=i.obj# and j.tab2obj#=:1 union all select 1 from ind$ i, jijoin$ j where i.bo# = tab2obj# and j.obj#=i.obj# and j.tab1obj#=:1)
select nvl((decode(i.type#, 2, :1, 0) + sum(c.length + decode(sign(c.length - 128), -1, 1, 2))),0), i.obj#, i.initrans, i.type#, decode(i.type#, 4, MOD(i.pctthres$, 256), 0) from icol$ ic,col$ c,ind$ i where i.bo# = :2 and i.obj# in (select i.obj# from ind$ i,icol$ ic where i.bo#=:2 and i.obj#=ic.obj# and ic.intcol#=:3) and i.obj# = ic.obj# and ic.bo# = :2 and ic.intcol# = c.intcol# and c.obj# = :2 group by i.obj#, i.type#, i.initrans, i.pctthres$
select nvl((decode(i.type#, 2, :1, 0) + sum(c.length + decode(sign(c.length - 128), -1, 1, 2))),0), i.obj#, i.initrans, i.type#, decode(i.type#, 4, MOD(i.pctthres$, 256), 0), i.bo# from ind$ i, jijoin$ j, icol$ ic, col$ c where i.obj# = j.obj# and i.bo# = j.tab1obj# and i.obj# = ic.obj# and c.obj# = ic.bo# and c.col# = ic.col# and j.tab2obj# = :2 group by i.obj#, i.type#, i.initrans, i.pctthres$, i.bo#
select 1 from icol$ c,ind$ i where i.bo#=:1 and i.type# = 9 and i.obj#=c.obj# and c.intcol# in (select c1.intcol# from col$ c1 where c1.obj#=:1 and c1.col#=:2)
select 1 from icoldep$ i where i.bo#= :1 and i.intcol#= :2 and i.obj# NOT IN (select j.obj# from jijoin$ j)
select 1 from partcol$ c,ind$ i where i.bo#=:1 and i.obj#=c.obj# and c.intcol#=:2
select 1 from subpartcol$ c,ind$ i where i.bo#=:1 and i.obj#=c.obj# and c.intcol#=:2
delete from idl_ub1$ where obj#=:1
delete from idl_ub2$ where obj#=:1
delete from idl_sb4$ where obj#=:1
delete from error$ where obj#=:1
select o.owner#, o.name, o.namespace, o.obj#, d.d_timestamp, nvl(d.property,0), o.type#, o.subname, d.d_attrs from dependency$ d, obj$ o where d.p_obj#=:1 and (d.p_timestamp=nvl(:2,d.p_timestamp) or d.property=2) and o.owner#=nvl(:3,o.owner#) and d.d_obj#=o.obj# order by o.obj#
update dependency$ set p_timestamp=:1, p_obj#=:2 where d_obj#=:3 and p_obj#=:4
update dependency$ set d_reason = :1 where d_obj# = :2 and p_obj# = :3
update dependency$ set d_attrs = :1 where d_obj# = :2 and p_obj# = :3
delete from superobj$ where subobj# = :1
delete from tab_stats$ where obj#=:1

With char->byte or byte->char, the database has to do more work, including validating the existing data. On a large table such a conversion therefore takes longer, and the LOCK TABLE restricts concurrent DML against the table, which can have a real impact on the system. A plain char->char or byte->byte enlargement only updates a few data dictionary entries, needs no LOCK and never reads the base table, so it finishes very quickly.
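
The concurrency point can be seen with a simple two-session sketch, assuming the default ddl_lock_timeout of 0:

-- session 1: leave an uncommitted change, which keeps a lock on the table
update test set c2 = 'a' where rownum = 1;

-- session 2: the char->byte modify cannot get its exclusive lock and fails at once
alter table test modify c1 varchar2(20);
-- ORA-00054: resource busy and acquire with NOWAIT specified or timeout expired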

At the end of the day, I doubt anyone performs a char->byte or byte->char conversion on purpose; in most cases it is "collateral damage". For example, a database whose NLS_LENGTH_SEMANTICS was set to byte is migrated to a new database where NLS_LENGTH_SEMANTICS is set to char, and a conversion is implied. Or the test environment stores by char while production stores by byte, and the go-live SQL omits the explicit char or byte qualifier and falls back to the default. That is exactly how columns get hurt by accident: the statement runs in production, nobody realizes its impact, concurrency on the table suffers, and the column's storage capacity may well no longer meet the requirement.

Ultimately all of this comes down to loose design and development standards. The prerequisite is still understanding how the mechanism works; simply copying statements around can easily plant other hidden problems.
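
One simple defensive habit is to spell out the length semantics in the DDL, so the result never silently follows NLS_LENGTH_SEMANTICS; a sketch of what that looks like for this test table:

alter table test modify c1 varchar2(20 char);   -- stays char -> char
alter table test modify c2 varchar2(30 byte);   -- stays byte -> byte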

