日期:2014-05-16  浏览次数:20510 次

数据库去除重复记录思路一
大数据量的更新,最好不要用update,可以这么弄:
步骤1:
create table us_publiccomment_01 nologging as select * from us_publiccomment where rowid in(
  select row_id
     from (select t.rowid row_id,
                  row_number() over(partition by t.msisdn,t.contentid , t.commentary order by t.createtime) rn
             from us_publiccomment t)
    where rn = 1
);

步骤2(软删除重复记录,硬删除忽略):
alter table us_publiccomment_01 nologging;
insert into us_publiccomment_01 select msisdn,contentid,commentary,...,'2'(publishstatus),... from us_publiccomment where rowid in(
  select row_id
     from (select t.rowid row_id,
                  row_number() over(partition by t.msisdn,t.contentid , t.commentary order by t.createtime) rn
             from us_publiccomment t)
    where rn > 1
);
commit;
alter table us_publiccomment_01 logging;

步骤3:
rename us_publiccomment to us_publiccomment_bak;
rename us_publiccomment_01 to us_publiccomment;

步骤4:给新表加约束、主键、索引
如:
alter table us_publiccomment modify msisdn not null;
alter table us_publiccomment modify msisdn default '11111111111';
--
alter table us_publiccomment add constraint pk_us_publiccomment primary key (msisdn) using index tablespace tbs_mread_score_idx;

步骤5:
drop table us_publiccomment_bak;