create or replace
procedure ext_stats_finder_i (
p_db_user varchar2 ,
p_min_card number default null,
p_max_card_percent number default null ,
p_parallelism number default null,
p_sample_percentage number default null )
as
type l_query is table of varchar2(32000) index by binary_integer;
type l_result is table of varchar2(32000) index by binary_integer;
type l_sql_text is table of varchar2(32000) index by binary_integer;
l_q l_query;
l_r l_result ;
l_sql l_sql_text ;
l_sql_n clob;
l_corr number;
l_max_card number;
l_left varchar2(32000);
l_val varchar2(32000);
l_frst_time number;
l_cnt number;
l_dummy number;
l_c_name number ;
l_d number ;
col_cnt integer;
rec_tab dbms_sql.desc_tab2;
col_num number ;
r number;
cnt number;
l_res number;
l_min_card number;
l_max_card_percent number;
l_col1_card number;
l_col2_card number;
l_used_in_sql number;
begin
if p_min_card is null then
l_min_card:=0;
else
l_min_card:=p_min_card;
end if;
if p_max_card_percent is null then
l_max_card_percent:=100;
else
l_max_card_percent:=p_max_card_percent;
end if;
–Loop that goes through all tables
for tbls in (select
table_name ,
num_rows
from
dba_tables
where
owner = p_db_user
order by table_name
) loop
if (p_parallelism is null) then
l_sql_n:=’ select ‘;
else
l_sql_n:=’ select /*+ parallel (‘||tbls.table_name||’,'||p_parallelism||’ ) */ ‘;
end if;
l_frst_time:=1;
l_max_card:=floor(l_max_card_percent*tbls.num_rows/100);
– Loop that goes through all suitable column (as defined by l_min_card and l_max_card )
for col1 in (select
column_name ,
column_id ,
rownum
from
dba_tab_columns
where
owner = p_db_user
and table_name = tbls.table_name
and num_distinct > l_min_card
and num_distinct <= l_max_card
order by column_id ) loop
– Loop that goes through columns that have column ID less than one from the outer loop
for col2 in (select
column_name ,
rownum
from
dba_tab_columns
where
owner = p_db_user
and table_name = tbls.table_name
and num_distinct > l_min_card
and column_id < col1.column_id
and num_distinct <= l_max_card
order by column_id ) loop
– Construct the SQL to get the numbner of distinct values for the “combined column”
if l_frst_time= 1 then
l_frst_time:=0;
l_sql_n:=l_sql_n||’ count( distinct( sys_op_combined_hash(‘||col1.column_name||’,'||col2.column_name||’)))’;
else
l_sql_n:=l_sql_n||’, count( distinct( sys_op_combined_hash(‘||col1.column_name||’,'||col2.column_name||’)))’;
end if;
end loop;
end loop;
if ( l_frst_time = 0 ) then
l_sql_n:=l_sql_n||’ from ‘||p_db_user||’.'||tbls.table_name;
if p_sample_percentage is not null then
l_sql_n:=l_sql_n||’ sample (‘||to_char(p_sample_percentage)||’)';
end if ;
l_c_name := dbms_sql.open_cursor;
dbms_sql.parse(l_c_name, l_sql_n, dbms_sql.native);
l_cnt:=0;
– The following two loops define the output columns (the query is dynamic)
for col1 in (select column_name ,
column_id ,
rownum
from
dba_tab_columns
where
owner = p_db_user
and table_name = tbls.table_name
and num_distinct > l_min_card
and num_distinct <= l_max_card
order by column_id ) loop
for col2 in (select column_name ,
rownum
from
dba_tab_columns
where
owner = p_db_user
and table_name = tbls.table_name
and num_distinct > l_min_card
and column_id < col1.column_id
and num_distinct <= l_max_card
order by column_id ) loop
l_cnt:=l_cnt+1;
dbms_sql.define_column (l_c_name, l_cnt ,l_res );
end loop;
end loop;
l_d := dbms_sql.execute(l_c_name);
dbms_sql.describe_columns2(l_c_name ,col_cnt , rec_tab);
col_num := rec_tab.first;
if (col_num is not null) then
r:= dbms_sql.fetch_rows (l_c_name);
end if;
l_cnt:=0;
for col1 in (select column_name ,
column_id ,
rownum
from
dba_tab_columns
where
owner = p_db_user
and table_name = tbls.table_name
and num_distinct > l_min_card
and num_distinct <= l_max_card
order by column_id ) loop
for col2 in (select column_name ,
rownum
from
dba_tab_columns
where
owner = p_db_user
and table_name = tbls.table_name
and num_distinct > l_min_card
and column_id < col1.column_id
and num_distinct <= l_max_card
order by column_id ) loop
l_cnt:=l_cnt+1;
dbms_sql.column_value (l_c_name, l_cnt ,l_res );
if p_sample_percentage is not null then
l_res:=floor((l_res*100)/p_sample_percentage);
end if;
l_res:=floor(l_res);
– Get the number of distinct values for the first column
select num_distinct
into l_col1_card
from
dba_tab_columns
where
owner = p_db_user
and table_name = tbls.table_name
and column_name = col1.column_name;
– Get the number of distinct values for the second column
select num_distinct
into l_col2_card
from
dba_tab_columns
where
owner = p_db_user
and table_name = tbls.table_name
and column_name = col2.column_name;
– How you how many queries refer to both columns
– This is an approximate approach
select count(sql_id)
into l_used_in_sql
from
dba_hist_sqltext
where
dbms_lob.instr(upper(sql_text), col1.column_name, dbms_lob.instr(upper(sql_text ) , tbls.table_name) ) > 0
and dbms_lob.instr(upper(sql_text), col2.column_name, dbms_lob.instr(upper(sql_text ) , tbls.table_name) ) > 0
and dbms_lob.instr(upper(sql_text) , tbls.table_name) > 0 ;
– Record results in a table
if l_res = 0 then
insert into
column_pairs ( db_user_name ,table_name ,column_name_1 ,column_name_2,err ,created_date , user_by_sql)
values (p_db_user , tbls.table_name , col1.column_name , col2.column_name ,0 , sysdate ,l_used_in_sql );
else
insert into
column_pairs ( db_user_name ,table_name ,column_name_1 ,column_name_2,err ,created_date , user_by_sql)
values (p_db_user , tbls.table_name , col1.column_name , col2.column_name ,floor((l_col1_card* l_col2_card)/l_res) , sysdate , l_used_in_sql );
end if;
commit;
end loop;
end loop;
dbms_sql.close_cursor(l_c_name);
end if;
end loop;
end ext_stats_finder_i;
/