ext_stats_finder_i.sql

create or replace
procedure ext_stats_finder_i (
p_db_user           varchar2 ,
p_min_card          number default null,
p_max_card_percent  number default null ,
p_parallelism       number default null,
p_sample_percentage number default null )
as
type l_query    is table of varchar2(32000) index by binary_integer;
type l_result   is table of varchar2(32000) index by binary_integer;
type l_sql_text is table of varchar2(32000) index by binary_integer;
l_q             l_query;
l_r             l_result  ;
l_sql           l_sql_text ;
l_sql_n         clob;
l_corr          number;
l_max_card      number; 
l_left          varchar2(32000);
l_val           varchar2(32000);
l_frst_time     number;
l_cnt           number;
l_dummy         number;
l_c_name        number ;
l_d             number ; 
col_cnt         integer;
rec_tab         dbms_sql.desc_tab2;
col_num         number ;
r               number;
cnt             number;
l_res           number;
l_min_card      number;
l_max_card_percent number;
l_col1_card     number;
l_col2_card     number;
l_used_in_sql   number;
begin
  if p_min_card is null then
    l_min_card:=0;
  else 
     l_min_card:=p_min_card;
  end if;
  if p_max_card_percent is null then
    l_max_card_percent:=100;
  else
    l_max_card_percent:=p_max_card_percent;
  end if;
–Loop that goes through all tables  
  for tbls in (select
                  table_name ,
                  num_rows
              from
                  dba_tables 
              where
                  owner = p_db_user
              order by table_name
              ) loop
    if (p_parallelism is null) then
      l_sql_n:=’ select ‘;
    else
      l_sql_n:=’ select /*+ parallel (‘||tbls.table_name||’,’||p_parallelism||’ ) */ ‘;
    end if;
    l_frst_time:=1;
    l_max_card:=floor(l_max_card_percent*tbls.num_rows/100);
— Loop that goes through all suitable column (as defined by l_min_card  and l_max_card  )
    for col1 in (select
                    column_name ,
                    column_id ,
                    rownum 
                 from
                    dba_tab_columns
                 where
                    owner           = p_db_user
                    and table_name  = tbls.table_name
                    and num_distinct > l_min_card
                    and num_distinct <= l_max_card 
                 order by column_id ) loop
— Loop that goes through columns that have column ID less than one from the outer loop                 
      for col2 in (select
                      column_name ,
                      rownum
                   from
                      dba_tab_columns
                   where
                       owner         = p_db_user
                   and table_name   = tbls.table_name
                   and num_distinct   > l_min_card
                   and column_id    < col1.column_id 
                   and num_distinct <= l_max_card 
                   order by column_id ) loop
— Construct the SQL to get the numbner of distinct values for the “combined column”                  
            if l_frst_time= 1 then
              l_frst_time:=0;
              l_sql_n:=l_sql_n||’ count( distinct( sys_op_combined_hash(‘||col1.column_name||’,’||col2.column_name||’)))’;
            else
              l_sql_n:=l_sql_n||’, count( distinct( sys_op_combined_hash(‘||col1.column_name||’,’||col2.column_name||’)))’;
            end if;
      end loop;
    end loop;
    if ( l_frst_time = 0  )  then
      l_sql_n:=l_sql_n||’ from ‘||p_db_user||’.’||tbls.table_name;
      if  p_sample_percentage is not null then
        l_sql_n:=l_sql_n||’ sample (‘||to_char(p_sample_percentage)||’)’;
      end if ;
      l_c_name := dbms_sql.open_cursor;
      dbms_sql.parse(l_c_name, l_sql_n, dbms_sql.native);
      l_cnt:=0;
— The following two loops define the output columns (the query is dynamic)
    for col1 in (select column_name ,
                        column_id ,
                        rownum 
                from
                        dba_tab_columns
                where
                        owner       = p_db_user
                and     table_name  = tbls.table_name
                and     num_distinct > l_min_card
                and     num_distinct <= l_max_card
                order by column_id ) loop
        for col2 in (select column_name ,
                            rownum
                    from
                            dba_tab_columns
                    where
                            owner       = p_db_user
                    and     table_name  = tbls.table_name
                    and     num_distinct > l_min_card
                    and     column_id    < col1.column_id
                    and     num_distinct <= l_max_card
                    order by column_id ) loop
          l_cnt:=l_cnt+1;
          dbms_sql.define_column (l_c_name, l_cnt ,l_res );
        end loop;
    end loop;
    l_d := dbms_sql.execute(l_c_name);
    dbms_sql.describe_columns2(l_c_name ,col_cnt , rec_tab);
    col_num := rec_tab.first;
    if (col_num is not null) then
      r:= dbms_sql.fetch_rows (l_c_name);
  end if;
  l_cnt:=0; 
  for col1 in (select column_name ,
                       column_id ,
                       rownum 
                from
                      dba_tab_columns
                where
                      owner     = p_db_user
                and   table_name = tbls.table_name
                and   num_distinct > l_min_card 
                and   num_distinct <= l_max_card
                order by column_id ) loop
        for col2 in (select column_name ,
                            rownum
                     from
                            dba_tab_columns
                     where
                            owner       = p_db_user
                     and    table_name  = tbls.table_name
                     and    num_distinct > l_min_card 
                     and    column_id   < col1.column_id
                     and    num_distinct <= l_max_card  
                     order by column_id ) loop
              l_cnt:=l_cnt+1;
              dbms_sql.column_value (l_c_name, l_cnt ,l_res );
              if  p_sample_percentage is not null then
                l_res:=floor((l_res*100)/p_sample_percentage);
              end if;
              l_res:=floor(l_res);
— Get the number of distinct values for the first column
            select num_distinct
            into  l_col1_card
            from 
                  dba_tab_columns
            where
                  owner       = p_db_user
            and   table_name  =   tbls.table_name
            and   column_name = col1.column_name;
— Get the number of distinct values for the second column
            select num_distinct
            into  l_col2_card
            from 
                  dba_tab_columns
            where
                  owner       = p_db_user
            and   table_name  =   tbls.table_name
            and   column_name = col2.column_name;
— How you how many queries refer to both columns
— This is an approximate approach
            select count(sql_id)
            into l_used_in_sql
            from
                dba_hist_sqltext
            where
                dbms_lob.instr(upper(sql_text), col1.column_name, dbms_lob.instr(upper(sql_text ) , tbls.table_name) ) > 0
            and dbms_lob.instr(upper(sql_text), col2.column_name, dbms_lob.instr(upper(sql_text ) , tbls.table_name) ) > 0
            and dbms_lob.instr(upper(sql_text) , tbls.table_name) > 0 ; 
— Record results in a table           
          if l_res = 0 then
            insert into
                 column_pairs ( db_user_name ,table_name ,column_name_1 ,column_name_2,err ,created_date , user_by_sql)
                values (p_db_user , tbls.table_name , col1.column_name , col2.column_name ,0 , sysdate ,l_used_in_sql );
          else
            insert into
             column_pairs ( db_user_name ,table_name ,column_name_1 ,column_name_2,err ,created_date , user_by_sql)
            values (p_db_user , tbls.table_name , col1.column_name , col2.column_name ,floor((l_col1_card* l_col2_card)/l_res) , sysdate , l_used_in_sql );
          end if;
        commit;
      end loop;
    end loop;
        dbms_sql.close_cursor(l_c_name);
    end if;
  end loop;
end ext_stats_finder_i;
/

Advertisements

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s

%d bloggers like this: