xibuhaohao
有的时候无法使用Oracle自带的OEM监控,那么就需要确定一个监控方案。
此方案,使用Python+Prometheus+Grafana+Oracle
1.监控配置表
-- Create table
create table DB_MONITOR_TAB
(
  ID                  NUMBER,
  MONITOR_CLASS       VARCHAR2(50),
  MONITOR_NAME        VARCHAR2(50) not null,
  MONITOR_SQL         VARCHAR2(1000),
  CREATED_TIME        DATE default sysdate,
  STATUS              NUMBER default \'0\',
  MONITOR_INSTRUCTION VARCHAR2(100)
)
tablespace DBADMIN
  pctfree 10
  initrans 1
  maxtrans 255
  storage
  (
    initial 64K
    next 1M
    minextents 1
    maxextents unlimited
  );
-- Add comments to the table
comment on table DB_MONITOR_TAB
  is \'Oracle监控配置表\';
-- Add comments to the columns
comment on column DB_MONITOR_TAB.ID
  is \'序号\';
comment on column DB_MONITOR_TAB.MONITOR_CLASS
  is \'监控类别\';
comment on column DB_MONITOR_TAB.MONITOR_NAME
  is \'监控事项\';
comment on column DB_MONITOR_TAB.MONITOR_SQL
  is \'监控事项的SQL\';
comment on column DB_MONITOR_TAB.CREATED_TIME
  is \'监控事项创建时间\';
comment on column DB_MONITOR_TAB.STATUS
  is \'监控事项是否开启 1:开启,0:不开启\';
-- Create/Recreate primary, unique and foreign key constraints
alter table DB_MONITOR_TAB
  add constraint PK_MONITOR_NAME primary key (MONITOR_NAME)
  using index
  tablespace DBADMIN
  pctfree 10
  initrans 2
  maxtrans 255
  storage
  (
    initial 64K
    next 1M
    minextents 1
    maxextents unlimited
  );
 
2.插入监控数据
insert into db_monitor_tab values (1,\'session\',\'v_session\',\'select initcap(s.TYPE)||\' \'||initcap(s.STATUS) status,count(s.STATUS) from v$session s group by  s.TYPE,s.STATUS order by status\',sysdate,1,\'当前session个数\')
insert into db_monitor_tab values (10,\'archive_log_info\',\'v_archive_log_info\',\'select \'Archive_Log_Size\' || \'(MB)\',case when trunc(sum(s.BLOCKS * s.BLOCK_SIZE)/1024/1024, 2) is null then 0 else trunc(sum(s.BLOCKS * s.BLOCK_SIZE)/1024/1024,2) end from v$archived_log s where s.DEST_ID = 1 and s.COMPLETION_TIME >= trunc(sysdate) and s.COMPLETION_TIME < trunc(sysdate + 1)\',sysdate,1,\'每天归档日志大小(MB)\')
insert into db_monitor_tab values (11,\'archive_log_info\',\'v_archive_log_count\',\'select \'Archive_Log_Count\',count(1) from v$archived_log s where s.DEST_ID = 1 and s.COMPLETION_TIME >= trunc(sysdate) and s.COMPLETION_TIME < trunc(sysdate + 1)\',sysdate,1,\'每天归档日志数量\')
insert into db_monitor_tab values (12,\'lock_info\',\'v_locke_count\',\'select \'Lock_Count\', count(1) from v$session s where s.STATUS=\'ACTIVE\' AND s.LOCKWAIT is not null and s.BLOCKING_SESSION_STATUS=\'VALID\'\',sysdate,1,\'行锁数量\')
insert into db_monitor_tab values (13,\'redo_info\',\'v_redo_info\',\'SELECT initcap(NAME), VALUE FROM V$SYSSTAT where NAME in  (\'redo entries\',\'redo buffer allocation retries\')\',sysdate,1,\'redo情况\')
insert into db_monitor_tab values (14,\'net_info\',\'v_net_info\',\'SELECT initcap(NAME)||\'(KB)\',round(VALUE/1024,2) FROM V$SYSSTAT where NAME in (\'bytes received via SQL*Net from client\',\'bytes sent via SQL*Net to client\')\',sysdate,1,\'数据库数据进出的网络流量\')
insert into db_monitor_tab values (2,\'process\',\'v_process\',\'select \'Process_Count\',count(1) from v$process\',sysdate,1,\'当前process个数\')
insert into db_monitor_tab values (3,\'tablespace_used\',\'v_tablespace_used\',\'SELECT x.tablespace_name||\'(%)\',x.USED_RATE FROM (select * from (select a.TABLESPACE_NAME TABLESPACE_NAME,ROUND((a.total-b.free_space)/a.total*100,2) as USED_RATE from (select TABLESPACE_NAME,sum(bytes/1024/1024) total from dba_data_files group by TABLESPACE_NAME) a,(select TABLESPACE_NAME,sum(bytes/1024/1024) free_space from dba_free_space group by tablespace_name) b where a.TABLESPACE_NAME=b.TABLESPACE_NAME) UNION all select * from (select c.TABLESPACE_NAME TABLESPACE_NAME,ROUND((c.total-d.free_space)/c.total*100,2) as USED_RATE from (select TABLESPACE_NAME,sum(bytes/1024/1024) total from dba_temp_files group by TABLESPACE_NAME) c,(select TABLESPACE_NAME,sum(FREE_SPACE/1024/1024) free_space from dba_temp_free_space group by tablespace_name)d where c.TABLESPACE_NAME=d.TABLESPACE_NAME)) X order by USED_RATE desc\',sysdate,1,\'各个表空间使用情况\')
insert into db_monitor_tab values (4,\'wait_class\',\'v_wait_class\',\'SELECT s.WAIT_CLASS,sum(s.TIME_WAITED/1000) FROM v$system_event s group by s.WAIT_CLASS order by s.WAIT_CLASS\',sysdate,1,\'数据库各类型等待事件统计\')
insert into db_monitor_tab values (5,\'sga_info\',\'v_shared_pool\',\'select sg.NAME||\'(GB)\',trunc(sg.BYTES/1024/1024/1024,2) from v$sgainfo sg where sg.NAME in (\'Buffer Cache Size\',\'Shared Pool Size\') order by sg.name\',sysdate,1,\'shared_pool 内存大小\')
insert into db_monitor_tab values (6,\'sga_info\',\'v_shared_pool_free_size\',\'SELECT \'shared_pool_free\'||\'(GB)\',trunc(sp.BYTES/1024/1024/1024,2) FROM V$SGASTAT sp WHERE NAME = \'free memory\' and pool=\'shared pool\'\',sysdate,1,\'shared_pool 空闲内存大小\')
insert into db_monitor_tab values (7,\'hits_info\',\'v_library_cache_hits\',\'select \'Library_Cache_Hits\'||\'(%)\',trunc(sum(ly.PINHITS)/sum(ly.PINS)*100,2) from v$librarycache ly\',sysdate,1,\'library_cache_hits\')
insert into db_monitor_tab values (8,\'hits_info\',\'v_buffer_pool_cache_hits\',\'SELECT \'Buffer_Pool_Cache_Hits\'||\'(%)\',(trunc(1-(PHYSICAL_READS/(DB_BLOCK_GETS+CONSISTENT_GETS)),2))*100 FROM V$BUFFER_POOL_STATISTICS WHERE NAME=\'DEFAULT\'\',sysdate,1,\'buffer_pool_cache_hits\')
insert into db_monitor_tab values (9,\'hits_info\',\'v_sql_hard_prse_hit\',\'select \'Hard_Parse_Hits\'||\'(%)\',trunc((select st.VALUE from v$sysstat st  where st.NAME=\'parse count (hard)\')/(select st.VALUE from v$sysstat st  where st.NAME=\'parse count (total)\') *100,2) from dual\',sysdate,1,\'sql硬解析的百分比\')
 
3.Python监控程序
cat oracle_status_output.py
from prometheus_client import Gauge,start_http_server
import random
import subprocess
import time
import cx_Oracle

class Oracle_Status_Output:
    def __init__(self,db_sql):
        self.db_name = \'dbadmin\'
        self.db_password = \'QazWsx12\'
        self.db_tns = \'localhost:1521/paydb\'
        self.db_sql = db_sql
    def oracle_status_select(self):
        try:
            db=cx_Oracle.connect(self.db_name,self.db_password,self.db_tns)
            cursor = db.cursor()
            cursor.execute(self.db_sql)
            v_result=cursor.fetchall()
            db.close()
            return v_result
        except Exception as e:
            print(e)
if __name__ == "__main__":
    start_http_server(9800)
    oracleGauge = Gauge(\'oracleGauge\',\'Description of gauge\', [\'mylabelname\'])
    while True:
        try:
            time.sleep(1)
            def Gauge_Values(vsql):
                outvalue = Oracle_Status_Output(vsql).oracle_status_select()
                return outvalue
            jsql = Oracle_Status_Output("select monitor_sql from db_monitor_tab where status=1 and monitor_sql is not null").oracle_status_select()
            for x in range(len(jsql)):
                orasql="".join(tuple(jsql[x]))
                orasqlout = Gauge_Values(orasql)
                def ResultOutput(v_status):
                    for i in range(int(len(v_status))):
                        oracleGauge.labels(mylabelname=v_status[i][0]).set(v_status[i][1])
                ResultOutput(orasqlout)
        except Exception as e:
            print(e)

分类:

技术点:

相关文章: