1、检查CPU使用率是否超过90%
[oracle@hisdb1 ~]$ vi chk_cpu.sh
#!/bin/bash
MT_HOME="/home/oracle/monitor"
vmstat_file="$MT_HOME/tmp/vmstat.txt"
dt=`date +%Y%m%d_%H:%M:%S`
test -d /home/oracle/monitor/tmp || mkdir -p /home/oracle/monitor/tmp
vmstat 1 10 > $vmstat_file
cat $vmstat_file | tail -10 | awk '{print $(NF-2)}' > /tmp/cpuage.txt
for i in `cat /tmp/cpuage.txt | sed -e 's/\r//g'`
do
if [[ $i -le 10 ]];then
count=$((count+1))
fi
done
if [[ $count -ge 10 ]];then
output=1
echo -e "\033[31mAttention,please!\033[0mThe usage of CPU exceeds 90% for 10s from $dt."
else
output=0
echo "The usage of CPU is normal for 10s from $dt."
fi
exit 0
2、SWAP空间一般空闲,若使用超过20%,则有异常
[oracle@hisdb1 ~]$ vi chk_swap.sh
#!/bin/bash
MT_HOME="/home/oracle/monitor"
swap_file="$MT_HOME/tmp/swap_file.txt"
test -d /home/oracle/monitor/tmp || mkdir -p /home/oracle/monitor/tmp
free -m > $swap_file
used_size=`cat $swap_file | grep "Swap" | awk -F' ' '{print \$3}' | sed -e 's/\r//g'`
total_size=`cat $swap_file | grep "Swap" | awk -F' ' '{print \$2}' | sed -e 's/\r//g'`
pct=`echo "scale=2; $used_size / $total_size" | bc`
if [[ $pct > 0.2 ]];then
output=1
echo -e "\033[31mAttention,please!\033[0mThe usage of Swap is higher than 20%."
else
output=0
echo "The usage of Swap is normal!"
fi
exit 0
3、表空间使用率超97%
[oracle@hisdb1 ~]$ vi chk_tbs.sh
#!/bin/bash
source /home/oracle/.bash_profile
MT_HOME="/home/oracle/monitor"
DT=$(date '+%Y-%m-%d %H:%M:%S')
pdbs="$MT_HOME/tmp/pdbs.txt"
tabspace_data="$MT_HOME/tmp/tabspace_data.txt"
max_tab_num="$MT_HOME/tmp/max_tab_num.txt"
max_tablespace="$MT_HOME/tmp/max_tablespace.txt"
test -d /home/oracle/monitor/tmp || mkdir -p /home/oracle/monitor/tmp
test -f $tabspace_data && >$tabspace_data
test -f $max_tablespace && >$max_tablespace
SMON_COUNT=`ps -ef | grep smon | grep -c $ORACLE_SID`
if (( $SMON_COUNT > 0 ));then
sqlplus -S / as sysdba <<EOF > $pdbs
set linesize 190 pagesize 0;
set feedback off;
set verify off;
show pdbs
EOF
pdbname=`cat $pdbs | awk '{print $2}'`
jud_pdb=`cat $pdbs | grep -iw "PDB$SEED" | wc -l`
if [[ $jud_pdb -eq 1 ]];then
echo "The following is CDB's data." >> $tabspace_data
sqlplus -S / as sysdba <<EOF >> $tabspace_data
set linesize 190 pagesize 0;
set feedback off;
set verify off;
select tablespace_name || '-' || PCT
from
(select a.*,round(PCT_MAX2CURR*PCT_CURR2USED/100) PCT
from (SELECT D.TABLESPACE_NAME,G.BLOCK_SIZE,
DECODE(MAX_SIZE_MB, 0, SPACE, MAX_SIZE_MB) MAX_SIZE_MB,
SPACE CURR_SIZE_MB,
SPACE - NVL(FREE_SPACE, 0) USED_SIZE_MB,
DECODE(FREE_SPACE,NULL,0,FREE_SPACE) FREE_SIZE_MB,
ROUND(SPACE/DECODE(MAX_SIZE_MB, 0, SPACE, MAX_SIZE_MB)*100, 3) PCT_MAX2CURR,
ROUND((1 - NVL(FREE_SPACE, 0) / SPACE) * 100, 3) PCT_CURR2USED
FROM (SELECT TABLESPACE_NAME,ROUND(SUM(BYTES) / (1024 * 1024)) SPACE,
ROUND(SUM(DECODE(MAXBYTES,0,BYTES,MAXBYTES) / 1024 / 1024)) MAX_SIZE_MB
FROM DBA_DATA_FILES GROUP BY TABLESPACE_NAME) D,
(SELECT TABLESPACE_NAME,ROUND(SUM(BYTES) / (1024 * 1024)) FREE_SPACE
FROM DBA_FREE_SPACE GROUP BY TABLESPACE_NAME) F,
DBA_TABLESPACES G
WHERE D.TABLESPACE_NAME = F.TABLESPACE_NAME(+) AND D.TABLESPACE_NAME = G.TABLESPACE_NAME
ORDER BY PCT_MAX2CURR*PCT_CURR2USED desc) a)
where rownum=1;
EOF
for i in `echo $pdbname`
do
echo "The following is the PDB $i's data." >> $tabspace_data
sqlplus -S / as sysdba <<EOF >> $tabspace_data
set linesize 190 pagesize 0;
set feedback off;
set verify off;
alter session set container=$i;
select tablespace_name || '-' || PCT
from
(select a.*,round(PCT_MAX2CURR*PCT_CURR2USED/100) PCT
from (SELECT D.TABLESPACE_NAME,G.BLOCK_SIZE,
DECODE(MAX_SIZE_MB, 0, SPACE, MAX_SIZE_MB) MAX_SIZE_MB,
SPACE CURR_SIZE_MB,
SPACE - NVL(FREE_SPACE, 0) USED_SIZE_MB,
DECODE(FREE_SPACE,NULL,0,FREE_SPACE) FREE_SIZE_MB,
ROUND(SPACE/DECODE(MAX_SIZE_MB, 0, SPACE, MAX_SIZE_MB)*100, 3) PCT_MAX2CURR,
ROUND((1 - NVL(FREE_SPACE, 0) / SPACE) * 100, 3) PCT_CURR2USED
FROM (SELECT TABLESPACE_NAME,ROUND(SUM(BYTES) / (1024 * 1024)) SPACE,
ROUND(SUM(DECODE(MAXBYTES,0,BYTES,MAXBYTES) / 1024 / 1024)) MAX_SIZE_MB
FROM DBA_DATA_FILES GROUP BY TABLESPACE_NAME) D,
(SELECT TABLESPACE_NAME,ROUND(SUM(BYTES) / (1024 * 1024)) FREE_SPACE
FROM DBA_FREE_SPACE GROUP BY TABLESPACE_NAME) F,
DBA_TABLESPACES G
WHERE D.TABLESPACE_NAME = F.TABLESPACE_NAME(+) AND D.TABLESPACE_NAME = G.TABLESPACE_NAME
ORDER BY PCT_MAX2CURR*PCT_CURR2USED desc) a)
where rownum=1;
EOF
done
else
echo "This is non_CDB circumstances and the following is max tablespace usage detail." >> $tabspace_data
sqlplus -S / as sysdba <<EOF >> $tabspace_data
set linesize 190 pagesize 0;
set feedback off;
set verify off;
select tablespace_name || '-' || PCT
from
(select a.*,round(PCT_MAX2CURR*PCT_CURR2USED/100) PCT
from (SELECT D.TABLESPACE_NAME,G.BLOCK_SIZE,
DECODE(MAX_SIZE_MB, 0, SPACE, MAX_SIZE_MB) MAX_SIZE_MB,
SPACE CURR_SIZE_MB,
SPACE - NVL(FREE_SPACE, 0) USED_SIZE_MB,
DECODE(FREE_SPACE,NULL,0,FREE_SPACE) FREE_SIZE_MB,
ROUND(SPACE/DECODE(MAX_SIZE_MB, 0, SPACE, MAX_SIZE_MB)*100, 3) PCT_MAX2CURR,
ROUND((1 - NVL(FREE_SPACE, 0) / SPACE) * 100, 3) PCT_CURR2USED
FROM (SELECT TABLESPACE_NAME,ROUND(SUM(BYTES) / (1024 * 1024)) SPACE,
ROUND(SUM(DECODE(MAXBYTES,0,BYTES,MAXBYTES) / 1024 / 1024)) MAX_SIZE_MB
FROM DBA_DATA_FILES GROUP BY TABLESPACE_NAME) D,
(SELECT TABLESPACE_NAME,ROUND(SUM(BYTES) / (1024 * 1024)) FREE_SPACE
FROM DBA_FREE_SPACE GROUP BY TABLESPACE_NAME) F,
DBA_TABLESPACES G
WHERE D.TABLESPACE_NAME = F.TABLESPACE_NAME(+) AND D.TABLESPACE_NAME = G.TABLESPACE_NAME
ORDER BY PCT_MAX2CURR*PCT_CURR2USED desc) a)
where rownum=1;
EOF
fi
else
output=1
echo -e "\033[31mAttention,please!\033[0mDB has some problem, please check it!"
exit 1
fi
cat $tabspace_data | grep "-" | awk -F'-' '{print $2}' | grep -v "^$" >$max_tab_num
for j in `cat $max_tab_num | sed -e 's/\r//g'`
do
if [[ $j -ge 97 ]];then
count=$((count+1))
cat $tabspace_data | grep $j -B1 >> $max_tablespace
fi
done
max_num=`cat $tabspace_data | awk -F'-' '{print $2}' | sort -rn | head -1 | sed -e 's/\r//g'`
if [[ $count -gt 0 ]];then
output=1
echo -e "\033[31mAttention,please!\033[0mThe max usage of some tablespaces is higher than 97%, please check it!"
echo ""
cat $max_tablespace
else
output=0
echo "The usage of tablespace is \033[32mnormal\033[0m at $DT,the following is the max usage of tablespace."
echo ""
cat $tabspace_data | grep $max_num -B1
fi
exit 0
4、检查是否有失效的分区索引
[oracle@hisdb1 scripts]$ vi chk_part_ind.sh
#!/bin/bash
source /home/oracle/.bash_profile
MT_HOME="/home/oracle/monitor"
DT=$(date '+%Y-%m-%d %H:%M:%S')
pdbs="$MT_HOME/tmp/pdbs.txt"
ord_part_num="$MT_HOME/tmp/ord_part_num.txt"
ind_part_detail="$MT_HOME/tmp/ind_part_detail.txt"
ind_sub_part_detail="$MT_HOME/tmp/ind_sub_part_detail.txt"
test -d /home/oracle/monitor/tmp || mkdir -p /home/oracle/monitor/tmp
test -f $ord_part_num && >$ord_part_num
test -f $ind_part_detail && >$ind_part_detail
sqlplus -S / as sysdba <<EOF > $pdbs
set linesize 190 pagesize 0;
set feedback off;
set verify off;
show pdbs
EOF
pdbname=`cat $pdbs | awk '{print $2}'`
jud_pdb=`cat $pdbs | grep -iw "PDB$SEED" | wc -l`
if [[ $jud_pdb -eq 0 ]];then
sqlplus -S / as sysdba <<EOF >> $ord_part_num
set linesize 190 pagesize 0;
set feedback off;
set verify off;
select decode(count(*),-1,min('index+'||index_owner||'.'||index_name||'+unusable,partition_name+'||partition_name),count(*)) from dba_ind_partitions where status not in ('USABLE','N/A');
EOF
echo "The partition index detail of traditional database as below." >> $ind_part_detail
echo "INDEX_OWNER INDEX_NAME PARTITION_NAME" >> $ind_part_detail
sqlplus -S / as sysdba <<EOF >> $ind_part_detail
set linesize 190 pagesize 0;
set feedback off;
set verify off;
col INDEX_OWNER for a25;
col INDEX_NAME for a50;
col PARTITION_NAME for a25;
select index_owner,index_name,partition_name from dba_ind_partitions where status not in ('USABLE','N/A');
EOF
echo "" >> $ind_part_detail
trad_num=`cat $ind_part_detail | wc -l`
if [[ $trad_num -eq 3 ]];then
>$ind_part_detail
fi
else
sqlplus -S / as sysdba <<EOF >> $ord_part_num
set linesize 190 pagesize 0;
set feedback off;
set verify off;
select decode(count(*),-1,min('index+'||index_owner||'.'||index_name||'+unusable,partition_name+'||partition_name),count(*)) from dba_ind_partitions where status not in ('USABLE','N/A');
EOF
echo "The partition index detail of CDB as below." >> $ind_part_detail
echo "INDEX_OWNER INDEX_NAME PARTITION_NAME" >> $ind_part_detail
sqlplus -S / as sysdba <<EOF >> $ind_part_detail
set linesize 190 pagesize 0;
set feedback off;
set verify off;
col INDEX_OWNER for a25;
col INDEX_NAME for a50;
col PARTITION_NAME for a25;
select index_owner,index_name,partition_name from dba_ind_partitions where status not in ('USABLE','N/A');
EOF
echo "" >> $ind_part_detail
cdb_num=`cat $ind_part_detail | wc -l`
if [[ $cdb_num -eq 3 ]];then
>$ind_part_detail
fi
for i in `echo $pdbname`
do
sqlplus -S / as sysdba <<EOF >> $ord_part_num
set linesize 190 pagesize 0;
set feedback off;
set verify off;
alter session set container=$i;
select decode(count(*),-1,min('index+'||index_owner||'.'||index_name||'+unusable,partition_name+'||partition_name),count(*)) from dba_ind_partitions where status not in ('USABLE','N/A');
EOF
done
for i in `echo $pdbname`
do
echo "The partition index detail of PDB $i as below." >> $ind_sub_part_detail
echo "INDEX_OWNER INDEX_NAME PARTITION_NAME" >> $ind_sub_part_detail
sqlplus -S / as sysdba <<EOF >> $ind_sub_part_detail
set linesize 190 pagesize 0;
set feedback off;
set verify off;
col INDEX_OWNER for a25;
col INDEX_NAME for a50;
col PARTITION_NAME for a25;
alter session set container=$i;
select index_owner,index_name,partition_name from dba_ind_partitions where status not in ('USABLE','N/A') order by partition_name;
EOF
echo "" >> $ind_sub_part_detail
pdb_num=`cat $ind_sub_part_detail | wc -l`
if [[ $pdb_num -eq 3 ]];then
>$ind_sub_part_detail
else
cat $ind_sub_part_detail >> $ind_part_detail
>$ind_sub_part_detail
fi
done
fi
ord_ind_invalid=`awk '{sum+=$1}END{print sum}' $ord_part_num`
if [[ $ord_ind_invalid > 0 ]];then
output=1
echo -e "\033[31mAttention,please!\033[0mThere are some invalid partition indexes in database.The following is the detail."
echo ""
cat $ind_part_detail
else
output=0
echo "Don't worry,the database doesn't have any invalid partition index."
fi
exit 0
5、检查是否有失效的普通索引
[oracle@hisdb1 scripts]$ vi chk_ord_ind.sh
#!/bin/bash
source /home/oracle/.bash_profile
MT_HOME="/home/oracle/monitor"
DT=$(date '+%Y-%m-%d %H:%M:%S')
pdbs="$MT_HOME/tmp/pdbs.txt"
ord_ind_num="$MT_HOME/tmp/ord_ind_num.txt"
ind_ord_detail="$MT_HOME/tmp/ind_ord_detail.txt"
ind_sub_ord_detail="$MT_HOME/tmp/ind_sub_ord_detail.txt"
test -d /home/oracle/monitor/tmp || mkdir -p /home/oracle/monitor/tmp
test -f $ord_ind_num && >$ord_ind_num
test -f $ind_ord_detail && >$ind_ord_detail
test -f $ind_sub_ord_detail && >$ind_sub_ord_detail
sqlplus -S / as sysdba <<EOF > $pdbs
set linesize 190 pagesize 0;
set feedback off;
set verify off;
show pdbs
EOF
pdbname=`cat $pdbs | awk '{print $2}'`
jud_pdb=`cat $pdbs | grep -iw "PDB$SEED" | wc -l`
if [[ $jud_pdb -eq 0 ]];then
sqlplus -S / as sysdba <<EOF >> $ord_ind_num
set linesize 190 pagesize 0;
set feedback off;
set verify off;
select decode(count(*),-1,min('index+'||a.owner||'.'||a.index_name||'+unusable,table+'||a.table_owner||','||a.table_name),count(*)) from dba_indexes a where a.status not in ('VALID','N/A');
EOF
echo "The invalid ordinary index detail of traditional database as below." >> $ind_ord_detail
echo "OWNER TABLE_NAME INDEX_NAME" >> $ind_ord_detail
sqlplus -S / as sysdba <<EOF >> $ind_ord_detail
set linesize 190 pagesize 0;
set feedback off;
set verify off;
col owner for a25;
col index_name for a50;
col table_name for a30;
select owner,table_name,index_name from dba_indexes where status not in ('N/A','VALID');
EOF
echo "" >> $ind_ord_detail
trad_num=`cat $ind_ord_detail | wc -l`
if [[ $trad_num -eq 3 ]];then
>$ind_ord_detail
fi
else
sqlplus -S / as sysdba <<EOF >> $ord_ind_num
set linesize 190 pagesize 0;
set feedback off;
set verify off;
select decode(count(*),-1,min('index+'||a.owner||'.'||a.index_name||'+unusable,table+'||a.table_owner||','||a.table_name),count(*)) from dba_indexes a where a.status not in ('VALID','N/A');
EOF
for i in `echo $pdbname`
do
sqlplus -S / as sysdba <<EOF >> $ord_ind_num
set linesize 190 pagesize 0;
set feedback off;
set verify off;
alter session set container=$i;
select decode(count(*),-1,min('index+'||a.owner||'.'||a.index_name||'+unusable,table+'||a.table_owner||','||a.table_name),count(*)) from dba_indexes a where a.status not in ('VALID','N/A');
EOF
done
echo "The invalid ordinary index detail of CDB as below." >> $ind_ord_detail
echo "OWNER TABLE_NAME INDEX_NAME" >> $ind_ord_detail
sqlplus -S / as sysdba <<EOF >> $ind_ord_detail
set linesize 190 pagesize 0;
set feedback off;
set verify off;
col owner for a25;
col index_name for a50;
col table_name for a30;
select owner,table_name,index_name from dba_indexes where status not in ('N/A','VALID');
EOF
echo "" >> $ind_ord_detail
cdb_num=`cat $ind_ord_detail | wc -l`
if [[ $cdb_num -eq 3 ]];then
>$ind_ord_detail
fi
for i in `echo $pdbname`
do
echo "The invalid ordinary index detail of PDB $i as below." >> $ind_sub_ord_detail
echo "OWNER TABLE_NAME INDEX_NAME" >> $ind_sub_ord_detail
sqlplus -S / as sysdba <<EOF >> $ind_sub_ord_detail
set linesize 190 pagesize 0;
set feedback off;
set verify off;
col owner for a25;
col index_name for a50;
col table_name for a30;
alter session set container=$i;
select owner,table_name,index_name from dba_indexes where status not in ('N/A','VALID');
EOF
echo "" >> $ind_sub_ord_detail
pdb_num=`cat $ind_sub_ord_detail | wc -l`
if [[ $pdb_num -eq 3 ]];then
>$ind_sub_ord_detail
else
cat $ind_sub_ord_detail >> $ind_ord_detail
>$ind_sub_ord_detail
fi
done
fi
ord_ind_invalid=`awk '{sum+=$1}END{print sum}' $ord_ind_num`
if [[ $ord_ind_invalid > 0 ]];then
output=1
echo -e "\033[31mAttention,please!\033[0mThere are some invalid ordinary indexes in database.The following is the detail."
echo ""
cat $ind_ord_detail
else
output=0
echo "Don't worry,the database doesn't have invalid ordinary index."
fi
exit 0
6、检查数据文件数量是否低于DB_FILES参数值的90%
[oracle@hisdb1 scripts]$ vi chk_dbfile.sh
#!/bin/bash
source /home/oracle/.bash_profile
MT_HOME="/home/oracle/monitor"
total_dbfile="$MT_HOME/tmp/total_dbfile.txt"
pdbs="$MT_HOME/tmp/pdbs.txt"
pdbs_dbfile="$MT_HOME/tmp/pdbs_dbfile.txt"
db_dbfile="$MT_HOME/tmp/db_dbfile.txt"
test -d /home/oracle/monitor/tmp || mkdir -p /home/oracle/monitor/tmp
sqlplus -S / as sysdba <<EOF > $total_dbfile
set linesize 190 pagesize 0;
set feedback off;
set verify off;
show parameter db_files
EOF
sys_dbfile_num=`cat $total_dbfile | awk '{print $3}' | sed -e 's/\r//g'`
sqlplus -S / as sysdba <<EOF > $pdbs
set linesize 190 pagesize 0;
set feedback off;
set verify off;
show pdbs
EOF
pdbname=`cat $pdbs | awk '{print $2}'`
jud_pdb=`cat $pdbs | grep -iw "PDB$SEED" | wc -l`
if [[ $jud_pdb -eq 0 ]];then
sqlplus -S / as sysdba <<EOF >$db_dbfile
set linesize 190 pagesize 0;
set feedback off;
set verify off;
select count(*) from dba_data_files;
EOF
echo 0 >$pdbs_dbfile
else
for i in `echo $pdbname`
do
sqlplus -S / as sysdba <<EOF >> $pdbs_dbfile
set linesize 190 pagesize 0;
set feedback off;
set verify off;
alter session set container=$i;
select count(*) from dba_data_files;
EOF
done
sqlplus -S / as sysdba <<EOF >$db_dbfile
set linesize 190 pagesize 0;
set feedback off;
set verify off;
select count(*) from dba_data_files;
EOF
fi
db_file=`cat $db_dbfile | sed -e 's/\r//g'`
pdb_file=`awk '{sum+=$1}END{print sum}' $pdbs_dbfile`
total_dbfiles=`echo "$db_file+$pdb_file" | bc`
dbfile_rate=`echo "scale=2; $total_dbfiles / $sys_dbfile_num" | bc`
dbfile_perc=`echo "scale=2; $dbfile_rate * 100" | bc`
if [[ $dbfile_perc > 89 ]];then
output=1
echo -e "\033[31mAttention,please!\033[0mThe usage of dbfile is higher than 90%."
echo -e "The value of the system setting is $sys_dbfile_num,and $total_dbfiles are currently used,so the usage rates is \033[31m$dbfile_perc%\033[0m."
else
output=0
echo "The usage of dbfile is \033[32mnormal\033[0m!"
echo -e "The value of the system setting is $sys_dbfile_num,and $total_dbfiles are currently used,so the usage rates is \033[31m$dbfile_perc%\033[0m."
fi
exit 0
7、查看资源是否是online,特别是db、listener、vip、asm磁盘等资源,若出现offline,则有异常
[grid@hisdb1 scripts]$ vi chk_gires.sh
#!/bin/bash
source /home/grid/.bash_profile
MT_HOME="/home/grid/monitor"
dt=`date +%Y%m%d_%H:%M:%S`
asmdiskname="$MT_HOME/tmp/asmdisk.dat"
gi_status="$MT_HOME/tmp/gi_status.txt"
rac_status="$MT_HOME/tmp/rac_status.txt"
dbname="$MT_HOME/tmp/dbname.txt"
vip_list="$MT_HOME/tmp/vip_list.txt"
#htname=`hostname`
#hostname_length=`expr length $htname`
#res_length=`echo "$hostname_length-1" | bc`
#sim_htname=`echo $htname | cut -c1-$res_length`
test -d /home/grid/monitor/tmp || mkdir -p /home/grid/monitor/tmp
test -f $rac_status || touch $rac_status && >$rac_status
if [ `ps -ef | grep d.bin | wc -l` -gt 15 ];then
sqlplus -S / as sysdba <<EOF > $asmdiskname
set linesize 190 pagesize 0;
set feedback off;
set verify off;
select name from v\$asm_diskgroup;
EOF
test -f $gi_status && >$gi_status
for i in `cat $asmdiskname`
do
/u01/app/19.13/grid/bin/crsctl stat res -t -w "((NAME = ora.$i.dg))" | cut -c16-67 | grep -v "^$" | grep -i line >> $gi_status
/u01/app/19.13/grid/bin/crsctl stat res -t -w "((NAME = ora.$i.dg))" | tail -n +6 >> $rac_status
done
/u01/app/19.13/grid/bin/crsctl stat res -t | grep "ora.*.vip$" | awk -F'.' '{print $2}' >$vip_list
for j in `cat $vip_list`
do
/u01/app/19.13/grid/bin/crsctl stat res -t -w "((NAME = ora.$))" | cut -c16-67 | grep -v "^$" | grep -i line >> $gi_status
/u01/app/19.13/grid/bin/crsctl stat res -t -w "((NAME = ora.$))" | tail -n +6 >> $rac_status
done
/u01/app/19.13/grid/bin/crsctl stat res -t | grep "ora.*.db$" | awk -F'.' '{print $2}' >$dbname
for m in `cat $dbname`
do
/u01/app/19.13/grid/bin/crsctl stat res -t -w "((NAME = ora.$m.db))" | cut -c16-67 | grep -v "^$" | grep -i line >> $gi_status
/u01/app/19.13/grid/bin/crsctl stat res -t -w "((NAME = ora.$m.db))" | tail -n +6 >> $rac_status
done
abno_data=`cat $gi_status | grep -viE "STABLE|Open" | wc -l`
if [[ $abno_data -gt 0 ]];then
output=1
echo -e "\033[31mAttention!\033[0mRAC has some problem, please check it!"
echo ""
echo "The following is the abnormal detail."
cat $rac_status | grep -viE "STABLE|Open" | grep OFFLINE -C1
else
output=0
echo "RAC resource is good!"
fi
else
output=1
echo -e "\033[31mAttention!\033[0mThe quantity of GI's process is less than normal level, please check it!"
fi
exit 0
8、目录使用率超过90%,则需处理
[oracle@hisdb1 scripts]$ vi chk_disk.sh
#!/bin/bash
MT_HOME="/home/oracle/monitor"
diskusage="$MT_HOME/tmp/diskusage.txt"
sdisk_data="$MT_HOME/tmp/sdisk_data.txt"
abn_diskusage="$MT_HOME/tmp/abn_diskusage.txt"
mid_abn_diskusage="$MT_HOME/tmp/mid_abn_diskusage.txt"
fina_abn_diskusage="$MT_HOME/tmp/fina_abn_diskusage.txt"
sam_diskusage="$MT_HOME/tmp/sam_diskusage.txt"
tmp_diskusage="$MT_HOME/tmp/tmp_diskusage.txt"
jud_inum="$MT_HOME/tmp/jud_inum.txt"
test -d /home/oracle/monitor/tmp || mkdir -p /home/oracle/monitor/tmp
test -f $abn_diskusage || touch $abn_diskusage && >$abn_diskusage
test -f $jud_inum || touch $jud_inum && >$jud_inum
test -f $tmp_diskusage || touch $tmp_diskusage
dt=`date +%Y%m%d_%H:%M:%S`
df -h > $diskusage
cat $diskusage | tail -n +2 | awk '{print $(NF-1),$NF}' | sed 's/%//g' > $sdisk_data
for i in `cat $sdisk_data | awk '{print $1}' | sed -e 's/\r//g'`
do
if [[ $i -ge 90 ]];then
#diskname=`cat /tmp/cpuage.txt | grep $i | awk '{print $2}' | head -1`
#echo -e "\033[31mAttention,please!\033[0mThe usage of $diskname exceeds 90% at $dt."
count=$((count+1))
jud_idt=`cat $sdisk_data | grep -i $i | wc -l`
if [[ $jud_idt -gt 1 ]];then
echo $i >> $jud_inum
else
cat $sdisk_data | grep -i $i >> $abn_diskusage
fi
#cat $diskusage | awk '{print $(NF-1),$NF}' | grep -i $i > $sam_diskusage
#diff_num=`diff $sam_diskusage $tmp_diskusage | wc -l`
#>$tmp_diskusage
#cat $sam_diskusage >> $tmp_diskusage
#if [[ $diff_num -gt 0 ]];then
# cat $sam_diskusage >> $abn_diskusage
#else :
#fi
fi
done
for j in `cat $jud_inum | sort | uniq`
do
cat $sdisk_data | grep -i $j >> $abn_diskusage
done
test -f $mid_abn_diskusage && >$mid_abn_diskusage
test -f $fina_abn_diskusage && >$fina_abn_diskusage
sed 's/^/ /' $abn_diskusage >>$mid_abn_diskusage
sed '1i\Use% Mounted on' $mid_abn_diskusage >>$fina_abn_diskusage
#usage=`cat $abn_diskusage | awk '{print $1}'`
#mountdir=`cat $abn_diskusage | awk '{print $2}'`
if [[ $count -gt 0 ]];then
output=1
echo -e "\033[31mAttention,please!\033[0mThe usage of DISK exceeds 90% at $dt."
echo ""
echo "Here is the out of standard usage details,please check it."
cat $fina_abn_diskusage
else
output=0
echo -e "The usage of disk is \033[32mnormal\033[0m at $dt."
fi
exit 0
9、对比平常进程数量,出现突增的情况则有异常
[oracle@hisdb1 scripts]$ vi chk_process.sh
#!/bin/bash
source /home/oracle/.bash_profile
MT_HOME="/home/oracle/monitor"
test -d /home/oracle/monitor/tmp || mkdir -p /home/oracle/monitor/tmp
processfile="$MT_HOME/tmp/process.dat"
dt=`date +%Y%m%d_%H:%M:%S`
sqlplus -S / as sysdba <<EOF > $processfile
set linesize 190 pagesize 0;
set feedback off;
set verify off;
col value for a20
select a.sessions,b.value,round(a.sessions/b.value*100,2) as zhanyong from (select count(*) sessions from v\$session ) a, (select value from v\$parameter where name='processes') b;
EOF
RAT=`cat $processfile | tail -1 | awk '{print $NF}' | sed -e 's/\r//g'`
SESSIONS=`cat $processfile | tail -1 | awk '{print $1}'`
VALUE=`cat $processfile | tail -1 | awk '{print $2}'`
if [[ $RAT > 89 ]];then
output=1
echo -e "\033[31mAttention,please!\033[0mThe utilization rate of current process exceeds 90% at $dt."
echo ""
echo "The following is the detail of process status."
echo "The value of the system setting is $VALUE,and $SESSIONS are currently used."
else
output=0
echo -e "The utilization rate of current process is \033[32mnormal\033[0m at $dt"
echo ""
echo "The following is the detail of process status."
echo "The value of the system setting is $VALUE,and $SESSIONS are currently used."
fi
exit 0
10、检查监听启动时间是否正常,若无监听或未注册则异常
[oracle@hisdb1 scripts]$ vi chk_listener.sh
#!/bin/bash
source /home/oracle/.bash_profile
MT_HOME="/home/oracle/monitor"
listener_status="$MT_HOME/tmp/listener_status.txt"
dt=`date +%Y%m%d_%H:%M:%S`
SMON_COUNT=`ps -ef | grep smon | grep -c $ORACLE_SID`
if (( $SMON_COUNT > 0 ));then
lsnrctl status > $listener_status
else
output=1
echo -e "\033[31mAttention,please!\033[0mDB has some problem, please check it!"
exit 1
fi
count=`cat $listener_status | grep $ORACLE_SID | wc -l`
if [[ $count -eq 0 ]];then
output=1
echo -e "\033[31mAttention,please!\033[0mThe status of listener is abnormal at $dt."
else
output=0
echo -e "The status of listener is \033[32mnormal\033[0m at $dt."
fi
exit 0
11、检查主备库磁盘组使用情况,使用率超过85%显示异常
[oracle@hisdb1 tmp]$ vi chk_asmdisk.sh
#!/bin/bash
source /home/oracle/.bash_profile
MT_HOME="/home/oracle/monitor"
asmdiskfile="$MT_HOME/tmp/asmdiskfile.txt"
asmdisk="$MT_HOME/tmp/asmdisk.txt"
abn_diskgroup="$MT_HOME/tmp/abn_diskgroup.txt"
fina_abn_diskgroup="$MT_HOME/tmp/fina_abn_diskgroup.txt"
jud_inum="$MT_HOME/tmp/jud_inum.txt"
test -d /home/oracle/monitor/tmp || mkdir -p /home/oracle/monitor/tmp
test -f $jud_inum || touch $jud_inum && >$jud_inum
dt=`date +%Y%m%d_%H:%M:%S`
sqlplus -S / as sysdba <<EOF > $asmdiskfile
set linesize 190 pagesize 0;
set feedback off;
set verify off;
col name for a15;
select name,total_mb/1024 total_gb,trunc(free_mb/1024) free_gb,trunc((total_mb-free_mb)/total_mb*100) "use%" from v\$asm_diskgroup;
EOF
test -f $abn_diskgroup || >$abn_diskgroup
cat $asmdiskfile | awk '{print $NF}' > $asmdisk
for i in `cat $asmdisk | sed -e 's/\r//g'`
do
if [[ $i -ge 85 ]];then
count=$((count+1))
jud_idt=`cat $asmdiskfile | grep -i $i | wc -l`
if [[ $jud_idt -gt 1 ]];then
echo $i >> $jud_inum
else
cat $asmdiskfile | grep -i $i >> $abn_diskgroup
fi
fi
done
for j in `cat $jud_inum | sort | uniq`
do
cat $asmdiskfile | grep -i $j >> $abn_diskgroup
done
test -f $fina_abn_diskgroup || touch $fina_abn_diskgroup
test -f $fina_abn_diskgroup && >$fina_abn_diskgroup
sed '1i\NAME TOTAL_GB FREE_GB use%' $abn_diskgroup >>$fina_abn_diskgroup
if [[ $count -gt 0 ]];then
output=1
echo -e "\033[31mAttention,please!\033[0mThe usage of asmdisk exceeds 85% at $dt."
echo ""
echo "Here is the out of standard usage details,please check it."
cat $fina_abn_diskgroup
else
output=0
echo "The usage of asmdisk is normal at $dt."
fi
exit 0
12、检查归档目录使用率是否正常(增加归档使用率),使用率超过85%显示异常.
[oracle@hisdb1 scripts]$ vi chk_archdisk.sh
#!/bin/bash
source /home/oracle/.bash_profile
MT_HOME="/home/oracle/monitor"
archfile="$MT_HOME/tmp/archfile.txt"
asmdiskfile="$MT_HOME/tmp/asmdiskfile.txt"
recofile="$MT_HOME/tmp/recofile.txt"
db_reco_dt="$MT_HOME/tmp/db_reco_dt.txt"
diskusage="$MT_HOME/tmp/diskusage.txt"
dt=`date +%Y%m%d_%H:%M:%S`
test -d /home/oracle/monitor/tmp || mkdir -p /home/oracle/monitor/tmp
df -h >$diskusage
sqlplus -S / as sysdba <<EOF > $archfile
set linesize 190 pagesize 0;
set feedback off;
set verify off;
archive log list;
EOF
sqlplus -S / as sysdba <<EOF > $recofile
set linesize 190 pagesize 0;
set feedback off;
set verify off;
show parameter recovery;
EOF
cat $recofile | grep -w db_recovery_file_dest | awk '{print $3}' >$db_reco_dt
arch_stat=`cat $archfile | grep -iw "Archive Mode" | wc -l`
if [[ ${arch_stat} -eq 1 ]]
then
arch_jud=`cat $archfile | grep -iw "Archive destination" | grep "+" | wc -l`
else
echo "The DB is running in non-archive mode!"
exit 1
fi
use_db_situ=`cat $archfile | grep -i USE_DB_RECOVERY_FILE_DEST | wc -l`
db_reco_file_dest=`cat $recofile | grep -i db_recovery_file_dest | grep "+" | wc -l`
if [[ $use_db_situ -eq 1 ]] && [[ $db_reco_file_dest -eq 1 ]] || [[ $arch_jud -eq 1 ]];then
sqlplus -S / as sysdba <<EOF > $asmdiskfile
set linesize 190 pagesize 0;
set feedback off;
set verify off;
select name,total_mb/1024 total_gb,trunc(free_mb/1024) free_gb,trunc((total_mb-free_mb)/total_mb*100) "use%" from v\$asm_diskgroup;
EOF
if [[ $use_db_situ -eq 1 ]] && [[ $db_reco_file_dest -eq 1 ]];then
archdisk=`cat $recofile | grep -iw "db_recovery_file_dest" | awk -F'+' '{print $NF}'`
else
archdisk=`cat $archfile | grep -iw "Archive destination" | awk -F'+' '{print $NF}'`
fi
archrat=`cat $asmdiskfile | awk '{print $(NF-3),$NF}' | grep $archdisk | awk '{print $NF}' | sed -e 's/\r//g'`
arch_dest=`cat $asmdiskfile | awk '{print $1}' | grep $archdisk`
if [[ $archrat -gt 85 ]];then
output=1
echo -e "\033[31mAttention,please!\033[0mThe usage of archive destination exceeds 85% at $dt."
echo ""
echo "The following is the usage detail of archive directory."
echo -e "The archive destination is $arch_dest,and the usage is \033[31m$archrat%\033[0m."
else
output=0
echo -e "The usage of archive directory is \033[32mnormal\033[0m at $dt."
fi
elif [[ $use_db_situ -eq 1 ]] && [[ $db_reco_file_dest -eq 0 ]] || [[ $arch_jud -eq 0 ]];then
if [[ $use_db_situ -eq 1 ]] && [[ $db_reco_file_dest -eq 0 ]];then
archdisk01=`cat $recofile | grep -iw "db_recovery_file_dest" | awk '{print $3}'`
else
archdisk01=`cat $archfile | grep -iw "Archive destination" | awk '{print $3}'`
fi
mid_archdisk=`echo $archdisk01 | awk -F'/' '{print $2}'`
filesys_usage=`cat $diskusage | grep -iw $mid_archdisk | awk '{print $(NF-1)}' | sed 's/%//g' | sed -e 's/\r//g'`
#arch_dest01=`cat $diskusage | grep -iw $mid_archdisk | awk '{print $(NF)}'`
if [[ $filesys_usage -gt 85 ]];then
output=1
echo -e "\033[31mAttention,please!\033[0mThe usage of archive destination exceeds 85% at $dt."
echo ""
echo "The following is the usage detail of archive directory."
echo -e "The archive destination is $archdisk01,and usage is \033[31m$filesys_usage%\033[0m."
else
output=0
echo -e "The usage of archive directory is \033[32mnormal\033[0m at $dt."
fi
fi
exit 0
13、查看RMAN备份情况,若最后一次备份失败,则异常(此项重要务必每天检查)
[oracle@leo-oel150 scripts]$ vi chk_backup.sh
#!/bin/bash
source /home/oracle/.bash_profile
MT_HOME="/home/oracle/monitor"
backupfile="$MT_HOME/tmp/backupfile.txt"
fina_backupfile="$MT_HOME/tmp/fina_backupfile.txt"
dt=`date +%Y%m%d_%H:%M:%S`
test -f $backupfile && >$backupfile
test -f $fina_backupfile && >$fina_backupfile
for i in 'DB FULL' 'DB INCR' 'DATAFILE FULL' 'DATAFILE INCR' 'ARCHIVELOG' 'CONTROLFILE'
do
sqlplus -S / as sysdba <<EOF >> $backupfile
set linesize 190 pagesize 0;
set feedback off;
set verify off;
select *
from
(select session_key,
input_type,
status,
to_char(start_time, 'mm/dd/yy hh24:mi') start_time,
to_char(end_time, 'mm/dd/yy hh24:mi') end_time,
round(elapsed_seconds / 3600,2) hrs
from v\$rman_backup_job_details
where input_type='$i'
and start_time>sysdate-30
order by start_time desc)
where rownum<=1;
EOF
done
if [[ `cat $backupfile | wc -l` -gt 0 ]];then
count=`cat $backupfile | grep -Ewi "FAILED|WARNING|ERRORS" | wc -l`
sed '1i\SESSION_KEY INPUT_TYPE STATUS START_TIME END_TIME HRS' $backupfile >>$fina_backupfile
if [[ $count -gt 0 ]];then
output=1
echo -e "\033[31mAttention,please!\033[0mThe backup of DB is failed at $dt."
echo ""
echo "The following is the failed backup detail."
cat $fina_backupfile | grep -Ewi "FAILED|WARNING|ERRORS|STATUS"
else
output=0
echo -e "The backup of DB is \033[32mnormal\033[0m at $dt."
fi
else
echo "It doesn't have any backup of DB,it's not what oracle suggests."
fi
exit 0
14、检查ADG同步情况
[oracle@leo-oel151 scripts]$ vi chk_adg.sh
#!/bin/bash
source /home/oracle/.bash_profile
MT_HOME="/home/oracle/monitor"
dt=`date +%Y%m%d_%H:%M:%S`
apply_file="$MT_HOME/tmp/apply_file.txt"
db_role="$MT_HOME/tmp/db_role.txt"
mrpfile="$MT_HOME/tmp/mrpfile.txt"
mrp_status="$MT_HOME/tmp/mrp_status.txt"
fina_mrp_status="$MT_HOME/tmp/fina_mrp_status.txt"
jud_file="$MT_HOME/tmp/jud_file.txt"
test -d /home/oracle/monitor/tmp || mkdir -p /home/oracle/monitor/tmp
sqlplus -S / as sysdba <<EOF > $jud_file
set linesize 190 pagesize 0;
set feedback off;
set verify off;
select open_mode from v\$database;
EOF
jud_act=`cat $jud_file | grep -i "read" | wc -l`
if [[ $jud_act -eq 1 ]];then
sqlplus -S / as sysdba <<EOF > $db_role
set linesize 190 pagesize 0;
set feedback off;
set verify off;
select database_role from v\$database;
EOF
jud_db_role=`cat $db_role | grep "PHYSICAL STANDBY" | wc -l`
if [[ ${jud_db_role} -eq 1 ]];then
sqlplus -S / as sysdba <<EOF > $mrpfile
set linesize 190 pagesize 0;
set feedback off;
set verify off;
select process,status
from v\$managed_standby;
EOF
mrp_num=`cat $mrpfile | grep MRP0 | wc -l`
cat $mrpfile | grep MRP0 >$mrp_status
sed '1i\PROCESS STATUS' $mrp_status >$fina_mrp_status
if [[ $mrp_num -eq 1 ]];then
abno_mrp_pro=`cat $mrpfile | grep MRP0 | grep -iwE "WAIT_FOR_LOG|WAIT_FOR_GAP" | wc -l | sed -e 's/\r//g'`
if [[ $abno_mrp_pro -eq 1 ]];then
output=1
echo -e "\033[31mAttention,please!\033[0mSomething wrong with the MRP process,please check it."
echo ""
echo "The following is the detail."
cat $fina_mrp_status
exit 1
else
sqlplus -S / as sysdba <<EOF > $apply_file
set linesize 190 pagesize 0;
set feedback off;
set verify off;
select name||'='||value
from v\$dataguard_stats
where (substr(value,8,2)>1 or substr(value,5,2)>1)
and name like '%lag';
exit;
EOF
fi
else
output=1
echo -e "\033[31mAttention,please!\033[0mThe ADG doesn't start MRP process."
exit 1
fi
delay_time=`cat $apply_file | grep -i apply | awk -F '=' '{print $2}' | awk '{print $2}'`
delay_trans=`cat $apply_file | grep -i transport | awk -F '=' '{print $2}' | awk '{print $2}'`
count01=`cat $apply_file | grep -i apply | awk -F '=' '{print $2}' | wc -l`
count02=`cat $apply_file | grep -i transport | awk -F '=' '{print $2}' | wc -l`
if [[ $count01 -eq 1 ]] || [[ $count02 -eq 1 ]];then
output=1
echo -e "\033[31mAttention,please!\033[0mThe status of ADG is abnormal at $dt,and archive files apply at physical standby delayed for \033[31m$delay_time\033[0m,archive files transport from primary delayed for \033[31m$delay_trans\033[0m."
exit 1
else
output=0
echo -e "The status of ADG is \033[32mnormal\033[0m at $dt."
fi
else
echo "This isn't slave node,please run this script on slave node!"
exit 1
fi
else
output=1
echo "Something wrong with the DB,please check it ASAP."
fi
exit 0
15、前一天alert日志
[grid@hisdb1 scripts]# vi chk_gi_alert.sh
#!/bin/bash
MT_HOME=/home/grid/monitor
hname=`hostname`
ipaddr=`cat /etc/hosts | grep -w "$hname" | grep -Ev "priv|vip" | awk '{print $1}'`
last_day="`date -d -1day +%Y-%m-%d`"
test -d /home/grid/monitor/log || mkdir -p /home/grid/monitor/log
test -d /home/grid/monitor/tmp || mkdir -p /home/grid/monitor/tmp
### check current day ASM alert log ###
asm_curdy_alert_log="$MT_HOME/log/${ipaddr}_asm_alert_`date +%Y%m%d`.log"
ASM_SID=`ps -ef | grep asm_pmon | grep -v grep | awk -F'_' '{print $3}'`
source /home/grid/.bash_profile
sqlplus -S / as sysdba <<EOF > $MT_HOME/tmp/asm_trace_dir.txt
set linesize 190 pagesize 0;
set feedback off;
select VALUE from v\$diag_info where name='Diag Trace';
EOF
asm_alert_log="`cat $MT_HOME/tmp/asm_trace_dir.txt`/alert_${ASM_SID}.log"
sed -n "/^$last_day/,/GG/"p $asm_alert_log > $asm_curdy_alert_log
#SID=`echo $asm_curdy_alert_log | awk -F'/' '{print $NF}' | awk -F'_' '{print $2}'`
echo "###################IP ADDR:$ipaddr -- SID: $ASM_SID ########################"
err_cnt=`cat $asm_curdy_alert_log | grep -Ei "ora-|error|warn|System state dump|Deadlock" | wc -l`
if [ $err_cnt -eq 0 ];then
output=0
echo "There isn't error message in asm alertfile!"
else
output=1
echo -e "\033[31mAttention,please!\033[0mThere are some error messages in asm alertfile!"
echo ""
cat $asm_curdy_alert_log | grep -Ei "ora-|error|warn|System state dump|Deadlock" -C1
fi
#test -f $asm_curdy_alert_log && rm -f $asm_curdy_alert_log
exit 0
[oracle@hisdb1 scripts]# vi chk_db_alert.sh
#!/bin/bash
MT_HOME=/home/oracle/monitor
hname=`hostname`
ipaddr=`cat /etc/hosts | grep -w "$hname" | grep -Ev "priv|vip" | awk '{print $1}'`
last_day="`date -d -1day +%Y-%m-%d`"
test -d /home/oracle/monitor/log || mkdir -p /home/oracle/monitor/log
test -d /home/oralce/monitor/tmp || mkdir -p /home/oracle/monitor/tmp
### check current day DB alert log ###
jud_act=`ps -ef | grep ora_pmon | grep -v grep | awk -F'_' '{print $3}' | wc -l`
if [[ $jud_act -gt 0 ]];then
for DB_SID in $(ps -ef | grep ora_pmon | grep -v grep | awk -F'_' '{print $3}')
do
db_curdy_alert_log="$MT_HOME/log/${ipaddr}_${DB_SID}_alert_`date +%Y%m%d`.log"
source /home/oracle/.bash_profile
sqlplus -S / as sysdba <<EOF > $MT_HOME/tmp/db_trace_dir.txt
set linesize 190 pagesize 0;
set feedback off;
select VALUE from v\$diag_info where name='Diag Trace';
EOF
db_alert_log="`cat $MT_HOME/tmp/db_trace_dir.txt`/alert_${DB_SID}.log"
sed -n "/^$last_day/,/GG/"p $db_alert_log > $db_curdy_alert_log
echo "###################IP ADDR:$ipaddr -- SID: $DB_SID ########################"
err_cnt=`cat $db_curdy_alert_log | grep -Ei "ora-|error|warn|System state dump|Deadlock" | wc -l`
if [ $err_cnt -eq 0 ];then
output=0
echo "There isn't error message in db alertfile!"
else
output=1
echo -e "\033[31mAttention,please!\033[0mThere are some error messages in DB alertfile!"
cat $db_curdy_alert_log | grep -Ei "ora-|error|warn|System state dump|Deadlock" -C1
fi
#test -f $db_curdy_alert_log && rm -f $db_curdy_alert_log
done
else
output=1
echo "Something wrong with the DB,please check it ASAP."
fi
exit 0