1.需求背景
系統(tǒng)程序突然報(bào)錯(cuò),報(bào)錯(cuò)信息如下:
The transaction log for database '@dbname' is full. To find out why space in the log cannot be reused, see the log_reuse_wait_desc column in sys.databases
此時(shí)查看log文件,已達(dá)2T。
當(dāng)時(shí)的緊急處理方案是,移除掉鏡像,修改數(shù)據(jù)庫(kù)恢復(fù)模式(由full修改為simple),收縮日志。
為了防止類似問題再次發(fā)生,需對(duì)log 文件的大小進(jìn)行監(jiān)控,當(dāng)?shù)竭_(dá)閾值后,觸發(fā)告警。
2.主要基礎(chǔ)組件(類)
配置文件qqmssqltest_db_server_conf.ini
同過此配置文件獲取DB Server信息、DB信息、UID信息、郵件服務(wù)器信息等。
[sqlserver]
db_user = XXXXXX
db_pwd = XXXXXXX
[sqlserver_qq]
db_host = 110.119.120.114
db_port = 1433
[windows]
user =
pwd =
[mail]
host = zheshiceshidemail.qq.com
port = 25
user =
pwd =
sender = zhejiushiceshidebuyaodangzhen@qq.com
獲取連接串的組件mssql_get_db_connect.py
# -*- coding: utf-8 -*-
import sys
import os
import datetime
import configparser
import pymssql
# pip3 install pymssql-2.1.4-cp37-cp37m-win_amd64.whl
# pip3 install pymssql -i https://pypi.doubanio.com/simple
# 獲取連接串信息
def mssql_get_db_connect(db_host, db_port):
db_host = db_host
db_port = db_port
db_ps_file = os.path.join(sys.path[0], "qqmssqltest_db_server_conf.ini")
config = configparser.ConfigParser()
config.read(db_ps_file, encoding="utf-8")
db_user = config.get('sqlserver', 'db_user')
db_pwd = config.get('sqlserver', 'db_pwd')
conn = pymssql.connect(host=db_host, port=db_port, user=db_user, password=db_pwd, charset="utf8", login_timeout=5, timeout=600, autocommit=True)
return conn
執(zhí)行SQL語(yǔ)句的組件mysql_exec_sql.py
# -*- coding: utf-8 -*-
import mysql_get_db_connect
def mysql_exec_dml_sql(db_host, db_port, exec_sql):
conn = mysql_get_db_connect.mysql_get_db_connect(db_host, db_port)
with conn.cursor() as cursor_db:
cursor_db.execute(exec_sql)
conn.commit()
def mysql_exec_select_sql(db_host, db_port, exec_sql):
conn = mysql_get_db_connect.mysql_get_db_connect(db_host, db_port)
with conn.cursor() as cursor_db:
cursor_db.execute(exec_sql)
sql_rst = cursor_db.fetchall()
return sql_rst
def mysql_exec_select_sql_include_colnames(db_host, db_port, exec_sql):
conn = mysql_get_db_connect.mysql_get_db_connect(db_host, db_port)
with conn.cursor() as cursor_db:
cursor_db.execute(exec_sql)
sql_rst = cursor_db.fetchall()
col_names = cursor_db.description
return sql_rst, col_names
發(fā)郵件的功能send_monitor_mail.py
# -*- coding: utf-8 -*-
# pip3 install PyEmail
import smtplib
from email.mime.text import MIMEText
import configparser
import os
import sys
# 發(fā)送告警郵件
def send_monitor_mail(mail_subject, mail_body, mail_receivers="testwukongbaigujing@qq.com"):
db_ps_file = os.path.join(sys.path[0], "qqmssqltest_db_server_conf.ini")
config = configparser.ConfigParser()
config.read(db_ps_file, encoding="utf-8")
mail_host = config.get('mail', 'host')
mail_port = config.get('mail', 'port')
# mail_user = config.get('mail', 'user')
# mail_pwd = config.get('mail', 'pwd')
sender = config.get('mail', 'sender')
# receivers = config.get('mail', 'receivers')
# 發(fā)送HTML格式郵件
message = MIMEText(mail_body, 'html', 'utf-8')
# message = MIMEText(mail_body, 'plain', 'utf-8')
message['subject'] = mail_subject
message['From'] = sender
message['To'] = mail_receivers
try:
smtpObj = smtplib.SMTP()
smtpObj.connect(mail_host, mail_port) # 25 為 SMTP 端口號(hào)
# SMTP AUTH extension not supported by server.
# https://github.com/miguelgrinberg/microblog/issues/76
# smtpObj.ehlo()
# smtpObj.starttls()
# smtpObj.login(mail_user, mail_pwd)
smtpObj.sendmail(sender, mail_receivers, message.as_string())
smtpObj.quit()
print("郵件發(fā)送成功")
except Exception as e:
print(e)
# except smtplib.SMTPException:
# print("Error: 無法發(fā)送郵件")
3.主要功能代碼
收集到的DB數(shù)據(jù)文件的信息保存到表mssql_dblogsize中,其建表的腳本如下:
CREATE TABLE [dbo].[mssql_dblogsize](
[id] [int] IDENTITY(1,1) NOT NULL,
[createtime] [datetime] NULL,
[vip] [nvarchar](100) NULL,
[port] [nvarchar](100) NULL,
[Environment] [nvarchar](200) NULL,
[Dbname] [varchar](200) NULL,
[Logical_Name] [varchar](200) NULL,
[Physical_Name] [varchar](1500) NULL,
[Size] [bigint] NULL,
PRIMARY KEY CLUSTERED
(
[id] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
) ON [PRIMARY]
GO
ALTER TABLE [dbo].[mssql_dblogsize] ADD DEFAULT (getdate()) FOR [createtime]
GO
為了方便對(duì)表mssql_dblogsize的數(shù)據(jù)進(jìn)行管理和展示,在其基礎(chǔ)上抽象加工出了一個(gè)視圖v_mssql_dblogsize,注意Size大小的轉(zhuǎn)換(Size/128/1024 as SizeGB)
創(chuàng)建視圖的腳本如下:
CREATE view [dbo].[v_mssql_dblogsize]
as
SELECT [id]
,[createtime]
,[vip]
,[port]
,[Environment]
,[Dbname]
,[Logical_Name]
,[Physical_Name]
,Size/128/1024 as SizeGB
FROM [dbo].[mssql_dblogsize]
where size >50*128*1024
and Physical_Name like '%ldf%'
GO
本測(cè)試實(shí)例使用的數(shù)據(jù)庫(kù)為qqDB,監(jiān)控的各個(gè)DB Server保存在了表QQDBServer中,注意Port 不一定為標(biāo)準(zhǔn)端口1433.
collect_mssql_dblogsize_info.py
# -*- coding: utf-8 -*-
import sys
import os
import configparser
import pymssql
import mssql_get_db_connect
import mssql_exec_sql
from datetime import datetime
def collect_mssql_dblogsize_info():
db_ps_file = os.path.join(sys.path[0], "qqmssqltest_db_server_conf.ini")
config = configparser.ConfigParser()
config.read(db_ps_file, encoding="utf-8")
m_db_host = config.get('sqlserver_qq', 'db_host')
m_db_port = config.getint('sqlserver_qq', 'db_port')
# 獲取需要遍歷的DB列表
exec_sql_1 = """
SELECT IP, case Port when '1444,1433' then '1433' else Port end as Port, Environment
FROM qqDB.dbo.QQDBServer
where InUse =1 AND ServerType IN ('SQL')
and IP=VIP ;
"""
sql_rst_1 = mssql_exec_sql.mssql_exec_select_sql(m_db_host, m_db_port, exec_sql_1)
for j in sql_rst_1:
db_host_2 = j[0]
db_port_2 = j[1]
db_Environment = j[2]
exec_sql_2 = """
select '""" + db_host_2 + """' as vip, '""" + db_port_2 + """' as port, '""" + db_Environment + """' as Environment,DB_NAME(database_id) AS DatabaseName,
Name AS Logical_Name,
Physical_Name, size
FROM master.sys.master_files;
"""
try:
sql_rst_2 = mssql_exec_sql.mssql_exec_select_sql(db_host_2, db_port_2, exec_sql_2)
except Exception as e:
print(e)
for k in sql_rst_2:
exec_sql_3 = """
insert into qqDB..mssql_dblogsize([vip], [port], [Environment], [Dbname], [Logical_Name], [Physical_Name], [Size])
values('%s', '%s', '%s', '%s', '%s', '%s', '%s');
"""
conn = mssql_get_db_connect.mssql_get_db_connect(m_db_host, m_db_port)
with conn.cursor() as cursor_db:
cursor_db.execute(exec_sql_3 % (k[0], k[1], k[2], k[3], k[4], k[5], k[6] ))
conn.commit()
collect_mssql_dblogsize_info()
告警郵件的功能實(shí)現(xiàn)為mssql_alert_dblogsize.py,此份代碼的告警閾值設(shè)置的為50G,數(shù)據(jù)來自于視圖v_mssql_dblogsize。
# -*- coding: utf-8 -*-
import sys
import os
import configparser
import pymssql
import mssql_get_db_connect
import mssql_exec_sql
import datetime
import send_monitor_mail
import pandas as pd
def mssql_alert_dblogsize():
mail_subject = "SQL Server DB Log Size Greater than 50G, please check!!! "
mail_receivers = "testDBAgrp@qtiantianq.com"
db_ps_file = os.path.join(sys.path[0], "qqmssqltest_db_server_conf.ini")
config = configparser.ConfigParser()
config.read(db_ps_file, encoding="utf-8")
m_db_host = config.get('sqlserver_qq', 'db_host')
m_db_port = config.getint('sqlserver_qq', 'db_port')
# 獲取需要遍歷的DB列表
exec_sql_4 = """
SELECT [vip] as IP,[port],[Environment],[Dbname]
,[Logical_Name],[Physical_Name],[SizeGB],[createtime]
FROM qqDB.[dbo].[v_mssql_dblogsize]
order by VIP,Dbname;
"""
sql_rst_4, col_name = mssql_exec_sql.mssql_exec_select_sql_include_colnames(m_db_host, m_db_port, exec_sql_4)
# print(sql_rst_4)
if len(sql_rst_4):
mail_time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
columns = []
for i in range(len(col_name)):
columns.append(col_name[i][0])
df = pd.DataFrame(columns=columns)
for i in range(len(sql_rst_4)):
df.loc[i] = list(sql_rst_4[i])
mail_body = df.to_html(index=False, justify="left").replace('th>', 'th style = "color:red; text-align:left; background-color: yellow">')
mail_html = "html>body>h4>" + "Deal All : " + "br>h4>" + "以下數(shù)據(jù)庫(kù)的db log文件,已大于50G.請(qǐng)及時(shí)檢查,謝謝! " + "br>h4>" + mail_body + "/body>/html>"
send_monitor_mail.send_monitor_mail(mail_subject=mail_subject, mail_body=mail_html, mail_receivers=mail_receivers)
mssql_alert_dblogsize()
4.實(shí)現(xiàn)
定時(shí)任務(wù)是通過windows的計(jì)劃任務(wù)來實(shí)現(xiàn)的,在此不做過多的敘述。告警郵件的部分截圖如下:
5.附錄
1.報(bào)錯(cuò)定位,判斷是不是log文件過大
https://blog.csdn.net/weixin_30785593/article/details/99912405
2.關(guān)于為什么數(shù)據(jù)庫(kù)log文件過大,我們可以參考以下分享的文章
https://blog.csdn.net/chinadm123/article/details/44941275
到此這篇關(guān)于通過Python實(shí)現(xiàn)對(duì)SQL Server 數(shù)據(jù)文件大小的監(jiān)控告警的文章就介紹到這了,更多相關(guān)PythonSQL Server 數(shù)據(jù)監(jiān)控告警內(nèi)容請(qǐng)搜索腳本之家以前的文章或繼續(xù)瀏覽下面的相關(guān)文章希望大家以后多多支持腳本之家!
您可能感興趣的文章:- Python實(shí)現(xiàn)用手機(jī)監(jiān)控遠(yuǎn)程控制電腦的方法
- python實(shí)現(xiàn)的web監(jiān)控系統(tǒng)
- python自動(dòng)統(tǒng)計(jì)zabbix系統(tǒng)監(jiān)控覆蓋率的示例代碼
- python使用pynput庫(kù)操作、監(jiān)控你的鼠標(biāo)和鍵盤
- python基于watchdog庫(kù)全自動(dòng)化監(jiān)控目錄文件
- python 自動(dòng)監(jiān)控最新郵件并讀取的操作
- python使用Windows的wmic命令監(jiān)控文件運(yùn)行狀況,如有異常發(fā)送郵件報(bào)警
- 用python監(jiān)控服務(wù)器的cpu,磁盤空間,內(nèi)存,超過郵件報(bào)警
- python 監(jiān)控服務(wù)器是否有人遠(yuǎn)程登錄(詳細(xì)思路+代碼)
- 如何基于Python和Flask編寫Prometheus監(jiān)控
- 教你怎么用Python監(jiān)控愉客行車程