Files
2025/bin/gempa-check-database

261 lines
9.2 KiB
Plaintext
Executable File

#!/usr/bin/env seiscomp-python
############################################################################
# Copyright (C) 2021 by gempa GmbH #
# #
# All Rights Reserved. #
# #
# NOTICE: All information contained herein is, and remains #
# the property of gempa GmbH and its suppliers, if any. The intellectual #
# and technical concepts contained herein are proprietary to gempa GmbH #
# and its suppliers. #
# Dissemination of this information or reproduction of this material #
# is strictly forbidden unless prior written permission is obtained #
# from gempa GmbH. #
# #
# Author: Stephan Herrnkind #
# Email: herrnkind@gempa.de #
############################################################################
import os
import sys
from collections import OrderedDict
from seiscomp import client, logging
def writeUpdateStatements(database, tables, charset):
    """Write an SQL script converting a database and its tables to one charset.

    The script is stored as
    ``/tmp/update-mysql-charset-<charset>-<database>.sql`` and contains, in
    this order: one ``ALTER DATABASE`` statement, one ``ALTER TABLE ... CONVERT
    TO CHARACTER SET`` statement per table, an empty line and one ``ANALYZE
    TABLE`` statement per table. The binary collation ``<charset>_bin`` is used
    throughout.

    Args:
        database: Name of the database (schema) to convert.
        tables: Iterable of table names; it is iterated twice.
        charset: Name of the target character set, e.g. ``utf8mb4``.

    Returns:
        The path of the written script.
    """
    collation = f"{charset}_bin"
    filename = f"/tmp/update-mysql-charset-{charset}-{database}.sql"

    with open(filename, "w", encoding="utf8") as out:
        # Collect all statements first, then emit them in a single pass.
        statements = [
            f"ALTER DATABASE `{database}` "
            f"CHARACTER SET {charset} COLLATE {collation};"
        ]
        statements.extend(
            f"ALTER TABLE `{database}`.`{name}` "
            f"CONVERT TO CHARACTER SET {charset} COLLATE {collation};"
            for name in tables
        )
        # Empty line separating the conversion from the analyze statements.
        statements.append("")
        statements.extend(
            f"ANALYZE TABLE `{database}`.`{name}`;" for name in tables
        )
        out.writelines(f"{line}\n" for line in statements)

    return filename
def checkBinaryCollation(charset, collation):
    """Return whether *collation* is the binary collation of *charset*.

    The binary collation of a character set is expected to be named
    ``<charset>_bin``.
    """
    expected = "{}_bin".format(charset)
    return collation == expected
class CheckDatabase(client.Application):
    """Application checking the character set configuration of a database.

    For a MySQL database the character set and collation of the schema, of
    its tables and of its columns are read from ``information_schema``. If the
    ``utf8mb4`` character set is not in use, if more than one character set is
    found or if a non-binary collation is found, SQL migration scripts are
    written via ``writeUpdateStatements`` and the issues are printed.
    """

    def __init__(self, argc, argv):
        super().__init__(argc, argv)

        self.setDaemonEnabled(False)
        self.setMessagingEnabled(True)
        self.setDatabaseEnabled(True, True)
        self.setConnectionRetries(0)
        self.setLoggingToStdErr(True)

    def validateParameters(self):
        """Validate the parameters and disable messaging if not required.

        Returns:
            False if the base class validation fails, True otherwise.
        """
        if not super().validateParameters():
            return False

        # Disable messaging if database connection string is provided
        if self.databaseURI():
            self.setMessagingEnabled(False)

        return True

    def printUsage(self):
        """Print usage line, the base class option help and examples."""
        print(
            f"""Usage:
{os.path.basename(__file__)} [options]"""
        )

        client.Application.printUsage(self)

        print(
            f"""Examples:
Run the script getting the database parameters from default messaging
{os.path.basename(__file__)}
Run the script specifiying the database parameters
{os.path.basename(__file__)} --debug -d mysql://sysop:sysop@localhost/seiscomp"""
        )

    @staticmethod
    def _fetchRows(db, query, fieldCount):
        """Run *query* and return all result rows as tuples of strings.

        The query is always finished with ``endQuery()``, also if it fails.

        Args:
            db: Database driver providing ``beginQuery``, ``fetchRow``,
                ``getRowFieldCount``, ``getRowFieldString`` and ``endQuery``.
            query: SQL statement to execute.
            fieldCount: Number of fields every row is expected to have.

        Returns:
            A non-empty list of ``fieldCount``-tuples, or None if the query
            failed, returned no row or returned an unexpected field count.
        """
        try:
            if (
                not db.beginQuery(query)
                or not db.fetchRow()
                or db.getRowFieldCount() != fieldCount
            ):
                return None

            rows = []
            while True:
                rows.append(
                    tuple(db.getRowFieldString(i) for i in range(fieldCount))
                )
                if not db.fetchRow():
                    break
            return rows
        finally:
            # Release the query on success and on every error path.
            db.endQuery()

    def run(self):
        """Check charset and collation of the database, tables and columns.

        Returns:
            True if no issue was found or if the database type is not
            ``mysql`` (no tests available), False if a query failed or at
            least one issue was found.
        """
        if not self.query():
            logging.error("No database connection available")
            return False

        dbType = self.databaseType()
        if dbType and dbType != "mysql":
            print(f"No tests for database type {dbType} available.")
            return True

        db = self.query().driver()

        # query database name
        rows = self._fetchRows(db, "SELECT DATABASE()", 1)
        if not rows or not rows[0][0]:
            logging.error("Could not query database name")
            return False

        dbName = rows[0][0]
        logging.info(f"Checking encoding of database: {dbName}")

        # collect charset found at database, table and column level
        charsets = set()

        # select default database character set and collation
        q = (
            "SELECT default_character_set_name, default_collation_name "
            "FROM information_schema.SCHEMATA "
            f"WHERE schema_name = '{dbName}'"
        )
        rows = self._fetchRows(db, q, 2)
        if not rows:
            logging.error("Could not query default database charset and collation")
            return False

        charset, collation = rows[0]
        # The schema default takes part in the charset comparison as well,
        # otherwise a default differing from the tables goes unnoticed.
        if charset:
            charsets.add(charset)
        binCollation = checkBinaryCollation(charset, collation)
        logging.debug(f"{dbName:<48}{charset} -> {collation}")

        # select default table character set and collation
        q = (
            "SELECT T.table_name, CCSA.character_set_name, CCSA.collation_name "
            "FROM information_schema.`TABLES` T, "
            "information_schema.`COLLATION_CHARACTER_SET_APPLICABILITY` CCSA "
            "WHERE CCSA.collation_name = T.table_collation AND "
            f"T.table_schema = '{dbName}' "
            "ORDER BY T.table_name"
        )
        rows = self._fetchRows(db, q, 3)
        if not rows:
            logging.error("Could not query default charset and collation of tables")
            return False

        tables = OrderedDict()
        for table, charset, collation in rows:
            tables[table] = (charset, collation)
            charsets.add(charset)
            binCollation = binCollation and checkBinaryCollation(charset, collation)

        # select charset and collation of all tables and columns
        q = (
            "SELECT table_name, column_name, character_set_name, collation_name "
            "FROM information_schema.`COLUMNS` "
            f"WHERE table_schema = '{dbName}' "
            "ORDER BY table_name, column_name"
        )
        rows = self._fetchRows(db, q, 4)
        if not rows:
            logging.error("Could not query charset and collation of columns")
            return False

        prevTable = None
        for table, column, charset, collation in rows:
            if prevTable != table:
                # Tables missing in the table level query get a placeholder.
                tCharset, tCollation = tables.setdefault(table, ("?", "?"))
                logging.debug(f" {table:<44}{tCharset} -> {tCollation}")
                prevTable = table

            # Only columns reporting a character set are evaluated.
            if charset:
                logging.debug(f" {column:<40}{charset} -> {collation}")
                charsets.add(charset)
                binCollation = binCollation and checkBinaryCollation(charset, collation)

        issues = []
        utf8mb4 = "utf8mb4"
        if utf8mb4 not in charsets:
            filename = writeUpdateStatements(dbName, tables.keys(), utf8mb4)
            issues.append(
                f"Your database is not configured with the {utf8mb4} character set. "
                "Certain unicode characters may not be stored correctly. Consider "
                f"applying the migrations in:\n - {filename}"
            )

        noBinText = (
            "Found collation other than 'binary'. Case-insensitive collations should "
            "be avoided because they may lead to publicID collisions. "
        )

        if len(charsets) > 1:
            filenames = []
            # Sorted to list the migration files in a reproducible order.
            for charset in sorted(charsets):
                filename = writeUpdateStatements(dbName, tables.keys(), charset)
                if charset == utf8mb4:
                    filename += " (preferred)"
                filenames.append(filename)

            fileNamesText = "\n - ".join(filenames)
            issues.append(
                "Found more than one character set. It is recommended to use the same "
                f"character set across all tables. {'' if binCollation else noBinText}"
                "Consider applying the migrations in one of the following files:"
                f"\n - {fileNamesText}"
            )
        elif not binCollation:
            # At most one character set is left here; keep it and only switch
            # to its binary collation.
            charset = next(iter(charsets), utf8mb4)
            filename = writeUpdateStatements(dbName, tables.keys(), charset)
            issues.append(
                f"{noBinText}Consider applying the migrations in:\n - {filename}"
            )

        if issues:
            print("Found database issues:")
            for issue in issues:
                print(f" * {issue}")

            print(
                """
Update instructions:
* Stop scmaster
* Ensure, no other modules like scdb, scardac, etc. or custom tools from internal or
external clients attempt accessing the database.
* Login to your database, e.g.:
mysql -u sysop -p
* Source one of the suggested update scripts:
SOURCE /tmp/update-mysql-charset-CHARACTERSET-DATABASE.sql"""
            )
            return False

        print("No database issues found.")
        return True
# Main method to call the app
def main(argc, argv):
    """Create the CheckDatabase application, execute it and return its result."""
    return CheckDatabase(argc, argv)()
# Run the application and use its result as exit code if executed as script
if __name__ == "__main__":
    arguments = sys.argv
    sys.exit(main(len(arguments), arguments))