#!/usr/bin/env seiscomp-python

############################################################################
# Copyright (C) 2021 by gempa GmbH                                         #
#                                                                          #
# All Rights Reserved.                                                     #
#                                                                          #
# NOTICE: All information contained herein is, and remains                 #
# the property of gempa GmbH and its suppliers, if any. The intellectual   #
# and technical concepts contained herein are proprietary to gempa GmbH    #
# and its suppliers.                                                       #
# Dissemination of this information or reproduction of this material       #
# is strictly forbidden unless prior written permission is obtained        #
# from gempa GmbH.                                                         #
#                                                                          #
# Author: Stephan Herrnkind                                                #
# Email: herrnkind@gempa.de                                                #
############################################################################

"""Check the character set and collation setup of a MySQL database.

The script inspects the database, table and column level character sets and
collations and writes SQL migration scripts for the issues it finds.
"""

import os
import sys

from collections import OrderedDict

from seiscomp import client, logging


def writeUpdateStatements(database, tables, charset):
    """Write an SQL script converting a database to ``charset``.

    The script changes the database default to ``charset`` with the
    ``<charset>_bin`` collation, converts every table accordingly and finally
    issues an ``ANALYZE TABLE`` for each table.

    Args:
        database: Name of the database (schema).
        tables: Iterable of table names.
        charset: Target character set name, e.g. ``utf8mb4``.

    Returns:
        Path of the SQL file written below ``/tmp``.
    """
    # The names are iterated twice below; materialize them so that one-shot
    # iterables (e.g. generators) work as well.
    tables = list(tables)

    filename = f"/tmp/update-mysql-charset-{charset}-{database}.sql"
    with open(filename, "w", encoding="utf8") as f:
        print(
            f"ALTER DATABASE `{database}` "
            f"CHARACTER SET {charset} COLLATE {charset}_bin;",
            file=f,
        )
        for table in tables:
            print(
                f"ALTER TABLE `{database}`.`{table}` "
                f"CONVERT TO CHARACTER SET {charset} COLLATE {charset}_bin;",
                file=f,
            )

        print("", file=f)
        for table in tables:
            print(f"ANALYZE TABLE `{database}`.`{table}`;", file=f)

    return filename


def checkBinaryCollation(charset, collation):
    """Return True if ``collation`` is the ``_bin`` collation of ``charset``."""
    return collation == f"{charset}_bin"


class CheckDatabase(client.Application):
    """Application checking charset and collation of a MySQL database."""

    def __init__(self, argc, argv):
        """Configure the application: no daemon mode, messaging and database
        enabled, no connection retries, logging to stderr."""
        super().__init__(argc, argv)
        self.setDaemonEnabled(False)
        self.setMessagingEnabled(True)
        self.setDatabaseEnabled(True, True)
        self.setConnectionRetries(0)
        self.setLoggingToStdErr(True)

    def validateParameters(self):
        """Validate parameters via the base class and adjust messaging.

        Returns:
            False if the base class validation fails, True otherwise.
        """
        if not super().validateParameters():
            return False

        # Disable messaging if database connection string is provided
        if self.databaseURI():
            self.setMessagingEnabled(False)

        return True

    def printUsage(self):
        """Print usage line, the base class options and usage examples."""
        print(
            f"""Usage:
  {os.path.basename(__file__)} [options]"""
        )

        client.Application.printUsage(self)

        print(
            f"""Examples:
Run the script getting the database parameters from default messaging
  {os.path.basename(__file__)}

Run the script specifiying the database parameters
  {os.path.basename(__file__)} --debug -d mysql://sysop:sysop@localhost/seiscomp"""
        )

    def run(self):
        """Inspect the database and report charset/collation issues.

        Character sets are collected at database, table and column level. An
        issue is reported if ``utf8mb4`` is not among them, if more than one
        character set is in use, or if any collation is not the binary
        collation of its character set. For each issue one or more SQL
        migration scripts are written via ``writeUpdateStatements``.

        Returns:
            True if no issue was found or if the database type is not
            ``mysql``. False if no database connection is available, a query
            failed or issues were found.
        """
        if not self.query():
            logging.error("No database connection available")
            return False

        dbType = self.databaseType()
        if dbType and dbType != "mysql":
            print(f"No tests for database type {dbType} available.")
            return True

        db = self.query().driver()

        # query database name
        q = "SELECT DATABASE()"
        if not db.beginQuery(q) or not db.fetchRow() or not db.getRowFieldString(0):
            logging.error("Could not query database name")
            return False

        dbName = db.getRowFieldString(0)
        db.endQuery()
        logging.info(f"Checking encoding of database: {dbName}")

        # collect charset found at database, table and column level
        charsets = set()

        # select default database character set and collation
        q = (
            "SELECT default_character_set_name, default_collation_name "
            "FROM information_schema.SCHEMATA "
            f"WHERE schema_name = '{dbName}'"
        )
        if not db.beginQuery(q) or not db.fetchRow() or db.getRowFieldCount() != 2:
            logging.error("Could not query default database charset and collation")
            return False

        charset = db.getRowFieldString(0)
        collation = db.getRowFieldString(1)
        db.endQuery()

        # The database default applies to tables created later on, so it has
        # to take part in the charset comparison as well. Previously it was
        # only considered for the collation check.
        charsets.add(charset)
        binCollation = checkBinaryCollation(charset, collation)
        logging.debug(f"{dbName:<48}{charset} -> {collation}")

        # select default table character set and collation
        q = (
            "SELECT T.table_name, CCSA.character_set_name, CCSA.collation_name "
            "FROM information_schema.`TABLES` T, "
            "information_schema.`COLLATION_CHARACTER_SET_APPLICABILITY` CCSA "
            "WHERE CCSA.collation_name = T.table_collation AND "
            f"T.table_schema = '{dbName}' "
            "ORDER BY T.table_name"
        )
        if not db.beginQuery(q) or not db.fetchRow() or db.getRowFieldCount() != 3:
            logging.error("Could not query default charset and collation of tables")
            return False

        # table name -> (charset, collation), in query order
        tables = OrderedDict()
        while True:
            table, charset, collation = (
                db.getRowFieldString(col) for col in range(0, 3)
            )
            tables[table] = (charset, collation)
            charsets.add(charset)
            binCollation = binCollation and checkBinaryCollation(charset, collation)
            if not db.fetchRow():
                break

        db.endQuery()

        # select charset and collation of all tables and columns
        q = (
            "SELECT table_name, column_name, character_set_name, collation_name "
            "FROM information_schema.`COLUMNS` "
            f"WHERE table_schema = '{dbName}' "
            "ORDER BY table_name, column_name"
        )
        if not db.beginQuery(q) or not db.fetchRow() or db.getRowFieldCount() != 4:
            logging.error("Could not query charset and collation of columns")
            return False

        prevTable = None
        while True:
            table, col, charset, collation = (
                db.getRowFieldString(col) for col in range(0, 4)
            )
            if prevTable != table:
                # Tables not returned by the table level query are still
                # tracked, with unknown charset and collation.
                if table not in tables:
                    tables[table] = ("?", "?")
                tCharset, tCollation = tables[table]
                logging.debug(f" {table:<44}{tCharset} -> {tCollation}")
                prevTable = table

            # Only columns reporting a character set are evaluated.
            if charset:
                logging.debug(f" {col:<40}{charset} -> {collation}")
                charsets.add(charset)
                binCollation = binCollation and checkBinaryCollation(charset, collation)

            if not db.fetchRow():
                break

        db.endQuery()

        tableNames = list(tables)
        issues = []
        utf8mb4 = "utf8mb4"

        if utf8mb4 not in charsets:
            filename = writeUpdateStatements(dbName, tableNames, utf8mb4)
            issues.append(
                f"Your database is not configured with the {utf8mb4} character set. "
                "Certain unicode characters may not be stored correctly. Consider "
                f"applying the migrations in:\n - {filename}"
            )

        noBinText = (
            "Found collation other than 'binary'. Case-insensitive collations should "
            "be avoided because they may lead to publicID collisions. "
        )

        if len(charsets) > 1:
            filenames = []
            # Sorted to list the suggested files in a reproducible order.
            for charset in sorted(charsets):
                filename = writeUpdateStatements(dbName, tableNames, charset)
                if charset == utf8mb4:
                    filename += " (preferred)"
                filenames.append(filename)

            fileNamesText = "\n - ".join(filenames)
            collationText = "" if binCollation else noBinText
            issues.append(
                "Found more than one character set. It is recommended to use the same "
                f"character set across all tables. {collationText}"
                "Consider applying the migrations in one of the following files:"
                f"\n - {fileNamesText}"
            )
        elif not binCollation:
            # Exactly one character set was found, migrate to its binary
            # collation.
            filename = writeUpdateStatements(dbName, tableNames, charsets.pop())
            issues.append(
                f"{noBinText}Consider applying the migrations in:\n - {filename}"
            )

        if issues:
            print("Found database issues:")
            for issue in issues:
                print(f" * {issue}")

            print(
                """
Update instructions:
  * Stop scmaster
  * Ensure, no other modules like scdb, scardac, etc. or custom tools from internal or external clients attempt accessing the database.
  * Login to your database, e.g.:
      mysql -u sysop -p
  * Source one of the suggested update scripts:
      SOURCE /tmp/update-mysql-charset-CHARACTERSET-DATABASE.sql"""
            )

            return False

        print("No database issues found.")
        return True


# Main method to call the app
def main(argc, argv):
    """Create the application and run it, returning its result."""
    app = CheckDatabase(argc, argv)
    return app()


# Call the main method if run as script
if __name__ == "__main__":
    sys.exit(main(len(sys.argv), sys.argv))