261 lines
		
	
	
		
			9.2 KiB
		
	
	
	
		
			Plaintext
		
	
	
		
			Executable File
		
	
	
	
	
			
		
		
	
	
			261 lines
		
	
	
		
			9.2 KiB
		
	
	
	
		
			Plaintext
		
	
	
		
			Executable File
		
	
	
	
	
#!/usr/bin/env seiscomp-python
 | 
						|
 | 
						|
############################################################################
 | 
						|
# Copyright (C) 2021 by gempa GmbH                                         #
 | 
						|
#                                                                          #
 | 
						|
# All Rights Reserved.                                                     #
 | 
						|
#                                                                          #
 | 
						|
# NOTICE: All information contained herein is, and remains                 #
 | 
						|
# the property of gempa GmbH and its suppliers, if any. The intellectual   #
 | 
						|
# and technical concepts contained herein are proprietary to gempa GmbH    #
 | 
						|
# and its suppliers.                                                       #
 | 
						|
# Dissemination of this information or reproduction of this material       #
 | 
						|
# is strictly forbidden unless prior written permission is obtained        #
 | 
						|
# from gempa GmbH.                                                         #
 | 
						|
#                                                                          #
 | 
						|
#  Author: Stephan Herrnkind                                               #
 | 
						|
#  Email: herrnkind@gempa.de                                               #
 | 
						|
############################################################################
 | 
						|
 | 
						|
import os
 | 
						|
import sys
 | 
						|
from collections import OrderedDict
 | 
						|
 | 
						|
from seiscomp import client, logging
 | 
						|
 | 
						|
 | 
						|
def writeUpdateStatements(database, tables, charset):
    """Write an SQL migration script converting a database to *charset*.

    The script contains one ``ALTER DATABASE`` statement, one
    ``ALTER TABLE ... CONVERT TO`` statement per table, an empty line and
    finally one ``ANALYZE TABLE`` statement per table. The binary collation
    ``<charset>_bin`` is used throughout.

    Args:
        database: Name of the database (schema) to convert.
        tables: Iterable of table names. It is consumed only once, so
            one-shot iterators and dictionary views are both accepted.
        charset: Target character set name, e.g. ``utf8mb4``.

    Returns:
        The path of the written file:
        ``/tmp/update-mysql-charset-<charset>-<database>.sql``.

    Raises:
        OSError: If the file cannot be created or written.
    """

    def quote(identifier):
        # A backtick inside a backtick-quoted identifier must be doubled,
        # otherwise the generated statement is not valid SQL.
        return "`" + identifier.replace("`", "``") + "`"

    # Materialize once: the table names are needed for two statement groups.
    tableNames = list(tables)
    dbIdent = quote(database)
    qualified = [f"{dbIdent}.{quote(table)}" for table in tableNames]

    statements = [
        f"ALTER DATABASE {dbIdent} "
        f"CHARACTER SET {charset} COLLATE {charset}_bin;"
    ]
    statements.extend(
        f"ALTER TABLE {name} "
        f"CONVERT TO CHARACTER SET {charset} COLLATE {charset}_bin;"
        for name in qualified
    )
    statements.append("")
    statements.extend(f"ANALYZE TABLE {name};" for name in qualified)

    filename = f"/tmp/update-mysql-charset-{charset}-{database}.sql"
    with open(filename, "w", encoding="utf8") as f:
        f.write("\n".join(statements) + "\n")

    return filename
 | 
						|
 | 
						|
 | 
						|
def checkBinaryCollation(charset, collation):
    """Return True if *collation* is the binary collation of *charset*.

    The binary collation of a character set is named ``<charset>_bin``.
    The 3-byte UTF-8 character set is known under the two names ``utf8``
    and ``utf8mb3``; depending on the server version the character set and
    its collations may be reported with different prefixes (e.g. character
    set ``utf8mb3`` with collation ``utf8_bin``). Both spellings are
    therefore accepted for that character set.

    Args:
        charset: Character set name as reported by the database.
        collation: Collation name as reported by the database.

    Returns:
        True if the collation is the binary collation of the character
        set, False otherwise.
    """
    aliases = {"utf8": "utf8mb3", "utf8mb3": "utf8"}
    candidates = [charset]
    if charset in aliases:
        candidates.append(aliases[charset])

    return any(collation == f"{name}_bin" for name in candidates)
 | 
						|
 | 
						|
 | 
						|
class CheckDatabase(client.Application):
    """Application checking character sets and collations of a MySQL database.

    The database default, every table and every column are inspected. If the
    ``utf8mb4`` character set is not used, if more than one character set is
    in use, or if a collation other than the binary one is found, SQL
    migration scripts are written via ``writeUpdateStatements`` and update
    instructions are printed.
    """

    def __init__(self, argc, argv):
        """Configure the application.

        Daemon mode is disabled, messaging and database access are enabled,
        connection retries are set to 0 and logging goes to stderr.
        """
        super().__init__(argc, argv)
        self.setDaemonEnabled(False)
        self.setMessagingEnabled(True)
        self.setDatabaseEnabled(True, True)
        self.setConnectionRetries(0)
        self.setLoggingToStdErr(True)

    def validateParameters(self):
        """Validate parameters; disable messaging if a database URI is set.

        Returns:
            False if the base class validation fails, True otherwise.
        """
        if not super().validateParameters():
            return False

        # Disable messaging if database connection string is provided
        if self.databaseURI():
            self.setMessagingEnabled(False)

        return True

    def printUsage(self):
        """Print usage line, the base class option help and examples."""
        print(
            f"""Usage:
  {os.path.basename(__file__)} [options]"""
        )

        client.Application.printUsage(self)

        print(
            f"""Examples:
Run the script getting the database parameters from default messaging
  {os.path.basename(__file__)}

Run the script specifying the database parameters
  {os.path.basename(__file__)} --debug -d mysql://sysop:sysop@localhost/seiscomp"""
        )

    @staticmethod
    def _fetchRows(db, query, fieldCount):
        """Run *query* and return all result rows as tuples of strings.

        The query is always closed with ``endQuery()`` once ``beginQuery()``
        succeeded, including the failure paths.

        Args:
            db: Database driver providing beginQuery, fetchRow,
                getRowFieldCount, getRowFieldString and endQuery.
            query: SQL statement to execute.
            fieldCount: Expected number of fields per row.

        Returns:
            A non-empty list of ``fieldCount``-tuples, or None if the query
            could not be started, returned no row, or returned rows with an
            unexpected number of fields.
        """
        if not db.beginQuery(query):
            return None

        try:
            if not db.fetchRow() or db.getRowFieldCount() != fieldCount:
                return None

            rows = []
            while True:
                rows.append(
                    tuple(db.getRowFieldString(i) for i in range(fieldCount))
                )
                if not db.fetchRow():
                    break

            return rows
        finally:
            db.endQuery()

    @staticmethod
    def _findIssues(dbName, tables, charsets, binCollation):
        """Derive the list of issue descriptions and write migration scripts.

        Args:
            dbName: Name of the inspected database.
            tables: Names of all tables of the database.
            charsets: Non-empty set of character sets found at table and
                column level.
            binCollation: True if only binary collations were found.

        Returns:
            List of human-readable issue texts; empty if nothing was found.
        """
        issues = []
        utf8mb4 = "utf8mb4"

        if utf8mb4 not in charsets:
            filename = writeUpdateStatements(dbName, tables, utf8mb4)
            issues.append(
                f"Your database is not configured with the {utf8mb4} character set. "
                "Certain unicode characters may not be stored correctly. Consider "
                f"applying the migrations in:\n    - {filename}"
            )

        noBinText = (
            "Found collation other than 'binary'. Case-insensitive collations should "
            "be avoided because they may lead to publicID collisions. "
        )

        if len(charsets) > 1:
            filenames = []
            # Sorted so that the suggested files are listed in a stable order.
            for charset in sorted(charsets):
                filename = writeUpdateStatements(dbName, tables, charset)
                if charset == utf8mb4:
                    filename += " (preferred)"
                filenames.append(filename)
            fileNamesText = "\n    - ".join(filenames)
            issues.append(
                "Found more than one character set. It is recommended to use the same "
                f"character set across all tables. {'' if binCollation else noBinText}"
                "Consider applying the migrations in one of the following files:"
                f"\n    - {fileNamesText}"
            )
        elif not binCollation:
            # Exactly one character set in use: keep it, only fix the collation.
            filename = writeUpdateStatements(dbName, tables, next(iter(charsets)))
            issues.append(
                f"{noBinText}Consider applying the migrations in:\n    - {filename}"
            )

        return issues

    def run(self):
        """Inspect the database and report character set/collation issues.

        Returns:
            True if no issue was found or if the database type is not
            "mysql" (no tests available); False if no database connection
            is available, a query failed, or issues were found.
        """
        query = self.query()
        if not query:
            logging.error("No database connection available")
            return False

        dbType = self.databaseType()

        if dbType and dbType != "mysql":
            print(f"No tests for database type {dbType} available.")
            return True

        db = query.driver()

        # query database name
        rows = self._fetchRows(db, "SELECT DATABASE()", 1)
        if not rows or not rows[0][0]:
            logging.error("Could not query database name")
            return False

        dbName = rows[0][0]
        logging.info(f"Checking encoding of database: {dbName}")

        # Character sets found at table and column level. The database
        # default only contributes to the binary collation check.
        charsets = set()

        # select default database character set and collation
        q = (
            "SELECT default_character_set_name, default_collation_name "
            "FROM information_schema.SCHEMATA "
            f"WHERE schema_name = '{dbName}'"
        )
        rows = self._fetchRows(db, q, 2)
        if not rows:
            logging.error("Could not query default database charset and collation")
            return False

        charset, collation = rows[0]
        binCollation = checkBinaryCollation(charset, collation)
        logging.debug(f"{dbName:<48}{charset} -> {collation}")

        # select default table character set and collation
        q = (
            "SELECT T.table_name, CCSA.character_set_name, CCSA.collation_name "
            "FROM information_schema.`TABLES` T, "
            "information_schema.`COLLATION_CHARACTER_SET_APPLICABILITY` CCSA "
            "WHERE CCSA.collation_name = T.table_collation AND "
            f"T.table_schema = '{dbName}' "
            "ORDER BY T.table_name"
        )
        rows = self._fetchRows(db, q, 3)
        if not rows:
            logging.error("Could not query default charset and collation of tables")
            return False

        tables = OrderedDict()
        for table, charset, collation in rows:
            tables[table] = (charset, collation)
            charsets.add(charset)
            binCollation = binCollation and checkBinaryCollation(charset, collation)

        # select charset and collation of all tables and columns
        q = (
            "SELECT table_name, column_name, character_set_name, collation_name "
            "FROM information_schema.`COLUMNS` "
            f"WHERE table_schema = '{dbName}' "
            "ORDER BY table_name, column_name"
        )
        rows = self._fetchRows(db, q, 4)
        if not rows:
            logging.error("Could not query charset and collation of columns")
            return False

        prevTable = None
        for table, col, charset, collation in rows:
            if prevTable != table:
                # Tables missing from the table-level result still get
                # listed, with unknown table defaults.
                if table not in tables:
                    tables[table] = ("?", "?")
                tCharset, tCollation = tables[table]
                logging.debug(f"    {table:<44}{tCharset} -> {tCollation}")
                prevTable = table

            # Columns without a character set are skipped.
            if charset:
                logging.debug(f"        {col:<40}{charset} -> {collation}")

                charsets.add(charset)
                binCollation = binCollation and checkBinaryCollation(charset, collation)

        issues = self._findIssues(dbName, list(tables), charsets, binCollation)

        if issues:
            print("Found database issues:")
            for issue in issues:
                print(f"  * {issue}")

            print(
                """
Update instructions:
  * Stop scmaster
  * Ensure, no other modules like scdb, scardac, etc. or custom tools from internal or
    external clients attempt accessing the database.
  * Login to your database, e.g.:
      mysql -u sysop -p
  * Source one of the suggested update scripts:
      SOURCE /tmp/update-mysql-charset-CHARACTERSET-DATABASE.sql"""
            )

            return False

        print("No database issues found.")

        return True
 | 
						|
 | 
						|
 | 
						|
# Main method to call the app
 | 
						|
def main(argc, argv):
    """Create the CheckDatabase application, run it and return its result."""
    return CheckDatabase(argc, argv)()
 | 
						|
 | 
						|
 | 
						|
# Call the main method if run as script
 | 
						|
if __name__ == "__main__":
    # Hand the application's return value to the interpreter as exit status.
    exitCode = main(len(sys.argv), sys.argv)
    sys.exit(exitCode)
 |