Automate Source And Target Table Column Comparison With Java
- By : Mydatahack
- Category : Data Engineering, Tools and Utilities
- Tags: Audit Automation, Java
We may encounter a situation where we need to check if the source system adds or drops columns regularly. For example, when the source system is constantly going through a heavy development, the audit process automation can be helpful.
The code uses JDBC for both target and source database connections. You need to compile the code with appropriate JDBC drivers. It takes the target schema as an argument and checks all the tables in the specified schema in the target database. It will create an output to show which table has added or dropped columns as a csv file. Database connection and metadata retrieval are managed by the Database class.
The advantage of this design is that you can use the same jar file for multiple data sources as long as you compile the code with JDBC driver. The code is vendor neutral.
The code can be scheduled to run in a server or ETL tool with notification. You can even fetch the csv file and write it to a database table.
Hava fun!
Code
Database.java
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 | package Database; import java.sql.Connection; import java.sql.DriverManager; import java.sql.SQLException; import java.sql.Statement; import java.sql.ResultSet; import java.sql.ResultSetMetaData; import java.util.ArrayList; import java.util.Vector; public class Database { private String className; private String connectionUrl; private String userName; private String passWord; public Database (String cName, String url, String uName, String pName){ className = cName; connectionUrl = url; userName = uName; passWord = pName; } // 1. Create Connection public Connection createConnection(){ Connection connection = null; // (1) Load JDBC Driver try { System.out.println("Loading JDBC Driver..."); Class.forName(className); System.out.println("JDBC loaded successfully."); } catch (ClassNotFoundException e) { throw new RuntimeException(e); } // (2) Create connection try { System.out.println("Connecting to Database ..."); connection = DriverManager.getConnection(connectionUrl, userName, passWord); } catch (SQLException e){ System.out.println(e.toString()); } return connection; } // 2. Close Connection public void closeConnection(Connection conn) { try { conn.close(); System.out.println("Database Connection Closed"); } catch (SQLException e){ System.out.println(e.toString()); } } // 3. Get tables in target schema public Vector<String> getTables(String sql, int columnIndex, Connection conn) { Vector<String> tables = new Vector<String>(); Statement stmt = null; try { stmt = conn.createStatement(); ResultSet rs = stmt.executeQuery(sql); while (rs.next()) { tables.add(rs.getString(columnIndex)); // System.out.println(rs.getString(columnIndex)); } } catch (SQLException e){ System.out.println(e.toString()); } return tables; } // 4. Get Column Metadata public ArrayList<String> getColumns (String table, String schema, Connection conn) { ArrayList<String> columns = new ArrayList<String>(); Statement stmt = null; try { // 4-1 Execute query and get metadata stmt = conn.createStatement(); String query = ""; if (schema == "None") { query = "Select * From " + table + " limit 1;"; } else { query = "Select * From " + schema + "." + table + " limit 1;"; } ResultSet rs = stmt.executeQuery(query); ResultSetMetaData mt = rs.getMetaData(); int columnCount = mt.getColumnCount(); // System.out.println("No of Columns is : " + columnCount); // 4-2 Get column string and append to ArrayList for (int i = 1; i <= columnCount; i++) { columns.add(mt.getColumnLabel(i).toLowerCase()); } } catch (SQLException e) { System.out.println(e.toString()); } return columns; } } |
Main.java
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 | package CompareColumn; import java.io.FileWriter; import java.io.IOException; import java.io.PrintWriter; import java.sql.Connection; import java.util.ArrayList; import java.util.Iterator; import java.util.Vector; import Database.Database; public class CompareColumn { public static ArrayList<String> getAddedColumns(ArrayList<String> source, ArrayList<String> target){ ArrayList<String> addedColumns = new ArrayList<String>(); Iterator<String> iter = source.iterator(); while (iter.hasNext()) { String col = iter.next(); if (!target.contains(col)) { addedColumns.add(col); } } return addedColumns; } public static ArrayList<String> getDroppedColumns(ArrayList<String> source, ArrayList<String> target){ ArrayList<String> droppedColumns = new ArrayList<String>(); Iterator<String> iter = target.iterator(); while (iter.hasNext()) { String col = iter.next(); if (!source.contains(col)) { droppedColumns.add(col); } } return droppedColumns; } public static void main(String[] args) { // Set variable from argument String targetJDBCClassName = args[0]; String targetDbUrl = args[1]; String targetDbUser = args[2]; String targetDbPass = args[3]; String targetSchema = args[4]; String sourceJDBCClassName = args[5]; String sourceDbUrl = args[6]; String sourceDbUser = args[7]; String sourceDbPass = args[8]; String sourceSchema = args[9]; String sourceSystem = args[10]; String filePath = args[11]; // (1) Prepare file for writing try { FileWriter outFile = new FileWriter(filePath); PrintWriter outText = new PrintWriter(outFile); outText.println(""Source","Table","Descr","Columns""); // (2) Connect to database and compare columns between source & target Database targetDb = new Database(targetJDBCClassName, targetDbUrl, targetDbUser, targetDbPass); Connection targetConnection = targetDb.createConnection(); Database sourceDb = new Database(sourceJDBCClassName, sourceDbUrl, sourceDbUser, sourceDbPass); Connection sourceConnection = sourceDb.createConnection(); Vector<String> targetTables = targetDb.getTables( String.format("SELECT table_name FROM information_schema.tables WHERE table_schema = '%s';", targetSchema), 1, targetConnection); Iterator<String> ilist = targetTables.iterator(); while (ilist.hasNext()) { String tableName = (String) ilist.next(); System.out.println(tableName); ArrayList<String> targetColumns = targetDb.getColumns(tableName, targetSchema, targetConnection); ArrayList<String> sourceColumns = sourceDb.getColumns(tableName, sourceSchema, sourceConnection); ArrayList<String> added = getAddedColumns(sourceColumns, targetColumns); ArrayList<String> dropped = getDroppedColumns(sourceColumns, targetColumns); if (added.size() > 0 ) { System.out.println("Columns added to " + tableName + ": " + added); outText.println(String.format(""%s","%s","Added","%s"", sourceSystem, tableName, added)); } else { // System.out.println("No column added to " + tableName); } if (dropped.size() > 0 && dropped.size() < targetColumns.size()) { System.out.println("Columns dropped from " + tableName + ": " + dropped); outText.println(String.format(""%s","%s","Dropped","%s"", sourceSystem, tableName, dropped)); } else if (dropped.size() == targetColumns.size()) { outText.println(String.format(""%s","%s","Table Name Mismatch",""", sourceSystem, tableName)); } else { // System.out.println("No column dropped from " + tableName); } } outText.close(); targetDb.closeConnection(targetConnection); sourceDb.closeConnection(sourceConnection); } catch (IOException error) { System.out.println(error); } System.exit(0); } } |