Automate Source And Target Table Column Comparison With Java

We may encounter a situation where we need to check if the source system adds or drops columns regularly. For example, when the source system is constantly going through a heavy development, the audit process automation can be helpful.

The code uses JDBC for both target and source database connections. You need to compile the code with appropriate JDBC drivers. It takes the target schema as an argument and checks all the tables in the specified schema in the target database. It will create an output to show which table has added or dropped columns as a csv file. Database connection and metadata retrieval are managed by the Database class.

The advantage of this design is that you can use the same jar file for multiple data sources as long as you compile the code with JDBC driver. The code is vendor neutral.

The code can be scheduled to run in a server or ETL tool with notification. You can even fetch the csv file and write it to a database table.

Hava fun!

Code

Database.java

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
package Database;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;
import java.sql.Statement;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.util.ArrayList;
import java.util.Vector;

public class Database {
   
    private String className;
    private String connectionUrl;
    private String userName;
    private String passWord;
   
    public Database (String cName, String url, String uName, String pName){
        className = cName;
        connectionUrl = url;
        userName = uName;
        passWord = pName;
    }
   
    // 1. Create Connection
    public Connection createConnection(){
        Connection connection = null;
        // (1) Load JDBC Driver
        try {
            System.out.println("Loading JDBC Driver...");
            Class.forName(className);
            System.out.println("JDBC loaded successfully.");
        }
        catch (ClassNotFoundException e) {
            throw new RuntimeException(e);
        }
        // (2) Create connection
        try {
            System.out.println("Connecting to Database ...");
            connection = DriverManager.getConnection(connectionUrl, userName, passWord);
        }
        catch (SQLException e){
            System.out.println(e.toString());
        }
        return connection;
    }
   
    // 2. Close Connection
    public void closeConnection(Connection conn) {
        try {
            conn.close();
            System.out.println("Database Connection Closed");
        }
        catch (SQLException e){
            System.out.println(e.toString());
        }
    }
   
    // 3. Get tables in target schema
    public Vector<String> getTables(String sql, int columnIndex, Connection conn) {
        Vector<String> tables = new Vector<String>();
        Statement stmt = null;
        try {
            stmt = conn.createStatement();
            ResultSet rs = stmt.executeQuery(sql);
            while (rs.next()) {
                tables.add(rs.getString(columnIndex));
                // System.out.println(rs.getString(columnIndex));
            }
        }
        catch (SQLException e){
            System.out.println(e.toString());
        }
        return tables; 
    }
   
    // 4. Get Column Metadata
    public ArrayList<String> getColumns (String table, String schema, Connection conn) {
        ArrayList<String> columns = new ArrayList<String>();
        Statement stmt = null;
        try {
            // 4-1 Execute query and get metadata

            stmt = conn.createStatement();
            String query = "";
            if (schema == "None") {
                query = "Select * From " + table + " limit 1;";
            }
            else {
                query = "Select * From " + schema + "." + table + " limit 1;";
            }
            ResultSet rs = stmt.executeQuery(query);
            ResultSetMetaData mt = rs.getMetaData();
            int columnCount = mt.getColumnCount();
            // System.out.println("No of Columns is : " + columnCount);
           
            // 4-2 Get column string and append to ArrayList
            for (int i = 1; i <= columnCount; i++) {
                columns.add(mt.getColumnLabel(i).toLowerCase());
            }
        }
        catch (SQLException e) {
            System.out.println(e.toString());
        }
        return columns;
    }
}

Main.java

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
package CompareColumn;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.sql.Connection;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.Vector;

import Database.Database;

public class CompareColumn {
   
    public static ArrayList<String> getAddedColumns(ArrayList<String> source,
                            ArrayList<String> target){
        ArrayList<String> addedColumns = new ArrayList<String>();
        Iterator<String> iter = source.iterator();
        while (iter.hasNext()) {
            String col = iter.next();
            if (!target.contains(col)) {
                addedColumns.add(col);
            }
        }
        return addedColumns;       
    }
   
    public static ArrayList<String> getDroppedColumns(ArrayList<String> source,
                            ArrayList<String> target){
        ArrayList<String> droppedColumns = new ArrayList<String>();
        Iterator<String> iter = target.iterator();
        while (iter.hasNext()) {
            String col = iter.next();
            if (!source.contains(col)) {
                droppedColumns.add(col);
            }
        }
        return droppedColumns;
    }

    public static void main(String[] args) {
       
        // Set variable from argument
       
        String targetJDBCClassName = args[0];
        String targetDbUrl = args[1];
        String targetDbUser = args[2];
        String targetDbPass = args[3];
        String targetSchema = args[4];
       
        String sourceJDBCClassName = args[5];
        String sourceDbUrl = args[6];
        String sourceDbUser = args[7];
        String sourceDbPass = args[8];
        String sourceSchema = args[9];
        String sourceSystem = args[10];

        String filePath = args[11];
       
       
        // (1) Prepare file for writing
        try {
            FileWriter outFile = new FileWriter(filePath);
            PrintWriter outText = new PrintWriter(outFile);
            outText.println(""Source","Table","Descr","Columns"");
           
            // (2) Connect to database and compare columns between source & target
           
               
            Database targetDb = new Database(targetJDBCClassName, targetDbUrl,
                            targetDbUser, targetDbPass);
            Connection targetConnection = targetDb.createConnection();
           
            Database sourceDb = new Database(sourceJDBCClassName, sourceDbUrl,
                            sourceDbUser, sourceDbPass);
            Connection sourceConnection = sourceDb.createConnection();
           
            Vector<String> targetTables = targetDb.getTables(
                String.format("SELECT table_name FROM information_schema.tables WHERE table_schema = '%s';",
                targetSchema), 1, targetConnection);
           
            Iterator<String> ilist = targetTables.iterator();
           
            while (ilist.hasNext()) {
                String tableName = (String) ilist.next();
                System.out.println(tableName);
               
                ArrayList<String> targetColumns = targetDb.getColumns(tableName,
                                    targetSchema, targetConnection);
                ArrayList<String> sourceColumns = sourceDb.getColumns(tableName,
                                    sourceSchema, sourceConnection);
               
                ArrayList<String> added = getAddedColumns(sourceColumns, targetColumns);
                ArrayList<String> dropped = getDroppedColumns(sourceColumns, targetColumns);
               
                if (added.size() > 0 ) {
                    System.out.println("Columns added to " + tableName + ": " + added);
                    outText.println(String.format(""%s","%s","Added","%s"",
                                sourceSystem, tableName, added));
                }
                else {
                    // System.out.println("No column added to " + tableName);
                }
                if (dropped.size() > 0 && dropped.size() < targetColumns.size()) {
                    System.out.println("Columns dropped from " + tableName + ": " + dropped);
                    outText.println(String.format(""%s","%s","Dropped","%s"",
                                    sourceSystem, tableName, dropped));
                }
                else if (dropped.size() == targetColumns.size()) {
                    outText.println(String.format(""%s","%s","Table Name Mismatch",""",
                                    sourceSystem, tableName));
                }
                else {
                    // System.out.println("No column dropped from " + tableName);
                }
            }
            outText.close();
            targetDb.closeConnection(targetConnection);
            sourceDb.closeConnection(sourceConnection);
        }
        catch (IOException error) {
            System.out.println(error);
        }
        System.exit(0);
    }
}
Data Engineering
Sending XML Payload and Converting XML Response to JSON with Python

If you need to interact with a REST endpoint that takes a XML string as a payload and returns another XML string as a response, this is the quick guide if you want to use Python. If you want to do it with Node.js, you can check out the post …

Data Engineering
Sending XML Payload and Converting XML Response to JSON with Node.js

Here is the quick Node.js example of interacting with a rest API endpoint that takes XML string as a payload and return with XML string as response. Once we get the response, we will convert it to a JSON object. For this example, we will use the old-school QAS (Quick …

Data Engineering
Downloading All Public GitHub Gist Files

I used to use plug-ins to render code blocks for this blog. Yesterday, I decided to move all the code into GitHub Gist and inject them from there. Using a WordPress plugin to render code blocks can be problematic when update happens. Plugins might not be up to date. It …