Some numbers that indicate PostgreSQL performance when using JDBC to query and insert data. All of the tests were done on the same quiescent machine.
...
```java
/**
 * @author <a href="mailto:jal@etc.to">Frits Jalvingh</a>
 * Created on 8-6-17.
 */
import java.sql.*;
import java.util.*;

public class PgReadTest {
    public static void main(String[] args) {
        try {
            Class.forName("org.postgresql.Driver");
            String dbURL = "jdbc:postgresql://localhost:5432/staging_12_tst";
            Properties parameters = new Properties();
            parameters.put("user", "jal");
            parameters.put("password", "jal");

            Connection conn = DriverManager.getConnection(dbURL, parameters);
            conn.setAutoCommit(false);                  // Important: pgJDBC only honors the fetch size inside a transaction

            String query = "select * from staging.boekingdetail";
            long lStartTime = System.currentTimeMillis();
            int size = 0;
            try(PreparedStatement ps = conn.prepareStatement(query)) {
                ps.setFetchSize(65536);                 // Fetch rows in large chunks instead of all at once
                try(ResultSet rs = ps.executeQuery()) {
                    int cols = rs.getMetaData().getColumnCount();
                    while(rs.next()) {
                        for(int i = 1; i <= cols; i++) {
                            Object val = rs.getObject(i); // Touch every column so the driver actually decodes it
                        }
                        size++;
                    }
                }
            }
            long lEndTime = System.currentTimeMillis();
            long difference = lEndTime - lStartTime;
            double rps = (double) size / ((double) difference / 1000);
            System.out.println("Read " + size + " rows in " + difference + " milliseconds, " + rps + " rows per second");
        } catch(Exception e) {
            e.printStackTrace();
        }
    }
}
```
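The fetch size can also be set for the whole connection instead of per statement, using pgJDBC's `defaultRowFetchSize` connection property. A minimal sketch of that variant, reusing the URL and credentials from the test above:

```java
import java.sql.*;
import java.util.*;

public class PgReadFetchSizeTest {
    public static void main(String[] args) throws Exception {
        Class.forName("org.postgresql.Driver");
        String dbURL = "jdbc:postgresql://localhost:5432/staging_12_tst";
        Properties parameters = new Properties();
        parameters.put("user", "jal");
        parameters.put("password", "jal");
        // pgJDBC-specific: applies a fetch size to every statement on this
        // connection, so no per-statement setFetchSize() call is needed.
        parameters.put("defaultRowFetchSize", "65536");
        try(Connection conn = DriverManager.getConnection(dbURL, parameters)) {
            conn.setAutoCommit(false);                  // Still required for cursor-based fetching
            try(PreparedStatement ps = conn.prepareStatement("select * from staging.boekingdetail");
                ResultSet rs = ps.executeQuery()) {
                int rows = 0;
                while(rs.next())
                    rows++;
                System.out.println("Read " + rows + " rows");
            }
        }
    }
}
```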
...
```java
/**
 * @author <a href="mailto:jal@etc.to">Frits Jalvingh</a>
 * Created on 8-6-17.
 */
import java.sql.*;
import java.util.*;

public class PgInsertTest {
    public static void main(String[] args) {
        try {
            Class.forName("org.postgresql.Driver");
            String dbURL = "jdbc:postgresql://localhost:5432/datavault_12_tst";
            Properties parameters = new Properties();
            parameters.put("user", "jal");
            parameters.put("password", "jal");

            Connection conn = DriverManager.getConnection(dbURL, parameters);
            conn.setAutoCommit(false);                  // Important: commit in large chunks, not per row

            String query = "insert into datavault.h_boekingdetail(id_h,source_system_id,organisation_id,load_dts,boekingdetailid) values(?,?,?,?,?)";
            long lStartTime = System.currentTimeMillis();
            int size = 0;
            int commitSize = 50_000;                    // Commit every commitSize rows
            int commitCount = 0;
            int batchSize = 0;                          // 0 = single inserts; > 0 = JDBC batches of this size
            int batchCount = 0;
            try(PreparedStatement ps = conn.prepareStatement(query)) {
                long now = System.currentTimeMillis();
                for(int row = 0; row < 1_000_000; row++) {
                    ps.setInt(1, row + 1_000_000);
                    ps.setInt(2, 1);
                    ps.setInt(3, 12);
                    ps.setTimestamp(4, new Timestamp(now));
                    ps.setString(5, "boe" + row + "kingdetail" + row);
                    size++;
                    commitCount++;

                    if(batchSize <= 0) {
                        ps.executeUpdate();
                        if(commitCount > commitSize) {
                            commitCount = 0;
                            conn.commit();
                        }
                    } else {
                        ps.addBatch();
                        batchCount++;
                        if(batchCount >= batchSize) {
                            ps.executeBatch();
                            batchCount = 0;
                            if(commitCount > commitSize) {
                                commitCount = 0;
                                conn.commit();
                            }
                        }
                    }
                }
                if(batchSize > 0 && batchCount > 0) {
                    ps.executeBatch();                  // Flush the last partial batch
                }
            }
            conn.commit();
            long lEndTime = System.currentTimeMillis();
            long difference = lEndTime - lStartTime;
            double rps = (double) size / ((double) difference / 1000);
            System.out.println("Inserted " + size + " rows in " + difference + " milliseconds, " + rps + " rows per second");
        } catch(Exception e) {
            e.printStackTrace();
        }
    }
}
```
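The batch path above can be combined with a pgJDBC-specific optimization: the `reWriteBatchedInserts` connection property, which rewrites a batch of single-row inserts into multi-row insert statements on the wire (it needs a reasonably recent driver, 9.4.1209 or later). A sketch of the batched variant with the property enabled:

```java
import java.sql.*;
import java.util.*;

public class PgBatchInsertTest {
    public static void main(String[] args) throws Exception {
        Class.forName("org.postgresql.Driver");
        String dbURL = "jdbc:postgresql://localhost:5432/datavault_12_tst";
        Properties parameters = new Properties();
        parameters.put("user", "jal");
        parameters.put("password", "jal");
        // pgJDBC rewrites batches into multi-row "insert ... values (...),(...)" statements.
        parameters.put("reWriteBatchedInserts", "true");
        try(Connection conn = DriverManager.getConnection(dbURL, parameters)) {
            conn.setAutoCommit(false);
            String query = "insert into datavault.h_boekingdetail(id_h,source_system_id,organisation_id,load_dts,boekingdetailid) values(?,?,?,?,?)";
            long now = System.currentTimeMillis();
            try(PreparedStatement ps = conn.prepareStatement(query)) {
                for(int row = 0; row < 1_000_000; row++) {
                    ps.setInt(1, row + 1_000_000);
                    ps.setInt(2, 1);
                    ps.setInt(3, 12);
                    ps.setTimestamp(4, new Timestamp(now));
                    ps.setString(5, "boe" + row + "kingdetail" + row);
                    ps.addBatch();
                    if((row + 1) % 10_000 == 0)
                        ps.executeBatch();              // Send a batch of 10000 rows
                }
                ps.executeBatch();                      // Flush the last partial batch
            }
            conn.commit();
        }
    }
}
```

Whether (and how much) this helps should show up directly in the rows-per-second output of the test.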
...
Writing the same data to Oracle is quite a lot faster. Without indexes and a primary key, but with full transaction logging, the performance was:
- Inserted 1000000 rows in 1072 milliseconds, 1000000.0 rows per second
- Inserted 24000000 rows in 47155 milliseconds, 510638.2978723404 rows per second
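For reference, the Oracle variant of the test differs only in the driver class and the connection URL; the rest of PgInsertTest stays the same. A sketch, where host, port, and service name are placeholders for the actual instance:

```java
// Only these lines change compared to the PostgreSQL version of PgInsertTest.
// Host, port, and service name below are placeholders, not the real setup.
Class.forName("oracle.jdbc.OracleDriver");
String dbURL = "jdbc:oracle:thin:@//localhost:1521/ORCL";
Connection conn = DriverManager.getConnection(dbURL, parameters);
conn.setAutoCommit(false);
```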
The fastest way so far: using the PostgreSQL COPY command
The fastest method I have found so far to do inserts is PostgreSQL's COPY command. I rewrote the earlier Java insert code as follows:
```java
/**
 * @author <a href="mailto:jal@etc.to">Frits Jalvingh</a>
 * Created on 8-6-17.
 */
import org.postgresql.*;
import org.postgresql.copy.*;

import java.io.*;
import java.sql.*;
import java.util.*;

public class PgCopyTest {
    public static void main(String[] args) {
        try {
            Class.forName("org.postgresql.Driver");
            String dbURL = "jdbc:postgresql://localhost:5432/datavault_12_tst";
            Properties parameters = new Properties();
            parameters.put("user", "jal");
            parameters.put("password", "jal");

            Connection conn = DriverManager.getConnection(dbURL, parameters);
            conn.setAutoCommit(false);                  // Important

            long lStartTime = System.currentTimeMillis();
            int size = 24_000_000;

            CopyManager cm = ((PGConnection) conn).getCopyAPI();
            cm.copyIn("copy datavault.h_test from stdin with (format text)", new InsertReader(size));
            conn.commit();

            long lEndTime = System.currentTimeMillis();
            long difference = lEndTime - lStartTime;
            double rps = (double) size / ((double) difference / 1000);
            System.out.println("Inserted " + size + " rows in " + difference + " milliseconds, " + rps + " rows per second");
        } catch(Exception e) {
            e.printStackTrace();
        }
    }
}

/**
 * Generates the COPY text stream on the fly: one tab-separated line per record.
 */
class InsertReader extends Reader {
    private final int m_size;
    private final StringBuilder m_buffer = new StringBuilder();
    private int m_bufferIndex;
    private int m_count;

    public InsertReader(int count) {
        m_size = count;
    }

    @Override
    public int read(char[] cbuf, int off, int len) throws IOException {
        int buflen = m_buffer.length();
        int remainder = buflen - m_bufferIndex;
        if(remainder < len && m_count < m_size) {
            generateRecords();
            buflen = m_buffer.length();
            remainder = buflen - m_bufferIndex;
        }
        if(remainder <= 0)
            return -1;                                  // All records generated and the buffer is drained
        if(remainder > len)
            remainder = len;
        m_buffer.getChars(m_bufferIndex, m_bufferIndex + remainder, cbuf, off);
        m_bufferIndex += remainder;
        return remainder;
    }

    private void generateRecords() {
        m_buffer.replace(0, m_bufferIndex, "");         // Drop the part that was already read
        m_bufferIndex = 0;
        int left = m_size - m_count;
        if(left > 1000)
            left = 1000;                                // Generate at most 1000 records per call
        while(left-- > 0) {
            addRecord(m_count++);
        }
    }

    private void addRecord(int nr) {
        m_buffer.append(Integer.toString(nr)).append("\t")
            .append("1").append("\t")
            .append("12").append("\t")
            .append("2017-06-09 00:00:00").append("\t")
            .append("boe").append(nr).append("king").append(nr)
            .append("\n");
    }

    @Override
    public void close() throws IOException {
    }
}
```
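pgJDBC can also accept the COPY data through an OutputStream: `PGCopyOutputStream` wraps a `copy ... from stdin` operation, so records can be pushed instead of pulled through a custom Reader. A minimal sketch of that variant, using the same table and text format as above:

```java
import org.postgresql.PGConnection;
import org.postgresql.copy.PGCopyOutputStream;

import java.io.*;
import java.nio.charset.StandardCharsets;
import java.sql.*;
import java.util.*;

public class PgCopyStreamTest {
    public static void main(String[] args) throws Exception {
        Class.forName("org.postgresql.Driver");
        String dbURL = "jdbc:postgresql://localhost:5432/datavault_12_tst";
        Properties parameters = new Properties();
        parameters.put("user", "jal");
        parameters.put("password", "jal");
        try(Connection conn = DriverManager.getConnection(dbURL, parameters)) {
            conn.setAutoCommit(false);
            long start = System.currentTimeMillis();
            int size = 24_000_000;
            // Closing the writer ends the COPY operation; commit afterwards.
            try(Writer w = new BufferedWriter(new OutputStreamWriter(
                    new PGCopyOutputStream((PGConnection) conn, "copy datavault.h_test from stdin with (format text)"),
                    StandardCharsets.UTF_8), 65536)) {
                for(int nr = 0; nr < size; nr++) {
                    w.write(nr + "\t1\t12\t2017-06-09 00:00:00\tboe" + nr + "king" + nr + "\n");
                }
            }
            conn.commit();
            long difference = System.currentTimeMillis() - start;
            System.out.println("Inserted " + size + " rows in " + difference + " milliseconds");
        }
    }
}
```

This avoids the buffer bookkeeping in InsertReader; whether it is faster or slower than the Reader-based version would have to be measured.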
...
- The text format in which each record's data must be written is error prone, as we need exact text representations for all possible types. Dates, for instance, are hard: you need to use exactly the format the database server expects, or set the server to some known format (which means other tasks running in parallel might suffer); a session-level workaround is sketched after this list.
- It is hard to handle transactions.
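For the date problem specifically, one mitigation is to pin the session's date/time settings just before the COPY, so the strings the generator emits are always interpreted the same way. A sketch that would fit into PgCopyTest right before the copyIn call; treating load_dts as UTC is an assumption for this example:

```java
// Pin this session's date/time interpretation so the generated
// "2017-06-09 00:00:00" strings parse predictably. SET only affects this
// session; interpreting load_dts as UTC is an assumption, not from the tests.
try(Statement st = conn.createStatement()) {
    st.execute("set datestyle = 'ISO, YMD'");
    st.execute("set timezone = 'UTC'");
}
cm.copyIn("copy datavault.h_test from stdin with (format text)", new InsertReader(size));
```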
Stuff that has little or no effect
Parameter changes
For these tests I set the following PostgreSQL configuration parameters:
Parameter | Description |
---|---|
shared_buffers = 512MB | Changing this value to 8GB had no measurable effect. That seems logical: inserts should not need much buffer space, and since every run commits, the time needed to write to disk is the limiting factor. |
Other parameters that might be of importance were set as follows:
```
work_mem = 512MB
synchronous_commit = off
commit_delay = 100000
autovacuum_naptime = 10min
```
The latter was done because I noticed autovacuum running several times during those inserts. That seemed like a bad plan.
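Note that synchronous_commit (and, with superuser rights, commit_delay) can also be changed per session, which makes it easy to compare settings without touching postgresql.conf or restarting. A sketch:

```java
// Per-session equivalents of the config settings above. synchronous_commit is
// user-settable; changing commit_delay requires superuser rights.
try(Statement st = conn.createStatement()) {
    st.execute("set synchronous_commit = off");
    st.execute("set commit_delay = 100000");
}
```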
Using unlogged tables
Setting the table to unlogged had no apparent effect.
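For completeness, switching an existing table to unlogged and back can be done from the test itself. `alter table ... set unlogged` needs PostgreSQL 9.5 or later; on older versions the table has to be created with `create unlogged table` instead:

```java
// Switch the target table to unlogged before the run and back afterwards.
// Requires PostgreSQL 9.5+; wraps any of the insert/COPY tests above.
try(Statement st = conn.createStatement()) {
    st.execute("alter table datavault.h_test set unlogged");
}
// ... run the insert or COPY test here ...
try(Statement st = conn.createStatement()) {
    st.execute("alter table datavault.h_test set logged");
}
```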
Links
- http://postgres.cz/wiki/PostgreSQL_SQL_Tricks
- http://rostislav-matl.blogspot.nl/2011/08/fast-inserts-to-postgresql-with-jdbc.html
- https://www.depesz.com/2007/07/05/how-to-insert-data-to-database-as-fast-as-possible/
...