001/** 002 * 003 */ 004package org.openimaj.picslurper; 005 006import java.io.IOException; 007import java.sql.DriverManager; 008import java.sql.ResultSet; 009import java.sql.SQLException; 010import java.sql.Statement; 011import java.util.Date; 012import java.util.HashMap; 013import java.util.Map; 014 015import javax.xml.datatype.DatatypeConfigurationException; 016import javax.xml.datatype.DatatypeFactory; 017import javax.xml.datatype.XMLGregorianCalendar; 018 019import twitter4j.GeoLocation; 020import twitter4j.HashtagEntity; 021import twitter4j.MediaEntity; 022import twitter4j.Place; 023import twitter4j.RateLimitStatus; 024import twitter4j.Status; 025import twitter4j.URLEntity; 026import twitter4j.User; 027import twitter4j.UserMentionEntity; 028 029import com.mysql.jdbc.Connection; 030 031/** 032 * This class is a status feeder for picslurper that takes statuses from a database. 033 * The default database driver is the MySQL driver and expects a mysql URL in the constructor; 034 * e.g. 035 * jdbc:mysql://localhost/database 036 * <p> 037 * The class connects to the database lazily (that is, only when the feed is started), so the 038 * driver can be changed after construction using {@link #setDriver(String)}. 039 * <p> 040 * The schema of the table must include at least two columns: one that contains the tweet text 041 * and one that contains the created time. The default column names are expected to be 042 * <code>text</code> and <code>created_at</code> respectively. These can be changed by passing 043 * in a map, where the key is the expected column name and the value is the actual column name; 044 * e.g. <code>text -> tweet_text</code> 045 * <p> 046 * The results are paged through in steps of 25 by default, so the database must support the 047 * <code>LIMIT</code> command. 048 * 049 * @author David Dupplaw (dpd@ecs.soton.ac.uk) 050 * @created 18 Sep 2013 051 */ 052public class DatabaseStatusFeeder implements StatusFeeder 053{ 054 protected static class DatabaseFeederStatus implements Status 055 { 056 private final XMLGregorianCalendar cal; 057 private final String text; 058 059 public DatabaseFeederStatus( final String text, final XMLGregorianCalendar cal ) 060 { 061 this.text = text; 062 this.cal = cal; 063 } 064 065 /** */ 066 private static final long serialVersionUID = 1L; 067 068 @Override 069 public int compareTo( final Status o ) { return 0; } 070 071 @Override 072 public RateLimitStatus getRateLimitStatus() { return null; } 073 074 @Override 075 public int getAccessLevel() { return 0; } 076 077 @Override 078 public UserMentionEntity[] getUserMentionEntities() { return null; } 079 080 @Override 081 public URLEntity[] getURLEntities() { return null; } 082 083 @Override 084 public HashtagEntity[] getHashtagEntities() { return null; } 085 086 @Override 087 public MediaEntity[] getMediaEntities() { return null; } 088 089 @Override 090 public Date getCreatedAt() { return this.cal.toGregorianCalendar().getTime(); } 091 092 @Override 093 public long getId() { return 0; } 094 095 @Override 096 public String getText() { return this.text; } 097 098 @Override 099 public String getSource() { return null; } 100 101 @Override 102 public boolean isTruncated() { return false; } 103 104 @Override 105 public long getInReplyToStatusId() { return 0; } 106 107 @Override 108 public long getInReplyToUserId() { return 0; } 109 110 @Override 111 public String getInReplyToScreenName() { return null; } 112 113 @Override 114 public GeoLocation getGeoLocation() { return null; } 115 116 @Override 117 public Place getPlace() { return null; } 118 119 @Override 120 public boolean isFavorited() { return false; } 121 122 @Override 123 public User getUser() { return null; } 124 125 @Override 126 public boolean isRetweet() { return false; } 127 128 @Override 129 public Status getRetweetedStatus() { return null; } 130 131 @Override 132 public long[] getContributors() { return null; } 133 134 @Override 135 public long getRetweetCount() { return 0; } 136 137 @Override 138 public boolean isRetweetedByMe() { return false; } 139 140 @Override 141 public long getCurrentUserRetweetId() { return 0; } 142 143 @Override 144 public boolean isPossiblySensitive() { return false; } 145 146 } 147 148 /** The map of column names */ 149 private Map<String,String> columnNames = new HashMap<String,String>(); 150 151 /** The username to connect to the database */ 152 private final String username; 153 154 /** The password to connect to the database */ 155 private final String password; 156 157 /** The database table where the tweets are stored */ 158 private final String table; 159 160 /** The database connection that is created */ 161 private Connection connection; 162 163 /** The URL to the database */ 164 private final String url; 165 166 /** The database driver to use */ 167 private String databaseDriver = "com.mysql.jdbc.Driver"; 168 169 /** The size of each page of results to retrieve from the database */ 170 private final int pageSize = 25; 171 172 /** 173 * Create a database status feeder using the database, table, 174 * username and password provided. 175 * 176 * @param databaseURL The URL to the database 177 * @param table The table to use 178 * @param username The user name 179 * @param password The password 180 */ 181 public DatabaseStatusFeeder( final String databaseURL, final String table, 182 final String username, final String password ) 183 { 184 this.url = databaseURL; 185 this.table = table; 186 this.username = username; 187 this.password = password; 188 } 189 190 /** 191 * Create a database status feeder using the database, table, 192 * username and password provided with the given column mapping 193 * 194 * @param databaseURL The URL to the database 195 * @param table The table to use 196 * @param username The user name 197 * @param password The password 198 * @param columnNames The column mapping 199 */ 200 public DatabaseStatusFeeder( final String databaseURL, final String table, 201 final String username, final String password, final Map<String,String> columnNames ) 202 { 203 this( databaseURL, table, username, password ); 204 this.columnNames = columnNames; 205 } 206 207 /** 208 * Create a connection using the given driver for the given database. 209 * 210 * @param driver The driver 211 * @param url The URL to the database 212 * @param username The username 213 * @param password The password 214 * @return The database connection 215 * @throws ClassNotFoundException If the driver cannot be found 216 * @throws SQLException If the connection could not be created 217 */ 218 private static Connection createConnection( final String driver, final String url, 219 final String username, final String password ) 220 throws ClassNotFoundException, SQLException 221 { 222 Class.forName(driver); 223 if( username == null || password == null || 224 username.trim().length() == 0 || password.trim().length() == 0 ) 225 return (Connection) DriverManager.getConnection( url ); 226 else return (Connection) DriverManager.getConnection( url, username, password ); 227 } 228 229 /** 230 * {@inheritDoc} 231 * @see org.openimaj.picslurper.StatusFeeder#feedStatus(org.openimaj.picslurper.PicSlurper) 232 */ 233 @Override 234 public void feedStatus( final PicSlurper slurper ) throws IOException 235 { 236 try 237 { 238 // Create the connection to the database 239 this.connection = DatabaseStatusFeeder.createConnection( 240 this.databaseDriver, this.url, this.username, this.password ); 241 242 // Create a scrolling result set type 243 final Statement s = this.connection.createStatement( 244 ResultSet.TYPE_SCROLL_SENSITIVE, ResultSet.CONCUR_READ_ONLY ); 245 s.setFetchSize( this.pageSize ); 246 s.setMaxRows( this.pageSize ); 247 248 // First count how many rows there are so we know how many 249 // pages there will be. 250 String sql = "SELECT COUNT(*) FROM "+this.table; 251 ResultSet r = s.executeQuery( sql ); 252 r.next(); final int rows = r.getInt( 1 ); 253 final int pages = (int)Math.ceil(rows/(double)this.pageSize); 254 255 for( int pageNumber = 1; pageNumber <= pages; pageNumber++ ) 256 { 257 sql = "SELECT "+ 258 this.getColumn("created_at",true)+", "+ 259 this.getColumn("text",true)+ 260 " from "+this.table+ 261 " LIMIT "+(pageNumber*this.pageSize) +","+this.pageSize; 262 263 r = s.executeQuery( sql ); 264 265 // Read the results sets 266 while( r.next() ) 267 { 268 try 269 { 270 final String createdAt = r.getString(1); 271 final XMLGregorianCalendar cal = DatatypeFactory.newInstance() 272 .newXMLGregorianCalendar(createdAt); 273 final String text = r.getString(2); 274 275 final DatabaseFeederStatus status = new DatabaseFeederStatus( text, cal ); 276 slurper.handleStatus( status ); 277 } 278 catch( final DatatypeConfigurationException e ) 279 { 280 e.printStackTrace(); 281 } 282 } 283 } 284 } 285 catch( final ClassNotFoundException e ) 286 { 287 e.printStackTrace(); 288 } 289 catch( final SQLException e ) 290 { 291 e.printStackTrace(); 292 } 293 } 294 295 /** 296 * Get a column name through the column name mapping, if one exists. 297 * @param defaultName The name to look up 298 * @param tf Whether to enclose in back-ticks 299 * @return The column name to use 300 */ 301 private String getColumn( final String defaultName, final boolean tf ) 302 { 303 if( this.columnNames.get(defaultName) != null ) 304 return tf ? "`"+this.columnNames.get(defaultName)+"`" : 305 this.columnNames.get(defaultName); 306 return tf ? "`"+defaultName+"`" : defaultName; 307 } 308 309 /** 310 * Set the database driver to use. 311 * @param driverName The driver name. 312 */ 313 public void setDriver( final String driverName ) 314 { 315 this.databaseDriver = driverName; 316 } 317 318 /** 319 * @param args 320 */ 321 public static void main( final String[] args ) 322 { 323 try 324 { 325 // Connect to local database 326 final DatabaseStatusFeeder dbsf = new DatabaseStatusFeeder( 327 "jdbc:mysql://localhost:3306/swr", "tweets", "swr", "swr" ); 328 329 final PicSlurper ps = new PicSlurper(); 330 dbsf.feedStatus( ps ); 331 } 332 catch( final IOException e ) 333 { 334 e.printStackTrace(); 335 } 336 } 337}