001/** 002 * Copyright (c) 2011, The University of Southampton and the individual contributors. 003 * All rights reserved. 004 * 005 * Redistribution and use in source and binary forms, with or without modification, 006 * are permitted provided that the following conditions are met: 007 * 008 * * Redistributions of source code must retain the above copyright notice, 009 * this list of conditions and the following disclaimer. 010 * 011 * * Redistributions in binary form must reproduce the above copyright notice, 012 * this list of conditions and the following disclaimer in the documentation 013 * and/or other materials provided with the distribution. 014 * 015 * * Neither the name of the University of Southampton nor the names of its 016 * contributors may be used to endorse or promote products derived from this 017 * software without specific prior written permission. 018 * 019 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 020 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 021 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 022 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 023 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 024 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 025 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 026 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 027 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 028 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 029 */ 030package org.openimaj.web; 031 032import java.net.URL; 033import java.util.concurrent.TimeoutException; 034 035import org.apache.log4j.Logger; 036import org.openimaj.image.FImage; 037import org.openimaj.image.MBFImage; 038import org.openimaj.image.colour.ColourSpace; 039 040import com.trolltech.qt.core.QSize; 041import com.trolltech.qt.core.QUrl; 042import com.trolltech.qt.gui.QApplication; 043import com.trolltech.qt.gui.QImage; 044import com.trolltech.qt.gui.QPainter; 045import com.trolltech.qt.webkit.QWebElement; 046import com.trolltech.qt.webkit.QWebElementCollection; 047import com.trolltech.qt.webkit.QWebFrame; 048import com.trolltech.qt.webkit.QWebPage; 049 050/** 051 * An offscreen web-browser that can be accessed programmatically. 052 * Allows rendering to an {@link MBFImage}, etc. 053 * 054 * @author Jonathon Hare (jsh2@ecs.soton.ac.uk) 055 * 056 */ 057public class ProgrammaticBrowser { 058 private static final Logger logger = Logger.getLogger(ProgrammaticBrowser.class); 059 060 private QWebPage webpage; 061 private QWebFrame webframe; 062 private Boolean currentLoadingStatus; 063 064 private long mainLoopSleepTime = 10; //in ms 065 066 private static boolean qapp_init = false; 067 068 /** 069 * Default constructor. Uses an {@link DefaultBrowserDelegate}, 070 * so you wont see any dialogs, etc. 071 */ 072 public ProgrammaticBrowser() { 073 this(new DefaultBrowserDelegate()); 074 } 075 076 /** 077 * Construct with given delegate object. 078 * Setting the delegate to null will enable default 079 * behavior - i.e. dialogs will be drawn on the screen! 080 * @param delegate 081 */ 082 public ProgrammaticBrowser(final BrowserDelegate delegate) { 083 synchronized (ProgrammaticBrowser.class) { 084 if (!qapp_init) { 085 QApplication.initialize(new String [] {}); 086 qapp_init = true; 087 } 088 } 089 090 webpage = new QWebPage() { 091 /* (non-Javadoc) 092 * @see com.trolltech.qt.webkit.QWebPage#javaScriptAlert(com.trolltech.qt.webkit.QWebFrame, java.lang.String) 093 */ 094 @Override 095 protected void javaScriptAlert(QWebFrame originatingFrame, String msg) { 096 if (delegate != null) 097 delegate.javaScriptAlert(originatingFrame, msg); 098 else 099 super.javaScriptAlert(originatingFrame, msg); 100 } 101 102 /* (non-Javadoc) 103 * @see com.trolltech.qt.webkit.QWebPage#javaScriptConfirm(com.trolltech.qt.webkit.QWebFrame, java.lang.String) 104 */ 105 @Override 106 protected boolean javaScriptConfirm(QWebFrame originatingFrame, String msg) { 107 if (delegate != null) 108 return delegate.javaScriptConfirm(originatingFrame, msg); 109 return super.javaScriptConfirm(originatingFrame, msg); 110 } 111 112 /* (non-Javadoc) 113 * @see com.trolltech.qt.webkit.QWebPage#javaScriptConsoleMessage(java.lang.String, int, java.lang.String) 114 */ 115 @Override 116 protected void javaScriptConsoleMessage(String message, int lineNumber, String sourceID) { 117 if (delegate != null) 118 delegate.javaScriptConsoleMessage(message, lineNumber, sourceID); 119 super.javaScriptConsoleMessage(message, lineNumber, sourceID); 120 } 121 122 /* (non-Javadoc) 123 * @see com.trolltech.qt.webkit.QWebPage#javaScriptPrompt(com.trolltech.qt.webkit.QWebFrame, java.lang.String, java.lang.String) 124 */ 125 @Override 126 protected String javaScriptPrompt(QWebFrame originatingFrame, String msg, String defaultValue) { 127 if (delegate != null) 128 delegate.javaScriptPrompt(originatingFrame, msg, defaultValue); 129 return super.javaScriptPrompt(originatingFrame, msg, defaultValue); 130 } 131 }; 132 webframe = webpage.mainFrame(); 133 134 currentLoadingStatus = null; 135 136 webpage.loadFinished.connect(this, "loadFinished(boolean)"); 137 webpage.loadStarted.connect(this, "loadStarted()"); 138 } 139 140 private void mainEventLoop() { 141 QApplication.processEvents(); 142 143 try { Thread.sleep(mainLoopSleepTime); } catch (InterruptedException e) {} 144 } 145 146 protected void loadStarted() { 147 logger.debug("Loading page " + getURL()); 148 } 149 150 protected boolean waitForLoad() { 151 try { 152 return waitForLoad(0); 153 } catch (TimeoutException e) { 154 //should never happen!!! 155 throw new RuntimeException(e); 156 } 157 } 158 159 protected void loadFinished(boolean successful) { 160 currentLoadingStatus = successful; 161 logger.info(String.format("Page load finished (%d bytes): %s (%s)", getHTML().length(), getURL(), successful ? "successful" : "error")); 162 } 163 164 private boolean waitForLoad(long timeout) throws TimeoutException { 165 mainEventLoop(); 166 167 long itime = System.currentTimeMillis(); 168 169 currentLoadingStatus = null; 170 while (currentLoadingStatus == null) { 171 if (timeout != 0 && System.currentTimeMillis() - itime > timeout) 172 throw new TimeoutException(String.format("Timeout reached: %d seconds", timeout)); 173 mainEventLoop(); 174 } 175 mainEventLoop(); 176 if (currentLoadingStatus) { 177 webpage.setViewportSize(webpage.mainFrame().contentsSize()); 178 } 179 180 return currentLoadingStatus; 181 } 182 183 /** 184 * Run the browsers main event loop for timeout milliseconds 185 * @param timeout the running time 186 */ 187 public void mainLoop(long timeout) { 188 mainEventLoop(); 189 190 long itime = System.currentTimeMillis(); 191 192 while (true) { 193 if (timeout == 0 || System.currentTimeMillis() - itime > timeout) 194 break; 195 mainEventLoop(); 196 } 197 mainEventLoop(); 198 } 199 200 /** 201 * Get the HTML of the currently loaded page 202 * @return the html as a string 203 */ 204 public String getHTML() { 205 return webframe.toHtml(); 206 } 207 208 /** 209 * Get the URL for the currently loaded page 210 * @return the url 211 */ 212 public String getURL() { 213 return webframe.url().toString(); 214 } 215 216 /** 217 * Load the page with the given URL 218 * @param url the url to load. 219 * @return true if successful; false otherwise. 220 */ 221 public boolean load(URL url) { 222 return load(url.toString()); 223 } 224 225 /** 226 * Load the page with the given URL 227 * @param url the url to load. 228 * @param timeout the amount of time to wait for the page to load before failing 229 * @return true if successful; false otherwise. 230 * @throws TimeoutException 231 */ 232 public boolean load(URL url, long timeout) throws TimeoutException { 233 return load(url.toString(), timeout); 234 } 235 236 /** 237 * Load the page with the given URL 238 * @param url the url to load. 239 * @param timeout the amount of time to wait for the page to load before failing 240 * @return true if successful; false otherwise. 241 * @throws TimeoutException 242 */ 243 public boolean load(String url, long timeout) throws TimeoutException { 244 webframe.load(new QUrl(url)); 245 return waitForLoad(timeout); 246 } 247 248 /** 249 * Load the page with the given URL 250 * @param url the url to load. 251 * @return true if successful; false otherwise. 252 */ 253 public boolean load(String url) { 254 webframe.load(new QUrl(url)); 255 return waitForLoad(); 256 } 257 258 /** 259 * Load the given html string into the browser 260 * @param html the html string 261 * @return true if successful; false otherwise. 262 */ 263 public boolean loadHTML(String html) { 264 webframe.setHtml(html); 265 return waitForLoad(); 266 } 267 268 /** 269 * Get the BODY element of the loaded page 270 * @return body element or null if it doesn't exist 271 */ 272 public QWebElement getBody() { 273 return webframe.findFirstElement("BODY"); 274 } 275 276 /** 277 * Get all DOM elements matching the given CSS selector 278 * @param selectorQuery the CSS selector 279 * @return collection of elements 280 */ 281 public QWebElementCollection findAllElements(String selectorQuery) { 282 return webframe.findAllElements(selectorQuery); 283 } 284 285 /** 286 * Get the first DOM element corresponding to the given CSS selector 287 * @param selectorQuery the CSS selector 288 * @return the first element, or null if no matching element is found 289 */ 290 public QWebElement findFirstElement(String selectorQuery) { 291 return webframe.findFirstElement(selectorQuery); 292 } 293 294 /** 295 * Get a render of the page as an image 296 * @return Rendered page image 297 */ 298 public MBFImage renderToImage() { 299 QWebElement ele = webframe.documentElement(); 300 301 if (ele == null) return null; 302 303 QSize size = ele.geometry().size(); 304 305 System.out.println(size); 306 307 if (size.width() <= 0 || size.height() <= 0) 308 return null; 309 310 QImage image = new QImage(size, QImage.Format.Format_ARGB32_Premultiplied); 311 QPainter p = new QPainter(image); 312 p.setRenderHint(QPainter.RenderHint.Antialiasing, false); 313 p.setRenderHint(QPainter.RenderHint.TextAntialiasing, false); 314 p.setRenderHint(QPainter.RenderHint.SmoothPixmapTransform, false); 315 ele.render(p); 316 p.end(); 317 318 int width = image.width(); 319 int height = image.height(); 320 321 MBFImage mbfimage = new MBFImage(width, height, ColourSpace.RGB); 322 FImage rf = mbfimage.bands.get(0); 323 FImage gf = mbfimage.bands.get(1); 324 FImage bf = mbfimage.bands.get(2); 325 for (int y=0; y<height; y++) { 326 for (int x=0; x<width; x++) { 327 int rgb = image.pixel(x, y); 328 int r = ((rgb >> 16) & 0xff); 329 int g = ((rgb >> 8) & 0xff); 330 int b = ((rgb) & 0xff); 331 332 rf.pixels[y][x] = r / 255f; 333 gf.pixels[y][x] = g / 255f; 334 bf.pixels[y][x] = b / 255f; 335 } 336 } 337 return mbfimage; 338 } 339 340 /** 341 * Get a render of the page as an image 342 * @param width 343 * @param height 344 * @return Rendered page image 345 */ 346 public MBFImage renderToImage(int width, int height) { 347 QWebElement ele = webframe.documentElement(); 348 349 if (ele == null) return null; 350 351 QSize size = ele.geometry().size(); 352 353 if (size.width() < width) width = size.width(); 354 if (size.height() < height) height = size.height(); 355 356 if (width <= 0 || height <= 0) 357 return null; 358 359 QImage image = new QImage(new QSize(width, height), QImage.Format.Format_ARGB32_Premultiplied); 360 QPainter p = new QPainter(image); 361 p.setRenderHint(QPainter.RenderHint.Antialiasing, false); 362 p.setRenderHint(QPainter.RenderHint.TextAntialiasing, false); 363 p.setRenderHint(QPainter.RenderHint.SmoothPixmapTransform, false); 364 ele.render(p); 365 p.end(); 366 367 MBFImage mbfimage = new MBFImage(width, height, ColourSpace.RGB); 368 FImage rf = mbfimage.bands.get(0); 369 FImage gf = mbfimage.bands.get(1); 370 FImage bf = mbfimage.bands.get(2); 371 for (int y=0; y<height; y++) { 372 for (int x=0; x<width; x++) { 373 int rgb = image.pixel(x, y); 374 int r = ((rgb >> 16) & 0xff); 375 int g = ((rgb >> 8) & 0xff); 376 int b = ((rgb) & 0xff); 377 378 rf.pixels[y][x] = r / 255f; 379 gf.pixels[y][x] = g / 255f; 380 bf.pixels[y][x] = b / 255f; 381 } 382 } 383 return mbfimage; 384 } 385 386 /** 387 * Get the width of the browser. The width is automatically adjusted to 388 * fit the content. 389 * @return the width in pixels 390 */ 391 public int getWidth() { 392 return webframe.contentsSize().width(); 393 } 394 395 /** 396 * Get the height of the browser. The height is automatically adjusted to 397 * fit the content. 398 * @return the height in pixels 399 */ 400 public int getHeight() { 401 return webframe.contentsSize().height(); 402 } 403}