// Uses Internet Explorer to programmatically retrieve contents // of a web-site, walk the tree-structure of the HTML (DOM) // and display the structure in a JTree. import iexplorer.InternetExplorer; import iexplorer.IWebBrowserApp; import mshtml.IHTMLElement; import mshtml.IHTMLElementCollection; import mshtml.IHTMLDocument3; import ezjcom.JComVariant; import ezjcom.JComObject; import javax.swing.JFrame; import javax.swing.JPanel; import javax.swing.JTree; import javax.swing.JButton; import javax.swing.JTextField; import javax.swing.JLabel; import javax.swing.JScrollPane; import java.awt.GridLayout; import java.awt.Dimension; import java.awt.Toolkit; import java.awt.BorderLayout; import java.awt.event.ActionListener; import java.awt.event.ActionEvent; import javax.swing.tree.DefaultTreeModel; import javax.swing.tree.DefaultMutableTreeNode; public class TreeWalk extends JFrame { IWebBrowserApp app = null; WebLoadListener webLoadListener; DefaultMutableTreeNode root = new DefaultMutableTreeNode( "Enter URL and click Navigate" ); JTree tree = new JTree( root ); JTextField url = new JTextField( 24 ); static final int FRAME_WIDTH = 600; static final int FRAME_HEIGHT = 400; public TreeWalk( String title ) { super( title ); } // This method is called when the Navigate button is clicked. // The URL is loaded, its contents are analyzed and displayed // in a tree structure. void onButtonClick() { try { String urlText = url.getText(); root.setUserObject( urlText ); // Navigate to the URL typed. System.out.println( "Retrieving " + urlText ); webLoadListener.initialize(); synchronized ( webLoadListener ) { app.Navigate( urlText ); webLoadListener.wait(); } System.out.println( "Examining contents of " + urlText ); // Walk the HTML tree htmlTreeWalk(); ((DefaultTreeModel) tree.getModel()).reload(); } catch (Exception ex) { ex.printStackTrace(); } } // Initialize Internet Explorer and GUI void initialize() { try { // Instantiate Internet Explorer InternetExplorer ie = new InternetExplorer(); // Get IWebBrowserApp app = ie.getIWebBrowserApp(); // HINT: While developing, make the browser visible!! // app.setVisible( true ); // NOTE: When browser is not visible, the following workaround must // be added for an Internet Explorer bug // (http://support.microsoft.com/kb/259935) // Remove it or comment it out if setting the browser visible. app.setLeft( - app.getWidth()); // If IE is trying to load any default pages, stop it. app.Stop(); // Attach the events listener. webLoadListener = new WebLoadListener(); ie.addJComEventListener( webLoadListener ); // Setup a JTree in the JFrame, along with a pane for entering URLs getContentPane().setLayout( new BorderLayout()); JPanel panel = new JPanel( new GridLayout(1,0)); panel.add( tree ); JScrollPane scrollView = new JScrollPane( panel ); getContentPane().add( scrollView, BorderLayout.CENTER ); JButton button = new JButton(); JLabel label = new JLabel( "URL: " );; JPanel buttonPanel = new JPanel(); button.setText( "Navigate" ); buttonPanel.add( label ); buttonPanel.add( url ); buttonPanel.add( button ); getContentPane().add( buttonPanel, BorderLayout.SOUTH ); button.addActionListener( new ActionListener() { public void actionPerformed( ActionEvent e ) { onButtonClick(); } }); // Center in frame, set visible setSize( FRAME_WIDTH, FRAME_HEIGHT ); Dimension screenSize = Toolkit.getDefaultToolkit().getScreenSize(); setLocation(( screenSize.width - FRAME_WIDTH ) / 2, ( screenSize.height - FRAME_HEIGHT ) / 2 ); setVisible(true); } catch (Exception ex) { ex.printStackTrace(); } } // Walk the nodes of the HTML tree structure recursively, // adding to the JTree. void recursiveWalk( IHTMLElement element, DefaultMutableTreeNode node ) { try { JComObject obj = element.getChildren(); boolean leafNode = true; if ( obj != null ) { IHTMLElementCollection children = (IHTMLElementCollection) element.getChildren().JComCoerceObjectToAnotherType( IHTMLElementCollection.class ); // Process all the children. if ( children != null && children.getLength() > 0 ) { for ( int i = 0; i < children.getLength(); i++ ) { IHTMLElement child = (IHTMLElement) children.item( new JComVariant( i )).JComCoerceObjectToAnotherType( IHTMLElement.class ); DefaultMutableTreeNode newNode = new DefaultMutableTreeNode( child.getTagName()); node.add( newNode ); recursiveWalk( child, newNode ); } leafNode = false; // Node has children } } // Make some guesses to see if it is a textual node. String tag = element.getTagName(); if ( tag != null && ( tag.equalsIgnoreCase( "P" ) || tag.equalsIgnoreCase( "LI" ))) { // Add text for paragraph nodes. DefaultMutableTreeNode newNode = new DefaultMutableTreeNode( element.getInnerText()); node.add( newNode ); } else if ( leafNode ) { // Leaf node, add HTML as a child node. DefaultMutableTreeNode newNode = new DefaultMutableTreeNode( element.getOuterHTML()); node.add( newNode ); } } catch (Exception ex) { ex.printStackTrace(); } } // Start point of the recursive tree walk void htmlTreeWalk() { try { // Now retrieve and examine the contents of the website. IHTMLDocument3 doc3 = (IHTMLDocument3) app.getDocument().JComCoerceObjectToAnotherType( IHTMLDocument3.class ); // Get the document root element IHTMLElement docElement = (IHTMLElement) doc3.getDocumentElement().JComCoerceObjectToAnotherType( IHTMLElement.class ); // Walk the root element, adding leafs to the tree DefaultMutableTreeNode node = new DefaultMutableTreeNode( docElement.getTagName()); root.removeAllChildren(); root.add( node ); recursiveWalk( docElement, node ); } catch (Exception ex) { ex.printStackTrace(); } } // Standard mechanism for initializing GUI with thread safety. private static void createAndShowGUI() { try { // Create and initialize a TreeWalk frame TreeWalk frame = new TreeWalk("HTML DOM Tree Walk"); frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE); frame.initialize(); } catch (Exception ex) { ex.printStackTrace(); } } public static void main(String[] args) { javax.swing.SwingUtilities.invokeLater(new Runnable() { public void run() { createAndShowGUI(); } }); } }