Monday, October 6, 2014

JSOUP



import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.jsoup.select.Elements;

public class JsoupExtractor {
  List list = new ArrayList();

  public void read(String URL) {
    try {

      Document doc = Jsoup.connect(URL).get();

      Iterator itr = doc.getElementsByClass("wikitable").select("tbody").select("tr").iterator();
      boolean flag = true;
      while (itr.hasNext()) {
        if (flag) {
          itr.next();
          flag = false;
          continue;
        }

        Element temp = itr.next();
        Elements temp2 = temp.select("td");
        String country = (temp2.get(0).select("i").select("b").select("a").attr("title"));
        if (country == null || country.equals(""))
          country = (temp2.get(0).select("b").select("a").attr("title"));

        String list = (temp2.get(4).text());

        // if (country.equals("Zambia"))
        System.out.println(country + "," + list.replaceAll("  ", ";"));

        // writeToFile(country + "," + list.replaceAll("  ", ";"));

      }


      // Elements span = table.select("a");
      // for (Element s : span)
      // System.out.println(s.attr("title"));

    } catch (Exception e) {
      e.printStackTrace();
    }
  }


  // ===========================================
  private void writeToFile(String currCountry2) {
    try {
      FileOutputStream fos = new FileOutputStream("/Users/Desktop/IB/neighbour.txt", true);
      PrintStream ps = new PrintStream(fos);
      ps.println(currCountry2);
      ps.close();
      fos.close();
    } catch (FileNotFoundException e) {
      // TODO Auto-generated catch block
      e.printStackTrace();
    } catch (IOException e) {
      // TODO Auto-generated catch block
      e.printStackTrace();
    }
  }
}

pom.xml entry


<dependency>
  <groupId>org.jsoup</groupId>
  <artifactId>jsoup</artifactId>
  <version>1.7.3</version>
</dependency>