Sean's Note: 用 Java 讀取 HTTP 網頁內容

2012年12月18日 星期二

用 Java 讀取 HTTP 網頁內容

在 JDK 的 java.net 包中已經提供了訪問 HTTP 協定的基本功能:HttpURLConnection。

Apache 也提供了對應的 HttpClient,功能更為強大,也更容易使用。

HttpURLConnection 與 HttpClient 的比較可參考這篇文章 :

http://superonion.iteye.com/blog/1545496

而下面是個完整可執行的程式碼,用來讀取 Apache 的網頁資訊 :

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;

// Using Apache HttpClient version 4.2.2
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.DefaultHttpClient;

/**
 * Minimal Apache HttpClient (4.2.x) sample: issues a GET request to
 * http://www.apache.org/ and prints the status line, the Content-Type
 * header and the response body to standard output.
 */
public class HttpClientSample {

    /**
     * Runs the sample request.
     *
     * @param args unused
     * @throws ClientProtocolException if the request fails with an HTTP protocol error
     * @throws IOException if the connection fails or the body cannot be read
     */
    public static void main(String[] args) throws ClientProtocolException, IOException {
        DefaultHttpClient httpClient = new DefaultHttpClient();
        try {
            // Prepare a request object
            HttpGet httpget = new HttpGet("http://www.apache.org/");

            // Execute the request
            HttpResponse response = httpClient.execute(httpget);

            // Examine the response status
            System.out.println(response.getStatusLine());

            // Get hold of the response entity
            HttpEntity entity = response.getEntity();

            // If the response does not enclose an entity there is nothing to
            // print and no need to worry about connection release. The check
            // must come before any use of the entity to avoid a
            // NullPointerException on body-less responses.
            if (entity == null) {
                return;
            }
            System.out.println(entity.getContentType());

            InputStream instream = entity.getContent();
            try {
                // Decode with an explicit charset instead of the platform
                // default so the output does not vary between machines.
                // NOTE(review): UTF-8 is assumed here; the charset advertised
                // in the Content-Type header is not consulted.
                BufferedReader reader =
                        new BufferedReader(new InputStreamReader(instream, "UTF-8"));

                String line;
                while ((line = reader.readLine()) != null) {
                    System.out.println(line);
                }
                // An IOException simply propagates to the caller; the finally
                // block below still closes the stream.
            } catch (RuntimeException ex) {
                // In case of an unexpected exception, abort the HTTP request
                // in order to shut down the underlying connection and release
                // it back to the connection manager.
                httpget.abort();
                throw ex;
            } finally {
                // Closing the input stream will trigger connection release
                instream.close();
            }
        } finally {
            // Shut down the connection manager so that the resources held by
            // the client are released once the sample is done.
            httpClient.getConnectionManager().shutdown();
        }
    }
}

沒有留言:

張貼留言