Android 获取网页数据的几种方式
1. HttpClient 方式
package com.example.jiaoleiqiang.getwebdata;
import android.os.Bundle;
import android.os.Handler;
import android.os.Message;
import android.support.v7.app.AppCompatActivity;
import android.view.View;
import android.view.Window;
import android.view.WindowManager;
import android.widget.Button;
import android.widget.TextView;
import android.widget.Toast;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.ResponseHandler;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.BasicResponseHandler;
import org.apache.http.impl.client.DefaultHttpClient;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import java.io.IOException;
import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class MainActivity extends AppCompatActivity implements View.OnClickListener {
private Button getData;private TextView webDataShow;private String webData;// private String webSite = "http://lbsyun.baidu.com/index.php?title=android-locsdk/theupdatelog";
// private String webSite = “http://lbs.qq.com/geo/log.html“;
private String webSite = “http://lbs.amap.com/api/android-location-sdk/changelog“;
private static final int MSG_SUCCESS = 0;private static final int MSG_FAILURE = 1;private Handler mHandler = null;private Thread httpClientThread;private ArrayList marks = new ArrayList<>();@Overrideprotected void onCreate(Bundle savedInstanceState) { super.onCreate(savedInstanceState); requestWindowFeature(Window.FEATURE_NO_TITLE); getWindow().setFlags(WindowManager.LayoutParams.FLAG_FULLSCREEN,WindowManager.LayoutParams.FLAG_FULLSCREEN); setContentView(R.layout.activity_main); getData = findViewById(R.id.httpclient); webDataShow = findViewById(R.id.webDataShow); getData.setOnClickListener(this); getData.setOnClickListener(this); mHandler = new Handler() { @Override public void handleMessage(Message msg) { switch (msg.what) { case MSG_SUCCESS: Toast.makeText(getApplicationContext(), "连接成功!",Toast.LENGTH_LONG).show(); Document doc = Jsoup.parse(msg.obj.toString()); webData = doc.body().text(); String re = "\\d{4}-\\d{1,2}-\\d{2}"; Pattern p = Pattern.compile(re); Matcher m = p.matcher(webData); while (m.find()) { marks.add(m.group()); } webData = webData.substring(webData.indexOf(marks.get(0)),webData.indexOf(marks.get(1))); webDataShow.setText(webData); break; case MSG_FAILURE: Toast.makeText(getApplicationContext(), "链接失败", Toast.LENGTH_LONG).show(); } } };}@Overridepublic void onClick(View view) { switch (view.getId()) { case R.id.httpclient: if (httpClientThread == null) { httpClientThread = new Thread(httpClientRunnable); httpClientThread.start(); } }}Runnable httpClientRunnable = new Runnable() { @Override public void run() { httpClientWebData(); }};private void httpClientWebData() { DefaultHttpClient httpClient = new DefaultHttpClient(); HttpGet httpGet = new HttpGet(webSite); ResponseHandler responseHandler = new BasicResponseHandler(); try { //String content = httpClient.execute(httpGet, responseHandler); String content = new String(httpClient.execute(httpGet, responseHandler).getBytes(), "utf-8"); mHandler.obtainMessage(MSG_SUCCESS, 
content).sendToTarget(); } catch (ClientProtocolException e) { e.printStackTrace(); } catch (Exception e) { e.printStackTrace(); }}
}
2. HttpURLConnection 方式(获取失败,原因不明)
package com.example.jiaoleiqiang.getwebdata2;
import android.os.Bundle;
import android.os.Handler;
import android.os.Message;
import android.support.v7.app.AppCompatActivity;
import android.view.View;
import android.widget.Button;
import android.widget.TextView;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
public class MainActivity extends AppCompatActivity implements View.OnClickListener {
private Button getData;private TextView webData;Handler handler = new Handler() { @Override public void handleMessage(Message msg) { super.handleMessage(msg); if (msg.what == 1) { Bundle b = msg.getData(); String str = b.getString("value"); webData.setText(str); } }};@Overrideprotected void onCreate(Bundle savedInstanceState) { super.onCreate(savedInstanceState); setContentView(R.layout.activity_main); getData = findViewById(R.id.getData); webData = findViewById(R.id.webData); getData.setOnClickListener(this);}@Overridepublic void onClick(View view) { switch (view.getId()) { case R.id.getData: new getDataThread().start(); }}class getDataThread extends Thread { @Override public void run() { try { URL mUrl = new URL("http://lbs.amap.com/api/android-location-sdk/changelog"); byte[] buf = WebUtill.getContent(mUrl, "GET", "utf-8"); String strResult = new String(buf,"utf-8"); Bundle data = new Bundle(); data.putString("value", strResult); Message msg = new Message(); msg.what = 1; msg.setData(data); handler.sendMessage(msg); System.out.println("------>"+strResult);// 直接输出内容 } catch (Exception e) { e.printStackTrace(); } finally { WebUtill.closeConnection(); } }}private static class WebUtill { static HttpURLConnection mHttpUrlConnection; static InputStream mInputStream; /** * @param url address * @param method post or get * @param codeType utf-8 or other * @return * @throws Exception */ public static byte[] getContent(URL url, String method, String codeType) throws Exception { URL mUrl = url; mHttpUrlConnection = (HttpURLConnection) mUrl.openConnection(); mHttpUrlConnection.setConnectTimeout(6000);// 设置连接超时 mHttpUrlConnection.setRequestMethod(method);// get方式 发起请求 if (mHttpUrlConnection.getResponseCode() != 200) { throw new RuntimeException("Fail to request url"); } byte[] result; mInputStream = mHttpUrlConnection.getInputStream();// 得到网络返回的流 result = readDate(mInputStream, "utf-8"); mInputStream.close(); return result; } private static byte[] readDate(InputStream input, 
String mode) throws IOException { byte[] buff = new byte[input.available()]; System.out.println("input 的长度:" + input.available()); input.read(buff); return buff; } public static void closeConnection() { if (mHttpUrlConnection != null) mHttpUrlConnection.disconnect(); }}
}
3.jsoup方式
package com.example.jiaoleiqiang.getwebcontent;
import android.os.Handler;
import android.os.Message;
import android.support.v7.app.AppCompatActivity;
import android.os.Bundle;
import android.view.View;
import android.widget.Button;
import android.widget.TextView;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import java.io.IOException;
import java.text.BreakIterator;
import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class MainActivity extends AppCompatActivity implements View.OnClickListener {
private Button updateList;private TextView gaodeUpdate;private TextView tencentUpdate;private TextView baiduUpdate;private static final int GAO_DE = 0;private static final int TENCENT = 1;private static final int BAI_DU = 2;private String gaodeData;private String tencentData;private String baiduData;private Handler mHandler = null;@Overrideprotected void onCreate(Bundle savedInstanceState) { super.onCreate(savedInstanceState); setContentView(R.layout.activity_main); updateList = findViewById(R.id.updateList); gaodeUpdate = findViewById(R.id.gaodeUpdate); tencentUpdate = findViewById(R.id.tencentUpdate); baiduUpdate = findViewById(R.id.baiduUpdate); updateList.setOnClickListener(this); mHandler = new Handler() { @Override public void handleMessage(Message msg) { switch (msg.what) { case GAO_DE: final ArrayList gaoMarks = new ArrayList<>(); Document doc = Jsoup.parse(msg.obj.toString()); gaodeData = doc.body().text(); String re = "\\d{4}-\\d{1,2}-\\d{2}"; Pattern p = Pattern.compile(re); Matcher m = p.matcher(gaodeData); while (m.find()) { gaoMarks.add(m.group()); } gaodeData = gaodeData.substring(gaodeData.indexOf(gaoMarks.get(0)), gaodeData.indexOf(gaoMarks.get(1))); gaodeUpdate.setText(gaodeData); break; case TENCENT: final ArrayList tencentMarks = new ArrayList<>(); doc = Jsoup.parse(msg.obj.toString()); tencentData = doc.body().text(); re = "\\d{4}-\\d{2}-\\d{2}"; p = Pattern.compile(re); m = p.matcher(tencentData); while (m.find()) { tencentMarks.add(m.group()); } tencentData = tencentData.substring(tencentData.indexOf(tencentMarks.get(0)) + 10); tencentData = tencentData.substring(tencentData.indexOf(tencentMarks.get(0))); tencentData = tencentData.substring(0, tencentData.indexOf(tencentMarks.get(1))); tencentUpdate.setText(tencentData); break; case BAI_DU: doc = Jsoup.parse(msg.obj.toString()); baiduData = doc.body().text(); baiduData = baiduData.substring(0, baiduData.lastIndexOf(";") + 1); String str1 = baiduData.substring(baiduData.lastIndexOf(":") - 2); 
baiduData = baiduData.substring(0, baiduData.lastIndexOf(":") - 2); for (int i = 0; i < 2; i++) { String ss = baiduData.substring(baiduData.lastIndexOf(":") - 2, baiduData.lastIndexOf(":") + 1); if (str1.contains(ss)) { str1 += ""; } else { str1 = baiduData.substring(baiduData.lastIndexOf(":") - 2) + str1; baiduData = baiduData.substring(0, baiduData.lastIndexOf(":") - 2); } } baiduUpdate.setText(str1); break; default: break; } } };}@Overridepublic void onClick(View view) { switch (view.getId()) { case R.id.updateList: new Thread(gaodeRunnable).start(); new Thread(tencentRunnable).start(); new Thread(baiduRunnable).start(); break; }}Runnable gaodeRunnable = new Runnable() { @Override public void run() { String url = "http://lbs.amap.com/api/android-location-sdk/changelog"; Document doc = null; try { doc = Jsoup.connect(url).get(); String content = doc.body().text(); mHandler.obtainMessage(GAO_DE, content).sendToTarget(); } catch (IOException e) { e.printStackTrace(); } System.out.println(doc.body().text()); }};Runnable tencentRunnable = new Runnable() { @Override public void run() { String url = "http://lbs.qq.com/geo/log.html"; Document doc = null; try { doc = Jsoup.connect(url).get(); String content = doc.body().text(); mHandler.obtainMessage(TENCENT, content).sendToTarget(); } catch (IOException e) { e.printStackTrace(); } System.out.println(doc.body().text()); }};Runnable baiduRunnable = new Runnable() { @Override public void run() { String url = "http://lbsyun.baidu.com/index.php?title=android-locsdk/theupdatelog"; Document doc = null; try { doc = Jsoup.connect(url).get(); String content = doc.body().text(); mHandler.obtainMessage(BAI_DU, content).sendToTarget(); } catch (IOException e) { e.printStackTrace(); } System.out.println(doc.body().text()); }};
}
4.获取网页内容并解析出正文部分
import java.io.IOException;
import java.util.Stack;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
public class GetContent {
static int index;
public static void main(String[] args) { String url = "http://lbs.amap.com/api/android-location-sdk/changelog"; Document doc = null; try { doc = Jsoup.connect(url).get(); } catch (IOException e) { e.printStackTrace(); } String content = GetDocContent(doc); System.out.println(doc.body().text()); // System.out.println("网页正文如下:\n" + content);}private static String GetDocContent(Document doc) { Elements divs = doc.body().getElementsByTag("div"); int max = -1; String content = null; for (int i = 0; i < divs.size(); i++) { Element div = (Element) divs.get(i); String divContent = GetDivContent(div); if (divContent.length() > max) { max = divContent.length(); content = divContent; } } return content;}private static String GetDivContent(Element div) { StringBuilder sb = new StringBuilder(); // 考虑div里标签内容的顺序,对div子树进行深度优先搜索 Stack sk = new Stack(); sk.push(div); while (!sk.empty()) { Element e = sk.pop(); // 对于div中的div过滤掉 if (e != div && e.tagName().equals("div")) continue; // 考虑正文被包含在p标签中的情况,并且p标签里不能含有a标签 if (e.tagName().equals("p") && e.getElementsByTag("a").size() == 0) { String className = e.className(); if (className.length() != 0 && className.equals("pictext")) continue; sb.append(e.text()); sb.append("\n"); continue; } else if (e.tagName().equals("td")) { // 考虑正文被包含在td标签中的情况 if (e.getElementsByTag("div").size() != 0) continue; sb.append(e.text()); sb.append("\n"); continue; } // 将孩子节点加入栈中 Elements children = e.children(); for (int i = children.size() - 1; i >= 0; i--) { sk.push((Element) children.get(i)); } } return sb.toString();}
}
更多相关文章
- Android(安卓)流式布局(标签效果)
- Android:TabHost实现Tab切换
- Android之clickable
- 关于android.support.v4.app.Fragment与android.app.Fragment的
- Android(安卓)高仿iReader标签
- 常见Android知识点汇总
- Android(安卓)Studio FragmentTabHost使用
- Android(安卓)Studio使用心得 - 常见问题集锦
- Android(安卓)几个Info系列类的总结