我使用ocurl和nethtml完成了这两件事
ocurl来读取URL的内容(这里有大量的属性;这是最小的),
let string_of_uri uri =
try let connection = Curl.init () and write_buff = Buffer.create 1763 in
Curl.set_writefunction connection
(fun x -> Buffer.add_string write_buff x; String.length x);
Curl.set_url connection uri;
Curl.perform connection;
Curl.global_cleanup ();
Buffer.contents write_buff;
with _ -> raise (IO_ERROR uri)
Run Code Online (Sandbox Code Playgroud)
从nethtml ; (您可能需要设置DTD Nethtml.parse)
let parse_html_string uri =
let ch = new Netchannels.input_string (string_of_uri uri) in
let docs = Nethtml.parse ?return_pis:(Some false) ch in
ch # close_in ();
docs
Run Code Online (Sandbox Code Playgroud)
干杯!
| 归档时间: |
|
| 查看次数: |
1453 次 |
| 最近记录: |