从维基百科中获取随机摘录(Javascript,仅限客户端)

rka*_*rer 4 javascript ajax jsonp wikipedia

我有一个网页,要求用户提供一段文字,然后对其进行一些操作.为了向懒惰用户演示,我想添加一个"我感觉很幸运"的按钮,它将从维基百科中获取一些随机文本并填充输入.

如何使用Javascript从随机的Wikipedia文章中获取一系列文本?

我发现了一些使用Wikipedia API 获取解析文章的例子,但它们往往是服务器端的.我正在寻找一个完全在客户端运行,并且不会受到同源策略(same-origin policy)限制的解决方案.

注意随机乱码是不够的; 我需要有意义的人类可读句子.

rka*_*rer 11

我的答案建立在这里建议的技术上.

棘手的部分是制定正确的查询字符串:

http://en.wikipedia.org/w/api.php?action=query&generator=random&prop=extracts&exchars=500&format=json&callback=onWikipedia

  • generator=random 选择一个随机页面
  • prop=extracts 与 exchars=500 检索500个字符的摘录
  • format=json 返回JSON格式的数据
  • callback= 使返回的数据被包装在一个函数调用中,因此它可以像任何其他 <script> 一样被注入您的页面(请参阅JSONP),从而绕过跨域障碍.
  • requestid 可以选择添加,每次都有一个新值,以避免浏览器缓存过时(IE9中需要)

查询提供的页面看起来像这样(我添加了空白以便于阅读):

onWikipedia(
  {"query":
    {"pages":
      {"12362520":
        {"pageid":12362520,
         "ns":0,
         "title":"Power Building",
         "extract":"<p>The <b>Power Building<\/b> is a historic commercial building in
                    the downtown of Cincinnati, Ohio, United States. Built in 1903, it
                    was designed by Harry Hake. It was listed on the National Register
                    of Historic Places on March 5, 1999. One week later, a group of
                    buildings in the northeastern section of downtown was named a
                    historic district, the Cincinnati East Manufacturing and Warehouse
                    District; the Power Building is one of the district's contributing
                    properties.<\/p>\n<h2> Notes<\/h2>"
  } } } }
)
Run Code Online (Sandbox Code Playgroud)

当然,每次你都会得到一篇不同的文章.

这是一个完整的,可以在JSBin上试用的实例.

<HTML><BODY>

  <p><textarea id="textbox" style="width:350px; height:150px"></textarea></p>
  <p><button type="button" id="button" onclick="startFetch(100, 500)">
    Fetch random Wikipedia extract</button></p>

  <script type="text/javascript">

    // DOM elements the demo reads from and writes to.
    var textbox = document.getElementById("textbox");
    var button = document.getElementById("button");

    // Shared fetch state, kept global so the JSONP callback can reach it.
    var tempscript = null; // the injected <script> while a fetch is in progress
    var minchars;          // minimum extract length requested by the caller
    var maxchars;          // maximum extract length sent to the API
    var attempts;          // number of retries made for the current request

    /**
     * Starts a JSONP request for a random Wikipedia extract by injecting a
     * temporary <script> element; the API answers by calling onFetchComplete.
     *
     * @param {number} minimumCharacters - Minimum acceptable extract length
     *     (ignored on retries; the saved value is reused).
     * @param {number} maximumCharacters - Value sent as `exchars`
     *     (ignored on retries; the saved value is reused).
     * @param {boolean} [isRetry] - True when re-requesting after a too-short
     *     extract; keeps the saved parameters and the attempt counter.
     */
    function startFetch(minimumCharacters, maximumCharacters, isRetry) {
      if (tempscript) return; // a fetch is already in progress
      if (!isRetry) {
        attempts = 0;
        minchars = minimumCharacters; // save params in case retry needed
        maxchars = maximumCharacters;
        button.disabled = true;
        button.style.cursor = "wait";
      }
      var script = document.createElement("script");
      script.type = "text/javascript";
      script.id = "tempscript";
      // Without this, a network/API failure would leave the button disabled
      // and block every later fetch because tempscript would never be cleared.
      script.onerror = function () {
        if (tempscript !== script) return; // already handled
        document.body.removeChild(script);
        tempscript = null;
        button.disabled = false;
        button.style.cursor = "auto";
      };
      // HTTPS avoids mixed-content blocking when the host page is served
      // securely. requestid is a cache buster so each call gets a fresh page.
      script.src = "https://en.wikipedia.org/w/api.php"
        + "?action=query&generator=random&prop=extracts"
        + "&exchars=" + maxchars + "&format=json&callback=onFetchComplete&requestid="
        + Math.floor(Math.random() * 999999).toString();
      tempscript = script;
      document.body.appendChild(script);
      // onFetchComplete invoked when finished
    }

    /**
     * JSONP callback invoked by the Wikipedia API response script.
     *
     * Removes the temporary <script>, converts the first page's extract to
     * plain text and either shows it in the textbox or requests another
     * random page when the text is not longer than `minchars`.
     *
     * @param {Object} data - Parsed API response; the extract is read from
     *     the first entry of `data.query.pages`.
     */
    function onFetchComplete(data) {
      if (!tempscript) return; // stray callback: no fetch is in progress
      document.body.removeChild(tempscript);
      tempscript = null;

      // Error responses carry no `query`, and some pages carry no `extract`.
      // Treat both as an empty extract so the retry/give-up logic below still
      // runs instead of throwing and leaving the button disabled.
      var pages = data && data.query && data.query.pages;
      var page = pages ? getFirstProp(pages) : null;
      var extract = page && typeof page.extract === "string" ? page.extract : "";
      var s = htmlDecode(stripTags(extract)) || "";

      // Accept the text when it is long enough, or give up retrying after
      // several attempts and show whatever was received.
      if (s.length > minchars || attempts++ > 5) {
        textbox.value = s;
        button.disabled = false;
        button.style.cursor = "auto";
      } else {
        startFetch(0, 0, true); // retry
      }
    }

    /**
     * Returns the value of the first enumerable property of `obj`, in
     * for...in order, or undefined when there is none.
     */
    function getFirstProp(obj) {
      var key;
      for (key in obj) {
        return obj[key];
      }
      return undefined;
    }

    // This next bit borrowed from Prototype / hacked together
    // You may want to replace with something more robust
    /**
     * Removes opening tags (with optional attributes) and closing tags from
     * the string `s` and returns the remaining text.
     */
    function stripTags(s) {
      var tagPattern = /<\w+(\s+("[^"]*"|'[^']*'|[^>])+)?>|<\/\w+>/gi;
      var withoutTags = s.replace(tagPattern, "");
      return withoutTags;
    }
    /**
     * Decodes HTML character references (e.g. "&amp;") in `input` and returns
     * the resulting plain text.
     *
     * A <textarea> is used as the decoding element because its content is
     * parsed as text only: any markup left in the input stays literal text
     * instead of being turned into live elements, and the whole decoded
     * string is returned rather than just the first child node.
     *
     * @param {string} input - Text that may contain HTML character references.
     * @returns {string} The decoded text ("" for empty input).
     */
    function htmlDecode(input) {
      var decoder = document.createElement("textarea");
      decoder.innerHTML = input;
      return decoder.value;
    }

  </script>

</BODY></HTML>
Run Code Online (Sandbox Code Playgroud)

generator=random 的一个缺点是,你经常得到讨论页或自动生成的内容,而不是实际的文章.如果有人能改进查询字符串,将结果限制为高质量的文章,那就太棒了!