如何从javascript中的url获取顶级域(基本域)

ani*_*nos 6 javascript url

我想从javascript中的url中提取顶级域名(基本域名).例如,对于下面列出的网址列表,我需要将google.com(或google.co.in视情况而定)作为结果.

www.google.com
www.google.co.in
www.images.google.com
www.images.google.co.in
google.com
google.co.in
images.google.com
images.google.co.in

有人知道怎么做到这一点吗?我想在 JavaScript 中并没有直接获取基本域名的方法.

Edu*_*rdo 12

只有当页面本身就位于您想要获取其顶级域的网址上时,此函数才有效.

此功能可确保您获得顶级主机名,因为这是浏览器允许您存储cookie的主机名.所以我们基本上测试是否对于给定的前缀我们能够设置cookie,如果是这样我们返回,否则我们尝试下一个前缀,直到我们找到浏览器允许cookie的那个.

如果浏览器被配置为禁止 cookie,或者页面位于受限制的主机名(例如 localhost)上,此方法可能会失败.

/**
 * Determines the shortest suffix of the current page's hostname on which a
 * cookie can be stored, and returns it.
 *
 * Candidates are tried from the right-most label outwards ("in", "co.in",
 * "google.co.in", ...). For each candidate a probe cookie is written with
 * that domain; the first candidate for which the probe shows up again in
 * `document.cookie` is returned after the probe has been expired.
 *
 * @returns {string|undefined} The first candidate that accepted the probe
 *   cookie, or `undefined` when no candidate did.
 */
function get_top_domain(){
  const probe = 'weird_get_top_level_domain=cookie';
  const probeName = probe.split('=')[0];
  const labels = document.location.hostname.split('.');

  let start = labels.length - 1;
  while (start >= 0) {
    const candidate = labels.slice(start).join('.');

    // Try to store the probe on this candidate domain.
    document.cookie = `${probe};domain=.${candidate};`;

    if (document.cookie.includes(probe)) {
      // The probe was stored: expire it again so nothing is left behind.
      document.cookie = `${probeName}=;domain=.${candidate};expires=Thu, 01 Jan 1970 00:00:01 GMT;`;
      return candidate;
    }

    start -= 1;
  }
  // No candidate accepted the probe: fall through and return undefined.
}
Run Code Online (Sandbox Code Playgroud)

  • 多么令人惊奇的创造性思维啊!先生,谢谢你。 (2认同)

Sim*_*han 6

这取决于你需要多严谨.有效顶级域的完整列表可以在这里查到,不过这里给出的规则可能更有帮助.

一个简单的,可能不完整的正则表达式:

// Matches the base domain at the end of a host name. The tail is one of:
//   - label + "." + punycode TLD ("xn--...")
//   - a single label of three or more characters (e.g. "com")
//   - label + "." + two-character label (e.g. "co.in")
// Simple and possibly incomplete: it does not consult a list of real TLDs.
const HOSTDOMAIN = /[-\w]+\.(?:[-\w]+\.xn--[-\w]+|[-\w]{3,}|[-\w]+\.[-\w]{2})$/i;
Run Code Online (Sandbox Code Playgroud)

用法如下(我不太擅长 JavaScript 正则表达式):

// Run the base-domain pattern held in HOSTDOMAIN against a sample host name.
const match = HOSTDOMAIN.exec('www.google.co.in');

// Declared explicitly: assigning to an undeclared name creates an implicit
// global and throws a ReferenceError in strict mode / ES modules.
let domain;
if (match === null) {
    // exec() returns null when the host name does not match the pattern.
    alert('not a valid domain!');
} else {
    // match[0] is the whole matched text, i.e. the base domain.
    domain = match[0];
}
Run Code Online (Sandbox Code Playgroud)


lin*_*ram 5

你可以尝试这个方法

var url = 'https://www.petzlover.com/us/search?pet=1&breed=262';

extractHostname(url,true); //petzlover.com

extractHostname(url); //www.petzlover.com

/**
 * Extracts the host name from a URL-like string using plain string
 * operations (no URL parsing, no public-suffix list).
 *
 * @param {string} url - Absolute URL or bare "host[:port][/path]" string.
 *   Surrounding whitespace is ignored.
 * @param {boolean} [tld] - When truthy, only the last two dot-separated
 *   labels are kept (e.g. "www.petzlover.com" -> "petzlover.com"). Hosts with
 *   one or two labels are returned unchanged. Multi-part suffixes are not
 *   recognised, so "www.google.co.in" yields "co.in".
 * @returns {string} The host name without scheme, credentials, port, path,
 *   query string or fragment.
 */
function extractHostname(url,tld) {
  // Remove a leading "scheme://" only. Anchoring at the start keeps a "://"
  // that appears later (e.g. inside a query string) from being mistaken for
  // the scheme separator.
  let hostname = url.trim().replace(/^[a-z][a-z0-9+.-]*:\/\//i, '');

  // Everything from the first "/", "?" or "#" onwards is path/query/fragment.
  hostname = hostname.split(/[/?#]/)[0];

  // Drop "user:password@" credentials if present.
  hostname = hostname.slice(hostname.lastIndexOf('@') + 1);

  // Drop the port number.
  hostname = hostname.split(':')[0];

  if (tld) {
    const labels = hostname.split('.');
    // Only shorten when there is something to strip; a single-label host
    // such as "localhost" must not become "undefined.localhost".
    if (labels.length > 2) {
      hostname = labels.slice(-2).join('.');
    }
  }

  return hostname;
}
Run Code Online (Sandbox Code Playgroud)

// Sample inputs: a regular URL and one with an extra sub-domain level.
const url = 'https://www.petzlover.com/us/search?pet=1&breed=262';

const longUrl = 'https://www.fr.petzlover.com/us/search?pet=1&breed=262';

const topLevelDomain = extractHostname(url,true); //petzlover.com
const subDomain = extractHostname(url); //www.petzlover.com
const lengthySubDomain = extractHostname(longUrl); //www.fr.petzlover.com

// The results are plain text, so write them with textContent rather than
// innerHTML: nothing here should ever be parsed as markup.
document.getElementById('top-level-domain').textContent = topLevelDomain;
document.getElementById('sub-domain').textContent = subDomain;
document.getElementById('lengthy-sub-domain').textContent = lengthySubDomain;

    /**
     * Extracts the host name from a URL-like string using plain string
     * operations (no URL parsing, no public-suffix list).
     *
     * @param {string} url - Absolute URL or bare "host[:port][/path]" string.
     *   Surrounding whitespace is ignored.
     * @param {boolean} [tld] - When truthy, only the last two dot-separated
     *   labels are kept (e.g. "www.petzlover.com" -> "petzlover.com"). Hosts
     *   with one or two labels are returned unchanged. Multi-part suffixes
     *   are not recognised, so "www.google.co.in" yields "co.in".
     * @returns {string} The host name without scheme, credentials, port,
     *   path, query string or fragment.
     */
    function extractHostname(url,tld) {
      // Remove a leading "scheme://" only. Anchoring at the start keeps a
      // "://" that appears later (e.g. inside a query string) from being
      // mistaken for the scheme separator.
      let hostname = url.trim().replace(/^[a-z][a-z0-9+.-]*:\/\//i, '');

      // Everything from the first "/", "?" or "#" onwards is
      // path/query/fragment.
      hostname = hostname.split(/[/?#]/)[0];

      // Drop "user:password@" credentials if present.
      hostname = hostname.slice(hostname.lastIndexOf('@') + 1);

      // Drop the port number.
      hostname = hostname.split(':')[0];

      if (tld) {
        const labels = hostname.split('.');
        // Only shorten when there is something to strip; a single-label host
        // such as "localhost" must not become "undefined.localhost".
        if (labels.length > 2) {
          hostname = labels.slice(-2).join('.');
        }
      }

      return hostname;
    }
Run Code Online (Sandbox Code Playgroud)
/* Render every <span> in bold at 16px. */
span {
  font-size: 16px;
  font-weight: bold;
}
Run Code Online (Sandbox Code Playgroud)
<!-- Output placeholders: the script looks up each span by its id via document.getElementById and fills in the extracted host name. -->
<div>Top Level Domain: <span id="top-level-domain"></span> </div>
<div>Including sub Domain: <span id="sub-domain"></span> </div>
<div>Including lengthy sub Domain: <span id="lengthy-sub-domain"></span> </div>
Run Code Online (Sandbox Code Playgroud)