方式一:基于公共后缀集合逐级匹配(URLUtil):
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Pattern;

/**
 * Helpers for extracting and comparing the registrable domain of a URL
 * (for example {@code hexun.com} for {@code http://news.hexun.com/...}).
 *
 * <p>The lookup is driven by a small built-in list of public suffixes; it is
 * not a full Public Suffix List implementation.
 */
public class URLUtil {

    /** Known public suffixes, all lower-case. Read-only after class initialization. */
    private static final Set<String> PUBLIC_SUFFIXES = Collections.unmodifiableSet(
            new HashSet<String>(Arrays.asList(
                    "com", "org", "net", "gov", "edu", "co", "tv", "mobi", "info", "asia",
                    "xxx", "onion", "cn", "com.cn", "edu.cn", "gov.cn", "net.cn", "org.cn",
                    "jp", "kr", "tw", "hk", "com.hk", "org.hk", "se", "com.se", "org.se")));

    /** Matches a dotted-quad IPv4 literal (octet ranges are not validated). */
    private static final Pattern IP_PATTERN = Pattern.compile("(\\d{1,3}\\.){3}(\\d{1,3})");

    /**
     * Returns the registrable domain of the given URL.
     *
     * <p>The host is lower-cased and a single trailing root dot is removed. IPv4
     * literals are returned unchanged. Otherwise labels are stripped from the left
     * until what follows the first dot is a known public suffix, so the longest
     * listed suffix wins ({@code news.sina.com.cn} yields {@code sina.com.cn}).
     * When no listed suffix matches, the last two labels of the host are returned
     * (or the whole host when it has fewer than two labels).
     *
     * @param url the URL to inspect; must not be {@code null}
     * @return the lower-case registrable domain, or the host itself for IP
     *         literals and single-label hosts
     */
    public static String getDomainName(URL url) {
        // DNS names are case-insensitive; normalize so suffix lookups cannot miss on case.
        String host = url.getHost().toLowerCase(Locale.ROOT);
        if (host.endsWith(".")) {
            host = host.substring(0, host.length() - 1);
        }
        if (IP_PATTERN.matcher(host).matches()) {
            return host;
        }

        String candidate = host;
        int dot = candidate.indexOf('.');
        while (dot >= 0) {
            String remainder = candidate.substring(dot + 1);
            if (PUBLIC_SUFFIXES.contains(remainder)) {
                return candidate;
            }
            candidate = remainder;
            dot = candidate.indexOf('.');
        }

        // No listed suffix matched. Returning only the final label would make every
        // host under an unlisted TLD compare equal, so keep the last two labels.
        return lastTwoLabels(host);
    }

    /**
     * Returns the registrable domain of the given URL string.
     *
     * @param url the URL text to parse
     * @return the registrable domain, as described by {@link #getDomainName(URL)}
     * @throws MalformedURLException if {@code url} cannot be parsed as a URL
     */
    public static String getDomainName(String url) throws MalformedURLException {
        return getDomainName(new URL(url));
    }

    /**
     * Tells whether two URLs share the same registrable domain.
     *
     * @param url1 the first URL
     * @param url2 the second URL
     * @return {@code true} if both URLs resolve to the same domain, ignoring case
     */
    public static boolean isSameDomainName(URL url1, URL url2) {
        return getDomainName(url1).equalsIgnoreCase(getDomainName(url2));
    }

    /**
     * Tells whether two URL strings share the same registrable domain.
     *
     * @param url1 the first URL text
     * @param url2 the second URL text
     * @return {@code true} if both URLs resolve to the same domain, ignoring case
     * @throws MalformedURLException if either argument cannot be parsed as a URL
     */
    public static boolean isSameDomainName(String url1, String url2)
            throws MalformedURLException {
        return isSameDomainName(new URL(url1), new URL(url2));
    }

    /** Returns the last two dot-separated labels of {@code host}, or {@code host} if it has fewer. */
    private static String lastTwoLabels(String host) {
        int lastDot = host.lastIndexOf('.');
        if (lastDot <= 0) {
            return host;
        }
        int previousDot = host.lastIndexOf('.', lastDot - 1);
        return previousDot < 0 ? host : host.substring(previousDot + 1);
    }

    /** Small demo: prints the domain extracted through both overloads. */
    public static void main(String[] args) throws Exception {
        String urlStr = "http://news.hexun.com/2017-09-23/190978248.html";
        System.out.println(getDomainName(urlStr));
        System.out.println(getDomainName(new URL(urlStr)));
    }

}
方式二:使用正则表达式匹配已知后缀(DomainUtils):
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Regex-based helper that extracts the registrable domain of a URL
 * (for example {@code hexun.com} for {@code http://news.hexun.com/...}).
 */
public class DomainUtils {

    /**
     * One host label followed by a known suffix, anchored at the end of the host.
     * Without the end anchor a label that merely starts with a suffix would match
     * (e.g. {@code www.netflix.com} would be cut at {@code www.net}). The last three
     * alternatives are the Chinese IDN suffixes for "company", "China" and
     * "network", written as Unicode escapes so the source stays ASCII-only.
     */
    private static final Pattern TOP_DOMAIN_PATTERN = Pattern.compile(
            "[^\\.]+(\\.com\\.cn|\\.net\\.cn|\\.org\\.cn|\\.gov\\.cn|\\.com|\\.net|\\.cn|\\.org"
            + "|\\.cc|\\.me|\\.tel|\\.mobi|\\.asia|\\.biz|\\.info|\\.name|\\.tv|\\.hk"
            + "|\\.\u516c\u53f8|\\.\u4e2d\u56fd|\\.\u7f51\u7edc)$");

    /**
     * Returns the registrable domain of the given URL.
     *
     * @param url the URL text to inspect
     * @return the lower-case domain (one label plus a known suffix), or {@code null}
     *         if {@code url} cannot be parsed or its host ends in no known suffix
     */
    public static String getTopDomain(String url) {
        String host;
        try {
            // Locale.ROOT keeps lower-casing independent of the JVM's default locale.
            host = new URL(url).getHost().toLowerCase(Locale.ROOT);
        } catch (MalformedURLException e) {
            // An unparsable URL has no domain; null is the documented "not found" result.
            return null;
        }
        // Drop the root dot of a fully-qualified host so the end anchor can still match.
        if (host.endsWith(".")) {
            host = host.substring(0, host.length() - 1);
        }
        // The leftmost anchored match is the one with the longest suffix (com.cn before cn).
        Matcher matcher = TOP_DOMAIN_PATTERN.matcher(host);
        if (matcher.find()) {
            return matcher.group();
        }
        return null;
    }

    /** Small demo: prints the domain extracted from a sample URL. */
    public static void main(String[] args) {
        System.out.println(getTopDomain("http://news.hexun.com/2017-09-23/190978248.html")); // hexun.com
    }
}