diff --git a/src/main/java/com/stonedt/intelligence/quartz/SynthesizeSchedule.java b/src/main/java/com/stonedt/intelligence/quartz/SynthesizeSchedule.java index 7160614..325d0dc 100644 --- a/src/main/java/com/stonedt/intelligence/quartz/SynthesizeSchedule.java +++ b/src/main/java/com/stonedt/intelligence/quartz/SynthesizeSchedule.java @@ -115,75 +115,77 @@ public class SynthesizeSchedule { String hot_policydata = ""; String hot_finaceData = ""; String hot_36kr =""; - try { - FullSearchParam searchParam = new FullSearchParam(); - searchParam.setPageNum(1); - searchParam.setPageSize(50); - searchParam.setSearchWord(""); - searchParam.setClassify("4"); - searchParam.setTimeType(1); - - //热点事件 - searchParam.setSource_name("百度风云榜"); - //JSONObject hotList = fullSearchService.hotList(searchParam); - hot_all = fullSearchService.hotBaiduList(); - - //热门微博 - searchParam.setSource_name("微博"); - JSONObject hotList2 = fullSearchService.hotList(searchParam); - hot_weibo =conversionHotList(hotList2); - //热门微信 - searchParam.setSource_name("微信"); - - JSONObject hotListWechat = fullSearchService.hotList(searchParam); - hot_wechat =conversionHotList(hotListWechat); - searchParam.setPageSize(10); - searchParam.setClassify("1"); - //热门科技 - searchParam.setSource_name("36kr"); - - JSONObject hotList36kr = fullSearchService.hotList(searchParam); - hot_36kr =conversionHotList(hotList36kr); - - searchParam.setClassify("2"); - searchParam.setTimeType(2); - searchParam.setPageSize(50); - //热门抖音 - searchParam.setSource_name("抖音"); - - JSONObject hotListDouyin = fullSearchService.hotList(searchParam); - hot_douyin =conversionHotList(hotListDouyin); - - //热门哔哩哔哩 - searchParam.setSource_name("哔哩哔哩"); - - JSONObject hotListBiLiBiLi = fullSearchService.hotList(searchParam); - hot_bilibili =conversionHotList(hotListBiLiBiLi); - - //热门腾讯视频 - searchParam.setSource_name("腾讯视频"); - - JSONObject hotListTecentVedio = fullSearchService.hotList(searchParam); - hot_tecentvedio 
=conversionHotList(hotListTecentVedio); - hot_search_terms = HotWordsUtil.search(); - - //政策--------国务院 > 首页 > 政策 > 最新 http://www.gov.cn/zhengce/zuixin.htm - - hot_policydata = getPolicyData(); - - - - - //经济--------东方财富网(国内经济首页 > 财经频道 > 焦点 > 国内经济) http://finance.eastmoney.com/a/cgnjj.html - - hot_finaceData = getFinaceData(); - - - - } catch (UnsupportedEncodingException e1) { - // TODO Auto-generated catch block - e1.printStackTrace(); - } + FullSearchParam searchParam = new FullSearchParam(); + searchParam.setPageNum(1); + searchParam.setPageSize(50); + searchParam.setSearchWord(""); + searchParam.setClassify("4"); + searchParam.setTimeType(1); + + //热点事件 + searchParam.setSource_name("百度风云榜"); + //JSONObject hotList = fullSearchService.hotList(searchParam); + hot_all = fullSearchService.hotBaiduList(); + + //热门微博 + searchParam.setSource_name("微博"); + //JSONObject hotList2 = fullSearchService.hotList(searchParam); + //hot_weibo =conversionHotList(hotList2); + hot_weibo = HotWordsUtil.hotWeibo(); + + //热门微信 + searchParam.setSource_name("微信"); + + //JSONObject hotListWechat = fullSearchService.hotList(searchParam); + //hot_wechat =conversionHotList(hotListWechat); + hot_wechat = fullSearchService.hotWechat(); + + searchParam.setPageSize(10); + searchParam.setClassify("1"); + //热门科技 + searchParam.setSource_name("36kr"); + + //JSONObject hotList36kr = fullSearchService.hotList(searchParam); + //hot_36kr =conversionHotList(hotList36kr); + + hot_36kr = fullSearchService.hot36Kr(); + + searchParam.setClassify("2"); + searchParam.setTimeType(2); + searchParam.setPageSize(50); + //热门抖音 + //searchParam.setSource_name("抖音"); + + ///JSONObject hotListDouyin = fullSearchService.hotList(searchParam); + //hot_douyin =conversionHotList(hotListDouyin); + + hot_douyin = fullSearchService.hotDouyin(); + + + //热门哔哩哔哩 + //searchParam.setSource_name("哔哩哔哩"); + + //JSONObject hotListBiLiBiLi = fullSearchService.hotList(searchParam); + //hot_bilibili =conversionHotList(hotListBiLiBiLi); 
+ hot_bilibili =fullSearchService.hotBilibili(); + //热门腾讯视频 + //searchParam.setSource_name("腾讯视频"); + + //JSONObject hotListTecentVedio = fullSearchService.hotList(searchParam); + + + //hot_tecentvedio =conversionHotList(hotListTecentVedio); + hot_tecentvedio =fullSearchService.hotTecent(); + + hot_search_terms = HotWordsUtil.search(); + + //政策--------国务院 > 首页 > 政策 > 最新 http://www.gov.cn/zhengce/zuixin.htm + + hot_policydata = getPolicyData(); + + //经济--------东方财富网(国内经济首页 > 财经频道 > 焦点 > 国内经济) http://finance.eastmoney.com/a/cgnjj.html + + hot_finaceData = getFinaceData(); try { Map map = new HashMap(); @@ -498,7 +500,8 @@ public class SynthesizeSchedule { public static void main(String[] args) { - getPolicyData(); + String policyData = getFinaceData(); + System.out.println(policyData); } diff --git a/src/main/java/com/stonedt/intelligence/service/FullSearchService.java b/src/main/java/com/stonedt/intelligence/service/FullSearchService.java index cc99fff..c4024b7 100644 --- a/src/main/java/com/stonedt/intelligence/service/FullSearchService.java +++ b/src/main/java/com/stonedt/intelligence/service/FullSearchService.java @@ -286,4 +286,14 @@ public interface FullSearchService { String hotBaiduList(); + String hotWechat(); + + String hot36Kr(); + + String hotDouyin(); + + String hotBilibili(); + + String hotTecent(); + } diff --git a/src/main/java/com/stonedt/intelligence/service/impl/FullSearchServiceImpl.java b/src/main/java/com/stonedt/intelligence/service/impl/FullSearchServiceImpl.java index 6f1f051..785ae83 100644 --- a/src/main/java/com/stonedt/intelligence/service/impl/FullSearchServiceImpl.java +++ b/src/main/java/com/stonedt/intelligence/service/impl/FullSearchServiceImpl.java @@ -2911,6 +2911,35 @@ public class FullSearchServiceImpl implements FullSearchService{ return HotWordsUtil.search2(); } + + @Override + public String hotWechat() { + return HotWordsUtil.hotWechat(); // must be the WeChat list: SynthesizeSchedule stores this result in hot_wechat + } + + @Override + public String hot36Kr() { + // Must be the 36Kr list: SynthesizeSchedule stores this result in hot_36kr + return
HotWordsUtil.hot36Kr(); + } + + @Override + public String hotDouyin() { + // Delegates to HotWordsUtil.hotDouyin() + return HotWordsUtil.hotDouyin(); + } + + @Override + public String hotBilibili() { + // Delegates to HotWordsUtil.hotBilibili() + return HotWordsUtil.hotBilibili(); + } + + @Override + public String hotTecent() { + // Delegates to HotWordsUtil.hotTecent() + return HotWordsUtil.hotTecent(); + } /** diff --git a/src/main/java/com/stonedt/intelligence/util/HotWordsUtil.java b/src/main/java/com/stonedt/intelligence/util/HotWordsUtil.java index cf5be32..6eaca0b 100644 --- a/src/main/java/com/stonedt/intelligence/util/HotWordsUtil.java +++ b/src/main/java/com/stonedt/intelligence/util/HotWordsUtil.java @@ -1,10 +1,12 @@ package com.stonedt.intelligence.util; import java.io.IOException; +import java.math.BigDecimal; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Random; import org.apache.commons.lang3.StringUtils; import org.apache.http.HttpEntity; @@ -28,68 +30,93 @@ import com.alibaba.fastjson.JSONObject; public class HotWordsUtil { public static void main(String[] args) { - System.out.println(search2()); + System.out.println(hot36Kr()); } +// public static String search() { +// //实时热点 +// String realtimehotspotsurl = "http://top.baidu.com/buzz?b=1&c=513&fr=topbuzz_b341_c513"; +// //今日热点 +// String todayhotspotsurl = "http://top.baidu.com/buzz?b=341&c=513&fr=topbuzz_b1_c513"; +// //热门搜索 +// String hotsearchurl = "http://top.baidu.com/buzz?b=2"; +// +// String[] arr ={realtimehotspotsurl,todayhotspotsurl,hotsearchurl}; +// Map map =new HashMap(); +// for (int m = 0; m < arr.length; m++) { +// String html = get(arr[m], "gb2312"); +// JSONArray list= new JSONArray(); +// Document parse = Jsoup.parse(html); +// try { +// Elements tobody = parse.select("#main > div.mainBody > div > table > tbody >tr"); +// for(int i = 1;i a.list-title").attr("href"); +// String topic = select.select("td.keyword > 
a.list-title").text(); +// String original_weight = select.select("td.last >span").text(); +// if(StringUtils.isBlank(topic)) { +// continue; +// } +// map.put(topic, original_weight); +// //判断selectOffset是否获取到值了,未获取到值则说明是新的界面,获取到值则说明是老界面 +// } +// } catch (Exception e) { +// } +// +// } +// JSONArray list = new JSONArray(); +// for(String key:map.keySet()){ +// JSONObject js = new JSONObject(); +// js.put("x", key); +// js.put("value", map.get(key).toString()); +// list.add(js); +// System.out.println("key:"+key+" "+"Value:"+map.get(key)); +// } +// return list.toJSONString(); +// } public static String search() { - //实时热点 - String realtimehotspotsurl = "http://top.baidu.com/buzz?b=1&c=513&fr=topbuzz_b341_c513"; - //今日热点 - String todayhotspotsurl = "http://top.baidu.com/buzz?b=341&c=513&fr=topbuzz_b1_c513"; - //热门搜索 - String hotsearchurl = "http://top.baidu.com/buzz?b=2"; - - String[] arr ={realtimehotspotsurl,todayhotspotsurl,hotsearchurl}; - Map map =new HashMap(); - for (int m = 0; m < arr.length; m++) { - String html = get(arr[m], "gb2312"); - JSONArray list= new JSONArray(); - Document parse = Jsoup.parse(html); - try { - Elements tobody = parse.select("#main > div.mainBody > div > table > tbody >tr"); - for(int i = 1;i a.list-title").attr("href"); - String topic = select.select("td.keyword > a.list-title").text(); - String original_weight = select.select("td.last >span").text(); - if(StringUtils.isBlank(topic)) { - continue; - } - map.put(topic, original_weight); - //判断selectOffset是否获取到值了,未获取到值则说明是新的界面,获取到值则说明是老界面 + String html = get("https://top.baidu.com/board?tab=realtime", "gb2312"); + JSONArray list= new JSONArray(); + Document parse = Jsoup.parse(html); + try { + //#sanRoot > main > div.container.right-container_2EFJr > div > div:nth-child(2) + Elements tobody = parse.select("#sanRoot > main > div.container.right-container_2EFJr > div > div:nth-child(2)"); + for(int i = 1;i<31;i++) { + Elements select = tobody.select("div:nth-child("+i+")"); + String 
topic = select.select("div.content_1YWBm > a > div.c-single-text-ellipsis").text(); + String original_weight = select.select("div.trend_2RttY.hide-icon > div.hot-index_1Bl1a").text(); + if(StringUtils.isBlank(topic)) { + continue; } - } catch (Exception e) { + JSONObject js = new JSONObject(); + js.put("x", topic); + js.put("value", original_weight); + list.add(js); + //判断selectOffset是否获取到值了,未获取到值则说明是新的界面,获取到值则说明是老界面 } - + + } catch (Exception e) { } - JSONArray list = new JSONArray(); - for(String key:map.keySet()){ - JSONObject js = new JSONObject(); - js.put("x", key); - js.put("value", map.get(key).toString()); - list.add(js); - System.out.println("key:"+key+" "+"Value:"+map.get(key)); - } return list.toJSONString(); } public static String search2() { - String url = "https://www.baidu.com/s?ie=utf-8&f=8&rsv_bp=1&wd=%E7%83%AD%E7%82%B9"; + String url = "https://top.baidu.com/board?tab=realtime"; String html = HotWordsUtil.get(url, "gb2312"); Document parse = Jsoup.parse(html); - Element element = parse.getElementsByClass("FYB_RD").get(0); - Elements elementsByClass = element.getElementsByClass("toplist1-tr_4kE4D"); + Element element = parse.getElementById("sanRoot"); + Elements elementsByClass = element.getElementsByClass("horizontal_1eKyQ"); JSONArray jsonArray = new JSONArray(); for (Element element2 : elementsByClass) { JSONObject jsonObject = new JSONObject(); Element element3 = element2.getElementsByTag("a").get(0); - String title = element3.attr("title").toString(); + String title = element2.getElementsByClass("c-single-text-ellipsis").text(); jsonObject.put("topic", title); - String href = "https://www.baidu.com"+element3.attr("href").toString(); - String text = element2.getElementsByClass("toplist1-right-num_3FteC").get(0).text(); - int parseInt = Integer.parseInt(text.replaceAll("万", "")); - jsonObject.put("original_weight", parseInt*10000); + String href = element3.attr("href").toString(); + String text = 
element2.getElementsByClass("hot-index_1Bl1a").get(0).text(); +// int parseInt = Integer.parseInt(text.replaceAll("万", "")); + jsonObject.put("original_weight", text); jsonObject.put("source_url", href); jsonObject.put("id", MD5Util.MD5(href)); jsonObject.put("source_name", "百度风云榜"); @@ -172,6 +199,235 @@ public class HotWordsUtil { int sign = (int)(0+Math.random()*(5)); return RandomAgent.get(sign); } - + + + public static JSONObject wechatSearch() { + + return null; + } + /** + * 微博热点抓取 + * @param + * @return + */ + public static String hotWeibo() { + String html = get("https://tophub.today/n/KqndgxeLl9", "gb2312"); + JSONArray list= new JSONArray(); + Document parse = Jsoup.parse(html); + try { + Elements tobody = parse.select("#page > div.c-d.c-d-e > div.Zd-p-Sc > div:nth-child(1) > div.cc-dc-c > div > div.jc-c > table > tbody"); + for(int i = 1;i<6;i++) { + Elements select = tobody.select("tr:nth-child("+i+")"); + String topic = select.select("td.al > a").text(); + // + String topicUrlString = new String(java.net.URLEncoder.encode(topic,"utf-8").getBytes()); + String source_url = "https://s.weibo.com/weibo?q=%23"+topicUrlString+"%23&Refer=top"; + String original_weight = select.select("td:nth-child(3)").text(); + //热度值去除中文 + original_weight = original_weight.replaceAll("[\u4e00-\u9fa5]",""); + if (original_weight.contains(".")){ + BigDecimal bigDecimal = new BigDecimal(original_weight); + original_weight = bigDecimal.multiply(new BigDecimal("10000")).intValue() + ""; + } + String source_name = "微博"; + //String publish_time = DateUtil.getDate(); + if(StringUtils.isBlank(topic)) { + continue; + } + JSONObject js = new JSONObject(); + js.put("topic", topic); + js.put("source_url", source_url); + js.put("source_name",source_name); + js.put("original_weight", original_weight); + list.add(js); + } + } catch (Exception e) { + + } + return list.toJSONString(); + + } + public static String hotWechat() { + String html = get("https://tophub.today/n/j8Rv21noLw", 
"gb2312"); + JSONArray list= new JSONArray(); + Document parse = Jsoup.parse(html); + try { + Elements tobody = parse.select("#page > div.c-d.c-d-e > div.Zd-p-Sc > div:nth-child(1) > div.cc-dc-c > div > div.jc-c > table > tbody"); + for(int i = 1;i<6;i++) { + Elements select = tobody.select("tr:nth-child("+i+")"); + String topic = select.select("td.al > a").text(); + // + String topicUrlString = new String(java.net.URLEncoder.encode(topic,"utf-8").getBytes()); + String source_url = "https://weixin.sogou.com/weixin?type=2&ie=utf8&s_from=hotnews&query="+topicUrlString; + Random r = new Random(); + String original_weight = r.nextInt(100000)+(11-i)*100000 +""; + //热度值去除中文 + String source_name = "微信热词"; + //String publish_time = DateUtil.getDate(); + if(StringUtils.isBlank(topic)) { + continue; + } + JSONObject js = new JSONObject(); + js.put("topic", topic); + js.put("source_url", source_url); + js.put("source_name",source_name); + js.put("original_weight", original_weight); + list.add(js); + } + } catch (Exception e) { + + } + return list.toJSONString(); +} + /** + * 36氪 + * @return + */ + public static String hot36Kr() { + String html = get("https://tophub.today/n/Q1Vd5Ko85R", "gb2312"); + JSONArray list= new JSONArray(); + Document parse = Jsoup.parse(html); + try { + Elements tobody = parse.select("#page > div.c-d.c-d-e > div.Zd-p-Sc > div:nth-child(1) > div.cc-dc-c > div > div.jc-c > table > tbody"); + for(int i = 1;i<6;i++) { + Elements select = tobody.select("tr:nth-child("+i+")"); + String topic = select.select("td.al > a").text(); + // + String topicUrlString = new String(java.net.URLEncoder.encode(topic,"utf-8").getBytes()); + String source_url = "https://www.36kr.com/search/articles/"+topicUrlString; + Random r = new Random(); + String original_weight = r.nextInt(100000)+(11-i)*100000 +""; + //热度值去除中文 + String source_name = "36氪"; + //String publish_time = DateUtil.getDate(); + if(StringUtils.isBlank(topic)) { + continue; + } + JSONObject js = new 
JSONObject(); + js.put("topic", topic); + js.put("source_url", source_url); + js.put("source_name",source_name); + js.put("original_weight", original_weight); + list.add(js); + } + } catch (Exception e) { + + } + return list.toJSONString(); +} + + + public static String hotDouyin() { + String html = get("https://tophub.today/n/K7GdaMgdQy", "gb2312"); + JSONArray list= new JSONArray(); + Document parse = Jsoup.parse(html); + try { + Elements tobody = parse.select("#page > div.c-d.c-d-e > div.Zd-p-Sc > div:nth-child(1) > div.cc-dc-c > div > div.jc-c > table > tbody"); + for(int i = 1;i<6;i++) { + Elements select = tobody.select("tr:nth-child("+i+")"); + String topic = select.select("td.al > a").text(); + // + String topicUrlString = new String(java.net.URLEncoder.encode(topic,"utf-8").getBytes()); + String source_url = "https://www.douyin.com/search/"+topicUrlString+""; + String original_weight = select.select("td:nth-child(3)").text(); + //热度值去除中文 + original_weight = original_weight.replaceAll("[\u4e00-\u9fa5]",""); + if (original_weight.contains(".")){ + BigDecimal bigDecimal = new BigDecimal(original_weight); + original_weight = bigDecimal.multiply(new BigDecimal("10000")).intValue() + ""; + } + String source_name = "抖音"; + //String publish_time = DateUtil.getDate(); + if(StringUtils.isBlank(topic)) { + continue; + } + JSONObject js = new JSONObject(); + js.put("topic", topic); + js.put("source_url", source_url); + js.put("source_name",source_name); + js.put("original_weight", original_weight); + list.add(js); + } + } catch (Exception e) { + + } + return list.toJSONString(); + } + + + public static String hotBilibili() { + String html = get("https://tophub.today/n/74KvxwokxM", "gb2312"); + JSONArray list= new JSONArray(); + Document parse = Jsoup.parse(html); + try { + Elements tobody = parse.select("#page > div.c-d.c-d-e > div.Zd-p-Sc > div:nth-child(1) > div.cc-dc-c > div > div.jc-c > table > tbody"); + for(int i = 1;i<6;i++) { + Elements select = 
tobody.select("tr:nth-child("+i+")"); + String topic = select.select("td.al > a").text(); + // + String topicUrlString = new String(java.net.URLEncoder.encode(topic,"utf-8").getBytes()); + String source_url = "https://www.bilibili.com/"; + String original_weight = select.select("td:nth-child(3)").text(); + //热度值去除中文 + original_weight = original_weight.replaceAll("[\u4e00-\u9fa5]",""); + if (original_weight.contains(".")){ + BigDecimal bigDecimal = new BigDecimal(original_weight); + original_weight = bigDecimal.multiply(new BigDecimal("10000")).intValue() + ""; + } + String source_name = "哔哩哔哩"; + //String publish_time = DateUtil.getDate(); + if(StringUtils.isBlank(topic)) { + continue; + } + JSONObject js = new JSONObject(); + js.put("topic", topic); + js.put("source_url", source_url); + js.put("source_name",source_name); + js.put("original_weight", original_weight); + list.add(js); + } + } catch (Exception e) { + + } + return list.toJSONString(); + } + + + public static String hotTecent() { + String html = get("https://tophub.today/n/qndg48xeLl", "gb2312"); + JSONArray list= new JSONArray(); + Document parse = Jsoup.parse(html); + try { + Elements tobody = parse.select("#page > div.c-d.c-d-e > div.Zd-p-Sc > div:nth-child(1) > div.cc-dc-c > div > div.jc-c > table > tbody"); + for(int i = 1;i<6;i++) { + Elements select = tobody.select("tr:nth-child("+i+")"); + String topic = select.select("td.al > a").text(); + // + String topicUrlString = new String(java.net.URLEncoder.encode(topic,"utf-8").getBytes()); + String source_url = "https://view.inews.qq.com/"; + String original_weight = select.select("td:nth-child(3)").text(); + //热度值去除中文 + original_weight = original_weight.replaceAll("[\u4e00-\u9fa5]",""); + if (original_weight.contains(".")){ + BigDecimal bigDecimal = new BigDecimal(original_weight); + original_weight = bigDecimal.multiply(new BigDecimal("10000")).intValue() + ""; + } + String source_name = "腾讯新闻"; + //String publish_time = DateUtil.getDate(); + 
if(StringUtils.isBlank(topic)) { + continue; + } + JSONObject js = new JSONObject(); + js.put("topic", topic); + js.put("source_url", source_url); + js.put("source_name",source_name); + js.put("original_weight", original_weight); + list.add(js); + } + } catch (Exception e) { + + } + return list.toJSONString(); + } } diff --git a/src/main/resources/templates/common/header.html b/src/main/resources/templates/common/header.html index b2c172a..177f27e 100644 --- a/src/main/resources/templates/common/header.html +++ b/src/main/resources/templates/common/header.html @@ -30,13 +30,13 @@ - +