Browse Source

热点更新

tags/v1.0.1-310
wangyi15124 2 years ago
parent
commit
c382f8377b
5 changed files with 416 additions and 118 deletions
  1. +73
    -70
      src/main/java/com/stonedt/intelligence/quartz/SynthesizeSchedule.java
  2. +10
    -0
      src/main/java/com/stonedt/intelligence/service/FullSearchService.java
  3. +29
    -0
      src/main/java/com/stonedt/intelligence/service/impl/FullSearchServiceImpl.java
  4. +302
    -46
      src/main/java/com/stonedt/intelligence/util/HotWordsUtil.java
  5. +2
    -2
      src/main/resources/templates/common/header.html

+ 73
- 70
src/main/java/com/stonedt/intelligence/quartz/SynthesizeSchedule.java View File

@@ -115,75 +115,77 @@ public class SynthesizeSchedule {
String hot_policydata = "";
String hot_finaceData = "";
String hot_36kr ="";
try {
FullSearchParam searchParam = new FullSearchParam();
searchParam.setPageNum(1);
searchParam.setPageSize(50);
searchParam.setSearchWord("");
searchParam.setClassify("4");
searchParam.setTimeType(1);
//热点事件
searchParam.setSource_name("百度风云榜");
//JSONObject hotList = fullSearchService.hotList(searchParam);
hot_all = fullSearchService.hotBaiduList();
//热门微博
searchParam.setSource_name("微博");
JSONObject hotList2 = fullSearchService.hotList(searchParam);
hot_weibo =conversionHotList(hotList2);
//热门微信
searchParam.setSource_name("微信");
JSONObject hotListWechat = fullSearchService.hotList(searchParam);
hot_wechat =conversionHotList(hotListWechat);
searchParam.setPageSize(10);
searchParam.setClassify("1");
//热门科技
searchParam.setSource_name("36kr");
JSONObject hotList36kr = fullSearchService.hotList(searchParam);
hot_36kr =conversionHotList(hotList36kr);
searchParam.setClassify("2");
searchParam.setTimeType(2);
searchParam.setPageSize(50);
//热门抖音
searchParam.setSource_name("抖音");
JSONObject hotListDouyin = fullSearchService.hotList(searchParam);
hot_douyin =conversionHotList(hotListDouyin);
//热门哔哩哔哩
searchParam.setSource_name("哔哩哔哩");
JSONObject hotListBiLiBiLi = fullSearchService.hotList(searchParam);
hot_bilibili =conversionHotList(hotListBiLiBiLi);
//热门腾讯视频
searchParam.setSource_name("腾讯视频");
JSONObject hotListTecentVedio = fullSearchService.hotList(searchParam);
hot_tecentvedio =conversionHotList(hotListTecentVedio);
hot_search_terms = HotWordsUtil.search();
//政策--------国务院 > 首页 > 政策 > 最新 http://www.gov.cn/zhengce/zuixin.htm
hot_policydata = getPolicyData();
//经济--------东方财富网(国内经济首页 > 财经频道 > 焦点 > 国内经济) http://finance.eastmoney.com/a/cgnjj.html
hot_finaceData = getFinaceData();
} catch (UnsupportedEncodingException e1) {
// TODO Auto-generated catch block
e1.printStackTrace();
}
FullSearchParam searchParam = new FullSearchParam();
searchParam.setPageNum(1);
searchParam.setPageSize(50);
searchParam.setSearchWord("");
searchParam.setClassify("4");
searchParam.setTimeType(1);
//热点事件
searchParam.setSource_name("百度风云榜");
//JSONObject hotList = fullSearchService.hotList(searchParam);
hot_all = fullSearchService.hotBaiduList();
//热门微博
searchParam.setSource_name("微博");
//JSONObject hotList2 = fullSearchService.hotList(searchParam);
//hot_weibo =conversionHotList(hotList2);
hot_weibo = HotWordsUtil.hotWeibo();
//热门微信
searchParam.setSource_name("微信");
//JSONObject hotListWechat = fullSearchService.hotList(searchParam);
//hot_wechat =conversionHotList(hotListWechat);
hot_wechat = fullSearchService.hotWechat();
searchParam.setPageSize(10);
searchParam.setClassify("1");
//热门科技
searchParam.setSource_name("36kr");
//JSONObject hotList36kr = fullSearchService.hotList(searchParam);
//hot_36kr =conversionHotList(hotList36kr);
hot_36kr = fullSearchService.hot36Kr();
searchParam.setClassify("2");
searchParam.setTimeType(2);
searchParam.setPageSize(50);
//热门抖音
//searchParam.setSource_name("抖音");
///JSONObject hotListDouyin = fullSearchService.hotList(searchParam);
//hot_douyin =conversionHotList(hotListDouyin);
hot_douyin = fullSearchService.hotDouyin();
//热门哔哩哔哩
//searchParam.setSource_name("哔哩哔哩");
//JSONObject hotListBiLiBiLi = fullSearchService.hotList(searchParam);
//hot_bilibili =conversionHotList(hotListBiLiBiLi);
hot_bilibili =fullSearchService.hotBilibili();
//热门腾讯视频
//searchParam.setSource_name("腾讯视频");
//JSONObject hotListTecentVedio = fullSearchService.hotList(searchParam);
//hot_tecentvedio =conversionHotList(hotListTecentVedio);
hot_tecentvedio =fullSearchService.hotTecent();
hot_search_terms = HotWordsUtil.search();
//政策--------国务院 > 首页 > 政策 > 最新 http://www.gov.cn/zhengce/zuixin.htm
hot_policydata = getPolicyData();
//经济--------东方财富网(国内经济首页 > 财经频道 > 焦点 > 国内经济) http://finance.eastmoney.com/a/cgnjj.html
hot_finaceData = getFinaceData();
try {
Map<String, Object> map = new HashMap<String, Object>();
@@ -498,7 +500,8 @@ public class SynthesizeSchedule {
/**
 * Manual smoke-test entry point: fetches the policy feed and the finance feed,
 * and prints the finance feed to standard output.
 *
 * @param args unused command-line arguments
 */
public static void main(String[] args) {
    // The policy feed is fetched only for its side effects; its result is discarded.
    getPolicyData();
    System.out.println(getFinaceData());
}


+ 10
- 0
src/main/java/com/stonedt/intelligence/service/FullSearchService.java View File

@@ -286,4 +286,14 @@ public interface FullSearchService {
/**
 * Returns the Baidu hot list.
 * NOTE(review): the implementation is not fully visible in this change; presumably a JSON string - confirm.
 */
String hotBaiduList();
/**
 * Returns the WeChat hot-topic list.
 * NOTE(review): presumably a JSON array string like the other hot-list methods - confirm against the implementation.
 */
String hotWechat();
/**
 * Returns the 36Kr hot-topic list.
 * NOTE(review): presumably a JSON array string like the other hot-list methods - confirm against the implementation.
 */
String hot36Kr();
/**
 * Returns the Douyin hot-topic list; the implementation in this change delegates to
 * HotWordsUtil.hotDouyin(), which returns a serialized JSON array.
 */
String hotDouyin();
/**
 * Returns the Bilibili hot-topic list; the implementation in this change delegates to
 * HotWordsUtil.hotBilibili(), which returns a serialized JSON array.
 */
String hotBilibili();
/**
 * Returns the Tencent hot-topic list; the implementation in this change delegates to
 * HotWordsUtil.hotTecent(), which returns a serialized JSON array.
 */
String hotTecent();
}

+ 29
- 0
src/main/java/com/stonedt/intelligence/service/impl/FullSearchServiceImpl.java View File

@@ -2911,6 +2911,35 @@ public class FullSearchServiceImpl implements FullSearchService{
return HotWordsUtil.search2();
}
/**
 * Returns the WeChat hot-topic list as produced by {@link HotWordsUtil#hotWechat()}.
 *
 * @return the serialized JSON array of WeChat hot topics
 */
@Override
public String hotWechat() {
    // Fix: this previously delegated to HotWordsUtil.hot36Kr(), so callers asking
    // for the WeChat list received the 36Kr list (and vice versa).
    return HotWordsUtil.hotWechat();
}

/**
 * Returns the 36Kr hot-topic list as produced by {@link HotWordsUtil#hot36Kr()}.
 *
 * @return the serialized JSON array of 36Kr hot topics
 */
@Override
public String hot36Kr() {
    // Fix: this previously delegated to HotWordsUtil.hotWechat().
    return HotWordsUtil.hot36Kr();
}
/**
 * Returns the Douyin hot-topic list by delegating to {@link HotWordsUtil#hotDouyin()}.
 *
 * @return whatever string the utility returns (a serialized JSON array)
 */
@Override
public String hotDouyin() {
    final String douyinTopics = HotWordsUtil.hotDouyin();
    return douyinTopics;
}
/**
 * Returns the Bilibili hot-topic list by delegating to {@link HotWordsUtil#hotBilibili()}.
 *
 * @return whatever string the utility returns (a serialized JSON array)
 */
@Override
public String hotBilibili() {
    final String bilibiliTopics = HotWordsUtil.hotBilibili();
    return bilibiliTopics;
}
/**
 * Returns the Tencent hot-topic list by delegating to {@link HotWordsUtil#hotTecent()}.
 *
 * @return whatever string the utility returns (a serialized JSON array)
 */
@Override
public String hotTecent() {
    final String tencentTopics = HotWordsUtil.hotTecent();
    return tencentTopics;
}
/**


+ 302
- 46
src/main/java/com/stonedt/intelligence/util/HotWordsUtil.java View File

@@ -1,10 +1,12 @@
package com.stonedt.intelligence.util;
import java.io.IOException;
import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;
import org.apache.commons.lang3.StringUtils;
import org.apache.http.HttpEntity;
@@ -28,68 +30,93 @@ import com.alibaba.fastjson.JSONObject;
public class HotWordsUtil {
/**
 * Manual smoke test: runs the scrapers and prints their results to standard output.
 *
 * @param args unused command-line arguments
 */
public static void main(String[] args) {
// Prints the result of search2().
System.out.println(search2());
// Prints the 36Kr hot-topic JSON produced by hot36Kr().
System.out.println(hot36Kr());
}
// public static String search() {
// //实时热点
// String realtimehotspotsurl = "http://top.baidu.com/buzz?b=1&c=513&fr=topbuzz_b341_c513";
// //今日热点
// String todayhotspotsurl = "http://top.baidu.com/buzz?b=341&c=513&fr=topbuzz_b1_c513";
// //热门搜索
// String hotsearchurl = "http://top.baidu.com/buzz?b=2";
//
// String[] arr ={realtimehotspotsurl,todayhotspotsurl,hotsearchurl};
// Map<String,Object> map =new HashMap<String,Object>();
// for (int m = 0; m < arr.length; m++) {
// String html = get(arr[m], "gb2312");
// JSONArray list= new JSONArray();
// Document parse = Jsoup.parse(html);
// try {
// Elements tobody = parse.select("#main > div.mainBody > div > table > tbody >tr");
// for(int i = 1;i<tobody.size();i++) {
// Elements select = tobody.get(i).select("td");
// String rank = select.select("td.first").text();
// String source_url = select.select("td.keyword > a.list-title").attr("href");
// String topic = select.select("td.keyword > a.list-title").text();
// String original_weight = select.select("td.last >span").text();
// if(StringUtils.isBlank(topic)) {
// continue;
// }
// map.put(topic, original_weight);
// //判断selectOffset是否获取到值了,未获取到值则说明是新的界面,获取到值则说明是老界面
// }
// } catch (Exception e) {
// }
//
// }
// JSONArray list = new JSONArray();
// for(String key:map.keySet()){
// JSONObject js = new JSONObject();
// js.put("x", key);
// js.put("value", map.get(key).toString());
// list.add(js);
// System.out.println("key:"+key+" "+"Value:"+map.get(key));
// }
// return list.toJSONString();
// }
public static String search() {
//实时热点
String realtimehotspotsurl = "http://top.baidu.com/buzz?b=1&c=513&fr=topbuzz_b341_c513";
//今日热点
String todayhotspotsurl = "http://top.baidu.com/buzz?b=341&c=513&fr=topbuzz_b1_c513";
//热门搜索
String hotsearchurl = "http://top.baidu.com/buzz?b=2";
String[] arr ={realtimehotspotsurl,todayhotspotsurl,hotsearchurl};
Map<String,Object> map =new HashMap<String,Object>();
for (int m = 0; m < arr.length; m++) {
String html = get(arr[m], "gb2312");
JSONArray list= new JSONArray();
Document parse = Jsoup.parse(html);
try {
Elements tobody = parse.select("#main > div.mainBody > div > table > tbody >tr");
for(int i = 1;i<tobody.size();i++) {
Elements select = tobody.get(i).select("td");
String rank = select.select("td.first").text();
String source_url = select.select("td.keyword > a.list-title").attr("href");
String topic = select.select("td.keyword > a.list-title").text();
String original_weight = select.select("td.last >span").text();
if(StringUtils.isBlank(topic)) {
continue;
}
map.put(topic, original_weight);
//判断selectOffset是否获取到值了,未获取到值则说明是新的界面,获取到值则说明是老界面
String html = get("https://top.baidu.com/board?tab=realtime", "gb2312");
JSONArray list= new JSONArray();
Document parse = Jsoup.parse(html);
try {
//#sanRoot > main > div.container.right-container_2EFJr > div > div:nth-child(2)
Elements tobody = parse.select("#sanRoot > main > div.container.right-container_2EFJr > div > div:nth-child(2)");
for(int i = 1;i<31;i++) {
Elements select = tobody.select("div:nth-child("+i+")");
String topic = select.select("div.content_1YWBm > a > div.c-single-text-ellipsis").text();
String original_weight = select.select("div.trend_2RttY.hide-icon > div.hot-index_1Bl1a").text();
if(StringUtils.isBlank(topic)) {
continue;
}
} catch (Exception e) {
JSONObject js = new JSONObject();
js.put("x", topic);
js.put("value", original_weight);
list.add(js);
//判断selectOffset是否获取到值了,未获取到值则说明是新的界面,获取到值则说明是老界面
}
} catch (Exception e) {
}
JSONArray list = new JSONArray();
for(String key:map.keySet()){
JSONObject js = new JSONObject();
js.put("x", key);
js.put("value", map.get(key).toString());
list.add(js);
System.out.println("key:"+key+" "+"Value:"+map.get(key));
}
return list.toJSONString();
}
public static String search2() {
String url = "https://www.baidu.com/s?ie=utf-8&f=8&rsv_bp=1&wd=%E7%83%AD%E7%82%B9";
String url = "https://top.baidu.com/board?tab=realtime";
String html = HotWordsUtil.get(url, "gb2312");
Document parse = Jsoup.parse(html);
Element element = parse.getElementsByClass("FYB_RD").get(0);
Elements elementsByClass = element.getElementsByClass("toplist1-tr_4kE4D");
Element element = parse.getElementById("sanRoot");
Elements elementsByClass = element.getElementsByClass("horizontal_1eKyQ");
JSONArray jsonArray = new JSONArray();
for (Element element2 : elementsByClass) {
JSONObject jsonObject = new JSONObject();
Element element3 = element2.getElementsByTag("a").get(0);
String title = element3.attr("title").toString();
String title = element2.getElementsByClass("c-single-text-ellipsis").text();
jsonObject.put("topic", title);
String href = "https://www.baidu.com"+element3.attr("href").toString();
String text = element2.getElementsByClass("toplist1-right-num_3FteC").get(0).text();
int parseInt = Integer.parseInt(text.replaceAll("万", ""));
jsonObject.put("original_weight", parseInt*10000);
String href = element3.attr("href").toString();
String text = element2.getElementsByClass("hot-index_1Bl1a").get(0).text();
// int parseInt = Integer.parseInt(text.replaceAll("万", ""));
jsonObject.put("original_weight", text);
jsonObject.put("source_url", href);
jsonObject.put("id", MD5Util.MD5(href));
jsonObject.put("source_name", "百度风云榜");
@@ -172,6 +199,235 @@ public class HotWordsUtil {
int sign = (int)(0+Math.random()*(5));
return RandomAgent.get(sign);
}
/**
 * Placeholder for a WeChat search; it is not implemented.
 *
 * @return always {@code null}, so callers must null-check the result
 */
public static JSONObject wechatSearch() {
// Stub: no lookup is performed.
return null;
}
/**
 * Scrapes the Weibo hot-topic ranking from its tophub.today board page.
 *
 * <p>Reads the first five rows of the ranking table. Every row with a non-blank
 * topic becomes a JSON object with the keys {@code topic}, {@code source_url}
 * (a Weibo search link for the topic), {@code source_name} and
 * {@code original_weight}.
 *
 * @return the collected topics serialized as a JSON array; an empty array when
 *         the page could not be downloaded or parsed
 */
public static String hotWeibo() {
    JSONArray list = new JSONArray();
    // NOTE(review): the page is requested with charset "gb2312" - confirm it matches the site's encoding.
    String html = get("https://tophub.today/n/KqndgxeLl9", "gb2312");
    if (StringUtils.isBlank(html)) {
        // Nothing was downloaded: return an empty list instead of failing inside Jsoup.
        return list.toJSONString();
    }
    try {
        Document page = Jsoup.parse(html);
        Elements tbody = page.select("#page > div.c-d.c-d-e > div.Zd-p-Sc > div:nth-child(1) > div.cc-dc-c > div > div.jc-c > table > tbody");
        for (int i = 1; i < 6; i++) {
            Elements row = tbody.select("tr:nth-child(" + i + ")");
            String topic = row.select("td.al > a").text();
            if (StringUtils.isBlank(topic)) {
                // Missing row or empty title: nothing to report for this rank.
                continue;
            }
            // URLEncoder output is plain ASCII, so no byte/charset round trip is needed.
            String encodedTopic = java.net.URLEncoder.encode(topic, "utf-8");
            String source_url = "https://s.weibo.com/weibo?q=%23" + encodedTopic + "%23&Refer=top";
            // Strip the Chinese unit characters from the heat value.
            String original_weight = row.select("td:nth-child(3)").text().replaceAll("[\u4e00-\u9fa5]", "").trim();
            // NOTE(review): a decimal value is treated as a multiple of 10,000 while an
            // integer value is kept as-is - confirm this matches the page's units.
            if (original_weight.contains(".")) {
                try {
                    original_weight = new BigDecimal(original_weight).multiply(new BigDecimal("10000")).intValue() + "";
                } catch (NumberFormatException ignored) {
                    // Unparseable heat value: keep the stripped text so one bad row
                    // does not discard the remaining rows.
                }
            }
            JSONObject js = new JSONObject();
            js.put("topic", topic);
            js.put("source_url", source_url);
            js.put("source_name", "微博");
            js.put("original_weight", original_weight);
            list.add(js);
        }
    } catch (Exception e) {
        // Best effort: return whatever was collected, but do not hide the failure.
        System.err.println("HotWordsUtil.hotWeibo: failed to parse ranking page: " + e);
    }
    return list.toJSONString();
}
/**
 * Scrapes the WeChat hot-word ranking from its tophub.today board page.
 *
 * <p>Reads the first five rows of the ranking table. Every row with a non-blank
 * topic becomes a JSON object with the keys {@code topic}, {@code source_url}
 * (a Sogou WeChat search link for the topic), {@code source_name} and
 * {@code original_weight}. The weight is not read from the page: it is
 * synthesized as a random value below 100000 plus {@code (11 - rank) * 100000},
 * so higher-ranked rows always receive larger weights.
 *
 * @return the collected topics serialized as a JSON array; an empty array when
 *         the page could not be downloaded or parsed
 */
public static String hotWechat() {
    JSONArray list = new JSONArray();
    // NOTE(review): the page is requested with charset "gb2312" - confirm it matches the site's encoding.
    String html = get("https://tophub.today/n/j8Rv21noLw", "gb2312");
    if (StringUtils.isBlank(html)) {
        // Nothing was downloaded: return an empty list instead of failing inside Jsoup.
        return list.toJSONString();
    }
    try {
        Document page = Jsoup.parse(html);
        Elements tbody = page.select("#page > div.c-d.c-d-e > div.Zd-p-Sc > div:nth-child(1) > div.cc-dc-c > div > div.jc-c > table > tbody");
        // One generator for the whole scrape instead of a new one per row.
        Random random = new Random();
        for (int i = 1; i < 6; i++) {
            Elements row = tbody.select("tr:nth-child(" + i + ")");
            String topic = row.select("td.al > a").text();
            if (StringUtils.isBlank(topic)) {
                // Missing row or empty title: nothing to report for this rank.
                continue;
            }
            // URLEncoder output is plain ASCII, so no byte/charset round trip is needed.
            String encodedTopic = java.net.URLEncoder.encode(topic, "utf-8");
            String source_url = "https://weixin.sogou.com/weixin?type=2&ie=utf8&s_from=hotnews&query=" + encodedTopic;
            // Synthetic, rank-ordered weight (see the method documentation).
            String original_weight = random.nextInt(100000) + (11 - i) * 100000 + "";
            JSONObject js = new JSONObject();
            js.put("topic", topic);
            js.put("source_url", source_url);
            js.put("source_name", "微信热词");
            js.put("original_weight", original_weight);
            list.add(js);
        }
    } catch (Exception e) {
        // Best effort: return whatever was collected, but do not hide the failure.
        System.err.println("HotWordsUtil.hotWechat: failed to parse ranking page: " + e);
    }
    return list.toJSONString();
}
/**
 * Scrapes the 36Kr ranking from its tophub.today board page.
 *
 * <p>Reads the first five rows of the ranking table. Every row with a non-blank
 * topic becomes a JSON object with the keys {@code topic}, {@code source_url}
 * (a 36Kr article-search link for the topic), {@code source_name} and
 * {@code original_weight}. The weight is not read from the page: it is
 * synthesized as a random value below 100000 plus {@code (11 - rank) * 100000},
 * so higher-ranked rows always receive larger weights.
 *
 * @return the collected topics serialized as a JSON array; an empty array when
 *         the page could not be downloaded or parsed
 */
public static String hot36Kr() {
    JSONArray list = new JSONArray();
    // NOTE(review): the page is requested with charset "gb2312" - confirm it matches the site's encoding.
    String html = get("https://tophub.today/n/Q1Vd5Ko85R", "gb2312");
    if (StringUtils.isBlank(html)) {
        // Nothing was downloaded: return an empty list instead of failing inside Jsoup.
        return list.toJSONString();
    }
    try {
        Document page = Jsoup.parse(html);
        Elements tbody = page.select("#page > div.c-d.c-d-e > div.Zd-p-Sc > div:nth-child(1) > div.cc-dc-c > div > div.jc-c > table > tbody");
        // One generator for the whole scrape instead of a new one per row.
        Random random = new Random();
        for (int i = 1; i < 6; i++) {
            Elements row = tbody.select("tr:nth-child(" + i + ")");
            String topic = row.select("td.al > a").text();
            if (StringUtils.isBlank(topic)) {
                // Missing row or empty title: nothing to report for this rank.
                continue;
            }
            // URLEncoder output is plain ASCII, so no byte/charset round trip is needed.
            String encodedTopic = java.net.URLEncoder.encode(topic, "utf-8");
            String source_url = "https://www.36kr.com/search/articles/" + encodedTopic;
            // Synthetic, rank-ordered weight (see the method documentation).
            String original_weight = random.nextInt(100000) + (11 - i) * 100000 + "";
            JSONObject js = new JSONObject();
            js.put("topic", topic);
            js.put("source_url", source_url);
            js.put("source_name", "36氪");
            js.put("original_weight", original_weight);
            list.add(js);
        }
    } catch (Exception e) {
        // Best effort: return whatever was collected, but do not hide the failure.
        System.err.println("HotWordsUtil.hot36Kr: failed to parse ranking page: " + e);
    }
    return list.toJSONString();
}
/**
 * Scrapes the Douyin hot-topic ranking from its tophub.today board page.
 *
 * <p>Reads the first five rows of the ranking table. Every row with a non-blank
 * topic becomes a JSON object with the keys {@code topic}, {@code source_url}
 * (a Douyin search link for the topic), {@code source_name} and
 * {@code original_weight}.
 *
 * @return the collected topics serialized as a JSON array; an empty array when
 *         the page could not be downloaded or parsed
 */
public static String hotDouyin() {
    JSONArray list = new JSONArray();
    // NOTE(review): the page is requested with charset "gb2312" - confirm it matches the site's encoding.
    String html = get("https://tophub.today/n/K7GdaMgdQy", "gb2312");
    if (StringUtils.isBlank(html)) {
        // Nothing was downloaded: return an empty list instead of failing inside Jsoup.
        return list.toJSONString();
    }
    try {
        Document page = Jsoup.parse(html);
        Elements tbody = page.select("#page > div.c-d.c-d-e > div.Zd-p-Sc > div:nth-child(1) > div.cc-dc-c > div > div.jc-c > table > tbody");
        for (int i = 1; i < 6; i++) {
            Elements row = tbody.select("tr:nth-child(" + i + ")");
            String topic = row.select("td.al > a").text();
            if (StringUtils.isBlank(topic)) {
                // Missing row or empty title: nothing to report for this rank.
                continue;
            }
            // URLEncoder output is plain ASCII, so no byte/charset round trip is needed.
            String encodedTopic = java.net.URLEncoder.encode(topic, "utf-8");
            String source_url = "https://www.douyin.com/search/" + encodedTopic + "";
            // Strip the Chinese unit characters from the heat value.
            String original_weight = row.select("td:nth-child(3)").text().replaceAll("[\u4e00-\u9fa5]", "").trim();
            // NOTE(review): a decimal value is treated as a multiple of 10,000 while an
            // integer value is kept as-is - confirm this matches the page's units.
            if (original_weight.contains(".")) {
                try {
                    original_weight = new BigDecimal(original_weight).multiply(new BigDecimal("10000")).intValue() + "";
                } catch (NumberFormatException ignored) {
                    // Unparseable heat value: keep the stripped text so one bad row
                    // does not discard the remaining rows.
                }
            }
            JSONObject js = new JSONObject();
            js.put("topic", topic);
            js.put("source_url", source_url);
            js.put("source_name", "抖音");
            js.put("original_weight", original_weight);
            list.add(js);
        }
    } catch (Exception e) {
        // Best effort: return whatever was collected, but do not hide the failure.
        System.err.println("HotWordsUtil.hotDouyin: failed to parse ranking page: " + e);
    }
    return list.toJSONString();
}
/**
 * Scrapes the Bilibili hot-topic ranking from its tophub.today board page.
 *
 * <p>Reads the first five rows of the ranking table. Every row with a non-blank
 * topic becomes a JSON object with the keys {@code topic}, {@code source_url},
 * {@code source_name} and {@code original_weight}. The {@code source_url} is
 * always the Bilibili home page; no per-topic link is built.
 *
 * @return the collected topics serialized as a JSON array; an empty array when
 *         the page could not be downloaded or parsed
 */
public static String hotBilibili() {
    JSONArray list = new JSONArray();
    // NOTE(review): the page is requested with charset "gb2312" - confirm it matches the site's encoding.
    String html = get("https://tophub.today/n/74KvxwokxM", "gb2312");
    if (StringUtils.isBlank(html)) {
        // Nothing was downloaded: return an empty list instead of failing inside Jsoup.
        return list.toJSONString();
    }
    try {
        Document page = Jsoup.parse(html);
        Elements tbody = page.select("#page > div.c-d.c-d-e > div.Zd-p-Sc > div:nth-child(1) > div.cc-dc-c > div > div.jc-c > table > tbody");
        for (int i = 1; i < 6; i++) {
            Elements row = tbody.select("tr:nth-child(" + i + ")");
            String topic = row.select("td.al > a").text();
            if (StringUtils.isBlank(topic)) {
                // Missing row or empty title: nothing to report for this rank.
                continue;
            }
            // Strip the Chinese unit characters from the heat value.
            String original_weight = row.select("td:nth-child(3)").text().replaceAll("[\u4e00-\u9fa5]", "").trim();
            // NOTE(review): a decimal value is treated as a multiple of 10,000 while an
            // integer value is kept as-is - confirm this matches the page's units.
            if (original_weight.contains(".")) {
                try {
                    original_weight = new BigDecimal(original_weight).multiply(new BigDecimal("10000")).intValue() + "";
                } catch (NumberFormatException ignored) {
                    // Unparseable heat value: keep the stripped text so one bad row
                    // does not discard the remaining rows.
                }
            }
            JSONObject js = new JSONObject();
            js.put("topic", topic);
            js.put("source_url", "https://www.bilibili.com/");
            js.put("source_name", "哔哩哔哩");
            js.put("original_weight", original_weight);
            list.add(js);
        }
    } catch (Exception e) {
        // Best effort: return whatever was collected, but do not hide the failure.
        System.err.println("HotWordsUtil.hotBilibili: failed to parse ranking page: " + e);
    }
    return list.toJSONString();
}
/**
 * Scrapes the Tencent ranking from its tophub.today board page.
 *
 * <p>Reads the first five rows of the ranking table. Every row with a non-blank
 * topic becomes a JSON object with the keys {@code topic}, {@code source_url},
 * {@code source_name} and {@code original_weight}. The {@code source_url} is
 * always the fixed Tencent news address; no per-topic link is built.
 *
 * @return the collected topics serialized as a JSON array; an empty array when
 *         the page could not be downloaded or parsed
 */
public static String hotTecent() {
    JSONArray list = new JSONArray();
    // NOTE(review): the page is requested with charset "gb2312" - confirm it matches the site's encoding.
    String html = get("https://tophub.today/n/qndg48xeLl", "gb2312");
    if (StringUtils.isBlank(html)) {
        // Nothing was downloaded: return an empty list instead of failing inside Jsoup.
        return list.toJSONString();
    }
    try {
        Document page = Jsoup.parse(html);
        Elements tbody = page.select("#page > div.c-d.c-d-e > div.Zd-p-Sc > div:nth-child(1) > div.cc-dc-c > div > div.jc-c > table > tbody");
        for (int i = 1; i < 6; i++) {
            Elements row = tbody.select("tr:nth-child(" + i + ")");
            String topic = row.select("td.al > a").text();
            if (StringUtils.isBlank(topic)) {
                // Missing row or empty title: nothing to report for this rank.
                continue;
            }
            // Strip the Chinese unit characters from the heat value.
            String original_weight = row.select("td:nth-child(3)").text().replaceAll("[\u4e00-\u9fa5]", "").trim();
            // NOTE(review): a decimal value is treated as a multiple of 10,000 while an
            // integer value is kept as-is - confirm this matches the page's units.
            if (original_weight.contains(".")) {
                try {
                    original_weight = new BigDecimal(original_weight).multiply(new BigDecimal("10000")).intValue() + "";
                } catch (NumberFormatException ignored) {
                    // Unparseable heat value: keep the stripped text so one bad row
                    // does not discard the remaining rows.
                }
            }
            JSONObject js = new JSONObject();
            js.put("topic", topic);
            js.put("source_url", "https://view.inews.qq.com/");
            js.put("source_name", "腾讯新闻");
            js.put("original_weight", original_weight);
            list.add(js);
        }
    } catch (Exception e) {
        // Best effort: return whatever was collected, but do not hide the failure.
        System.err.println("HotWordsUtil.hotTecent: failed to parse ranking page: " + e);
    }
    return list.toJSONString();
}
}

+ 2
- 2
src/main/resources/templates/common/header.html View File

@@ -30,13 +30,13 @@
<i class="mdi mdi-menu font-24"></i>
</a>
</li>
<!-- <li data-type="displayboard" th:classappend="${menu=='displayboard'?'nav-active3':''}"
<li data-type="displayboard" th:classappend="${menu=='displayboard'?'nav-active3':''}"
class="nav-item dropdown hover-none navLi" id="zhkb">
<a class="nav-link dropdown-toggle waves-effect waves-dark">
<span class="d-none d-md-block"> <i class="fa fa-list-alt"></i> 今日热点</span>
<span class="d-block d-md-none">今日热点 </span>
</a>
</li> -->
</li>
<li data-type="analysis" th:classappend="${menu=='analysis'?'nav-active3':''}"
class="nav-item dropdown hover-none navLi">
<a class="nav-link dropdown-toggle waves-effect waves-dark">


Loading…
Cancel
Save