chTypes) {
for (int i = 0; i < chTypes.size(); i++) {
for (int j = 0; j < ch.length; j++) {
if (chTypes.get(i).equals(ch[j])) {
chTypes.set(i, en[j]);
}
}
}
chTypes.add(0, "news_global");
return chTypes;
}
/**
 * Translates a single English type identifier into its Chinese label.
 *
 * <p>Scans the {@code en} array in order and returns the entry of {@code ch}
 * at the index of the first match.
 *
 * @param enType the English type identifier to look up
 * @return the matching entry of {@code ch}, or {@code null} when no entry of
 *         {@code en} equals {@code enType}
 */
public static String typeTransSingleEnToCh(String enType) {
    int index = 0;
    while (index < en.length) {
        if (enType.equals(en[index])) {
            return ch[index];
        }
        index++;
    }
    return null;
}
/**
 * Translates a single Chinese type label into its English identifier.
 *
 * <p>Scans the {@code ch} array in order and returns the entry of {@code en}
 * at the index of the first match.
 *
 * @param chType the Chinese type label to look up
 * @return the matching entry of {@code en}, or {@code null} when no entry of
 *         {@code ch} equals {@code chType}
 */
public static String typeTransSingleChToEn(String chType) {
    int index = 0;
    while (index < ch.length) {
        if (chType.equals(ch[index])) {
            return en[index];
        }
        index++;
    }
    return null;
}
}
================================================
FILE: back/src/main/resources/mapper/ArtFeatureCountMap.xml
================================================
update ArtFeatureCount
set ${column} = ${column} + 1, afc_art_time=afc_art_time
where afc_art_id = #{artId}
================================================
FILE: back/src/main/resources/mapper/ArtMap.xml
================================================
insert into Article(art_title, art_cus_id, art_content, art_image_url, art_type, art_legal)
values(#{artTitle}, #{artCusId}, #{artContent}, #{artImageUrl}, #{artType}, #{artLegal})
================================================
FILE: back/src/main/resources/mapper/ArtScoreListMap.xml
================================================
================================================
FILE: back/src/main/resources/mapper/ArtTimeListMap.xml
================================================
================================================
FILE: back/src/main/resources/mapper/ComMap.xml
================================================
insert into Comment(com_content, com_legal, com_cus_id, com_art_id)
values(#{comContent}, #{comLegal}, #{comCusId}, #{comArtId})
================================================
FILE: back/src/main/resources/mapper/CusBehaviorRecordMap.xml
================================================
insert into CusBehaviorRecord(cbr_cus_id_from, cbr_cus_id_to, cbr_behavior)
values (#{param1}, #{param2}, 11)
insert into CusBehaviorRecord(cbr_cus_id_from, cbr_cus_id_to, cbr_behavior, cbr_art_id, cbr_type, cbr_target_id)
values (#{cusIdFrom}, #{cusIdTo}, #{behavior}, #{artId}, #{type}, #{targetId})
delete from CusBehaviorRecord
where cbr_behavior = 11 and cbr_cus_id_from = #{param1} and cbr_cus_id_to = #{param2};
delete from CusBehaviorRecord
where
cbr_cus_id_from = #{cusIdFrom} and cbr_cus_id_to = #{cusIdTo} and cbr_behavior = #{behavior} and
cbr_art_id = #{artId} and cbr_type = #{type} and cbr_target_id = #{targetId}
================================================
FILE: back/src/main/resources/mapper/CusFeatureCountMap.xml
================================================
insert into CusFeatureCount (cfc_cus_id) value(#{cusId})
update CusFeatureCount
set ${column} = ${column} + #{num}
where cfc_cus_id = #{cusId}
================================================
FILE: back/src/main/resources/mapper/CusMap.xml
================================================
insert into Customer(cus_name, cus_pass) values(#{cusName}, #{cusPass});
update Customer
set cus_name = #{cusName}, cus_avatar_url = #{cusAvatarUrl}, cus_style = #{cusStyle}, cus_gender = #{cusGender}
where cus_id = #{cusId}
update Customer
set cus_name = #{cusName}, cus_avatar_url = #{cusAvatarUrl}, cus_style = #{cusStyle}, cus_gender = #{cusGender},
cus_pass = #{cusPass}
where cus_id = #{cusId}
================================================
FILE: back/src/main/resources/mapper/CusRecommendRecordDao.xml
================================================
INSERT INTO CusRecommendRecord(crr_cus_id, crr_art_id)
VALUES
(#{cusId}, #{tar})
================================================
FILE: back/src/main/resources/mapper/RepMap.xml
================================================
insert into Reply(rep_content, rep_type, rep_legal, rep_cus_id, rep_art_id, rep_com_id, rep_rep_id)
values (#{repContent}, #{repType}, #{repLegal}, #{repCusId}, #{repArtId}, #{repComId}, #{repRepId})
================================================
FILE: back/src/main/resources/templates/application.properties.template
================================================
spring.datasource.driverClassName = com.mysql.jdbc.Driver
spring.datasource.url = jdbc:mysql://
spring.datasource.username =
spring.datasource.password =
mybatis.configuration.map-underscore-to-camel-case=true
mybatis.mapper-locations=classpath:mapper/*.xml
================================================
FILE: back/src/test/java/com/smacul/demo/DemoApplicationTests.java
================================================
package com.smacul.demo;
import org.junit.jupiter.api.Test;
import org.springframework.boot.test.context.SpringBootTest;
/**
 * Smoke test for the application: the class is annotated with
 * {@code @SpringBootTest} and contains a single test with an empty body.
 */
@SpringBootTest
class DemoApplicationTests {
/**
 * Intentionally empty; the test passes as long as the test setup triggered by
 * {@code @SpringBootTest} completes without throwing.
 */
@Test
void contextLoads() {
}
}
================================================
FILE: front/.gitignore
================================================
.DS_Store
node_modules
/dist
# local env files
.env.local
.env.*.local
# Log files
npm-debug.log*
yarn-debug.log*
yarn-error.log*
# Editor directories and files
.idea
.vscode
*.suo
*.ntvs*
*.njsproj
*.sln
*.sw?
================================================
FILE: front/README.md
================================================
# front
## Project setup
```
yarn install
```
### Compiles and hot-reloads for development
```
yarn serve
```
### Compiles and minifies for production
```
yarn build
```
### Lints and fixes files
```
yarn lint
```
### Customize configuration
See [Configuration Reference](https://cli.vuejs.org/config/).
================================================
FILE: front/babel.config.js
================================================
// Babel configuration: delegate everything to the preset shipped with
// @vue/cli-plugin-babel; no project-specific overrides are defined here.
module.exports = {
presets: [
'@vue/cli-plugin-babel/preset'
]
}
================================================
FILE: front/package.json
================================================
{
"name": "front",
"version": "0.1.0",
"private": true,
"scripts": {
"serve": "vue-cli-service serve",
"build": "vue-cli-service build",
"lint": "vue-cli-service lint"
},
"dependencies": {
"core-js": "^3.4.4",
"element-ui": "^2.3.6",
"vue": "^2.6.10",
"vue-blu": "^0.1.9",
"vue-router": "^3.1.3",
"wangeditor": "^3.1.1"
},
"devDependencies": {
"@vue/cli-plugin-babel": "^4.1.0",
"@vue/cli-plugin-eslint": "^4.1.0",
"@vue/cli-plugin-router": "^4.1.2",
"@vue/cli-service": "^4.1.0",
"axios": "^0.18.0",
"babel-eslint": "^10.0.3",
"eslint": "^5.16.0",
"eslint-plugin-vue": "^5.0.0",
"node-sass": "^4.13.1",
"sass-loader": "^8.0.2",
"vue-cli-plugin-axios": "^0.0.4",
"vue-cli-plugin-element-ui": "^1.1.4",
"vue-template-compiler": "^2.6.10"
},
"eslintConfig": {
"root": true,
"env": {
"node": true
},
"extends": [
"plugin:vue/essential",
"eslint:recommended"
],
"rules": {},
"parserOptions": {
"parser": "babel-eslint"
}
},
"browserslist": [
"> 1%",
"last 2 versions"
]
}
================================================
FILE: front/public/index.html
================================================
front
================================================
FILE: front/src/App.vue
================================================
================================================
FILE: front/src/assets/css/Narrow.css
================================================
/* Layout for the narrow variant: 1030px-wide main column, no left nav. */
body {
margin: 0 auto;
}
/* Fixed header pinned to the top; the very large width combined with
   overflow: hidden makes the bar extend past any viewport width. */
header {
height: 60px;
margin-bottom: 10px;
box-shadow: 0 1px 4px 0 rgba(0,0,0,.12);
width: 1000000px;
overflow: hidden;
position: fixed;
background-color: #fff;
z-index: 10;
top: 0;
}
/* Centered content area; the 70px top margin leaves room for the 60px header. */
main {
width: 1030px;
margin: 70px auto 10px;
position: relative;
z-index: 1;
height: 1000px;
}
/* The nav rule is disabled in this variant. */
/*nav {*/
/* width: 130px;*/
/* position: fixed;*/
/*}*/
/* Article column starts at the left edge of main (no nav offset). */
article {
margin-left:0;
width: 710px;
}
/* Fixed sidebar placed to the right of the 710px article column. */
aside {
width: 280px;
position: fixed;
top: 70px;
margin-left: 750px;
}
/* Fixed bar centered horizontally: margin-left 50% minus half of its
   1030px width (left: -515px). */
.top-bar {
width: 1030px;
height: 40px;
padding: 10px 0px 10px 0px;
position: fixed;
left: -515px;
margin-left: 50%;
}
================================================
FILE: front/src/assets/css/Normal.css
================================================
/* Layout for the normal variant: 1180px-wide main column with a left nav. */
body {
margin: 0 auto;
}
/* Fixed header pinned to the top; the very large width combined with
   overflow: hidden makes the bar extend past any viewport width. */
header {
height: 60px;
margin-bottom: 10px;
box-shadow: 0 1px 4px 0 rgba(0,0,0,.12);
width: 1000000px;
overflow: hidden;
position: fixed;
background-color: #fff;
z-index: 10;
top: 0;
}
/* Centered content area; the 70px top margin leaves room for the 60px header. */
main {
width: 1180px;
margin: 70px auto 10px;
position: relative;
z-index: 1;
height: 1000px;
}
/* Fixed left navigation, stacked above the article (z-index 2 vs 1). */
nav {
width: 130px;
position: fixed;
z-index: 2;
}
/* Article column, shifted right by 160px to clear the 130px nav. */
article {
margin-left:160px;
width: 710px;
z-index: 1;
}
/* Fixed sidebar placed to the right of the article column. */
aside {
width: 280px;
position: fixed;
top: 70px;
margin-left: 900px;
}
/* Fixed bar centered horizontally: margin-left 50% minus half of its
   1180px width (left: -590px). */
.top-bar {
width: 1180px;
height: 40px;
padding: 10px 0px 10px 0px;
position: fixed;
left: -590px;
margin-left: 50%;
}
================================================
FILE: front/src/components/HelloWorld.vue
================================================
{{ msg }}
For a guide and recipes on how to configure / customize this project,
check out the
vue-cli documentation.
Installed CLI Plugins
Essential Links
Ecosystem
================================================
FILE: front/src/components/app/HelloWorld.vue
================================================
{{ msg }}
For a guide and recipes on how to configure / customize this project,
check out the
vue-cli documentation.
Installed CLI Plugins
Essential Links
Ecosystem
================================================
FILE: front/src/components/article/ArticleCenter.vue
================================================
{{articleMain.artTitle}}
{{ articleMain.artType }}
{{ articleMain.cusName }}
{{ dataTransfer }}
================================================
FILE: front/src/components/article/CommentReplyCenter.vue
================================================
================================================
FILE: front/src/components/article/RightMenu.vue
================================================
================================================
FILE: front/src/components/article/TopBar.vue
================================================
退出登录
================================================
FILE: front/src/components/article/comment-reply-main/CommentReplyInput.vue
================================================
================================================
FILE: front/src/components/article/comment-reply-main/ReplyMain.vue
================================================
================================================
FILE: front/src/components/common/DarkCard.vue
================================================
================================================
FILE: front/src/components/common/EditEntrance.vue
================================================
点击
开始书写自己的新闻
================================================
FILE: front/src/components/common/EditorBrief.vue
================================================
{{articleAuthor.cusName}}
+关注
取消关注
================================================
FILE: front/src/components/common/FloatCard.vue
================================================
================================================
FILE: front/src/components/common/HotArticle.vue
================================================
{{ hotArticle.artTitle }}
================================================
FILE: front/src/components/common/SearchPanel.vue
================================================
{{tip}}
================================================
FILE: front/src/components/common/TinyArticle.vue
================================================
{{ tinyArticle.artTitle }}
{{ tinyArticle.artType}}
{{ tinyArticle.customer.cusName }}
{{ date(tinyArticle.artTime) }}
================================================
FILE: front/src/components/edit/TopBar.vue
================================================
退出登录
================================================
FILE: front/src/components/index/LeftMenu.vue
================================================
================================================
FILE: front/src/components/index/TopBar.vue
================================================
退出登录
================================================
FILE: front/src/components/port/LoginPart.vue
================================================
登 录
登录
注册
================================================
FILE: front/src/components/port/RegisterPart.vue
================================================
注 册
注册
有账号了, 去登录
================================================
FILE: front/src/components/search/RightMenu.vue
================================================
================================================
FILE: front/src/components/search/TinyCenter.vue
================================================
我也是有底线哒 ~
================================================
FILE: front/src/components/search/TopBar.vue
================================================
退出登录
================================================
FILE: front/src/components/self/EditorMain.vue
================================================
{{customer.cusName}}
{{ customer.cusStyle}}
+关注
取消关注
================================================
FILE: front/src/components/self/RightMenu.vue
================================================
{{info.followNum}}
关注了
{{info.fanNum}}
粉丝
个人成就
阅读了
{{info.readNum}}
篇文章
发表了
{{info.artEditNum}}
篇文章
编辑了
{{info.comEditNum + info.repEditNum}}
次评论
================================================
FILE: front/src/components/self/TinyCenter.vue
================================================
{{customerDynamic.cusFrom.cusName}}
{{transBehaviorCodeToWord(customerDynamic.cbrBehavior)}}
{{ customerDynamic.article.artTitle }}
{{ customerDynamic.article.artType}}
{{ customerDynamic.cusTo.cusName }}
{{ date(customerDynamic.article.artTime) }}
{{ customerDynamic.article.artTitle }}
{{ customerDynamic.article.artType}}
{{ customerDynamic.cusTo.cusName }}
{{ date(customerDynamic.reply.repTime) }}
{{ customerDynamic.cusTo.cusName }}
{{ date(customerDynamic.cbrTime) }}
>
我也是有底线哒 ~
================================================
FILE: front/src/components/self/TopBar.vue
================================================
退出登录
================================================
FILE: front/src/control/Discuss.js
================================================
import axios from 'axios'
let base = '/api/discuss/';
/**
 * Requests the discussion page for one article.
 *
 * @param artId id of the article, sent as the `artId` query parameter
 * @returns the promise returned by axios for GET `<base>page`
 */
export function getComList(artId) {
  const url = `${base}page`;
  return axios.get(url, { params: { artId } });
}
/**
 * Posts a new comment.
 *
 * @param comment request body, forwarded unchanged
 * @returns the promise returned by axios for POST `<base>com`
 */
export function addNewCom(comment) {
  const url = `${base}com`;
  return axios.post(url, comment);
}
/**
 * Posts a new reply.
 *
 * @param reply request body, forwarded unchanged
 * @returns the promise returned by axios for POST `<base>rep`
 */
export function cusAddReply(reply) {
  const url = `${base}rep`;
  return axios.post(url, reply);
}
// export function cusComPreference(comId, preference) {
// return axios.get('/api/discuss/precom?comId=' + comId + '&preference=' + preference)
// }
//
// export function cusRepPreference(repId, preference) {
// return axios.get('/api/discuss/prerep?repId=' + repId + '&preference=' + preference)
// }
================================================
FILE: front/src/control/Edit.js
================================================
import axios from 'axios'
let base = '/api/edit/';
/**
 * Posts a new article.
 *
 * @param article request body, forwarded unchanged
 * @returns the promise returned by axios for POST `<base>add`
 */
export function addNewArt(article) {
  const url = `${base}add`;
  return axios.post(url, article);
}
================================================
FILE: front/src/control/Load.js
================================================
import axios from 'axios'
let base = '/api/load/';
/**
 * Requests the list of article types.
 *
 * @returns the promise returned by axios for GET `<base>type`
 */
export function getArtTypes() {
  const url = `${base}type`;
  return axios.get(url);
}
/**
 * Requests one page of article summaries for a given type.
 *
 * @param artType sent as the `artType` query parameter
 * @param page sent as the `page` query parameter
 * @param pageSize sent as the `pageSize` query parameter
 * @returns the promise returned by axios for GET `<base>tiny`
 */
export function getTinyArtOnePageByType(artType, page, pageSize) {
  const params = { artType, page, pageSize };
  return axios.get(`${base}tiny`, { params });
}
/**
 * Requests one page of hot articles.
 *
 * @param page sent as the `page` query parameter
 * @param pageSize sent as the `pageSize` query parameter
 * @returns the promise returned by axios for GET `<base>hot`
 */
export function getHotArtOnePage(page, pageSize) {
  const params = { page, pageSize };
  return axios.get(`${base}hot`, { params });
}
/**
 * Requests the full content of one article.
 *
 * @param artId sent as the `artId` query parameter
 * @returns the promise returned by axios for GET `<base>main`
 */
export function getFullArt(artId) {
  const params = { artId };
  return axios.get(`${base}main`, { params });
}
/**
 * Sends a preference for an article.
 *
 * @param artId sent as the `artId` query parameter
 * @param type sent as the `type` query parameter
 * @returns the promise returned by axios for GET `<base>prefer`
 */
export function setArtPreference(artId, type) {
  const params = { artId, type };
  return axios.get(`${base}prefer`, { params });
}
================================================
FILE: front/src/control/Search.js
================================================
import axios from 'axios'
let base = '/api/search/';
/**
 * Runs a simple paged search.
 *
 * @param key sent as the `key` query parameter
 * @param page sent as the `page` query parameter
 * @param pageSize sent as the `pageSize` query parameter
 * @returns the promise returned by axios for GET `<base>simple`
 */
export function searchContentSimple(key, page, pageSize) {
  const params = { key, page, pageSize };
  return axios.get(`${base}simple`, { params });
}
// export function searchContentByKeyAndTagTypePage(key, tag, type, page, pageSize) {
// return axios.get('/api/search/key?key=' + key + '&tag=' + tag + '&type=' + type + '&page=' + page + '&pageSize=' + pageSize)
// }
================================================
FILE: front/src/control/Self.js
================================================
import axios from 'axios'
let base = '/api/self/';
/**
 * Submits login credentials as form data.
 *
 * @param cusName appended to the form under `cusName`
 * @param cusPass appended to the form under `cusPass`
 * @returns the promise returned by axios for POST `<base>login`
 */
export function cusLogin(cusName, cusPass) {
  const form = new FormData();
  [['cusName', cusName], ['cusPass', cusPass]].forEach(([field, value]) => {
    form.append(field, value);
  });
  return axios.post(`${base}login`, form);
}
/**
 * Calls the logout endpoint.
 *
 * @returns the promise returned by axios for GET `<base>quit`
 */
export function quitLogin() {
  const url = `${base}quit`;
  return axios.get(url);
}
/**
 * Submits registration credentials as form data.
 *
 * @param cusName appended to the form under `cusName`
 * @param cusPass appended to the form under `cusPass`
 * @returns the promise returned by axios for POST `<base>register`
 */
export function cusRegister(cusName, cusPass) {
  const form = new FormData();
  [['cusName', cusName], ['cusPass', cusPass]].forEach(([field, value]) => {
    form.append(field, value);
  });
  return axios.post(`${base}register`, form);
}
/**
 * Requests the basic info of a customer.
 *
 * @param cusId sent as the `cusId` query parameter
 * @returns the promise returned by axios for GET `<base>basic`
 */
export function getCusBasicInfo(cusId) {
  const params = { cusId };
  return axios.get(`${base}basic`, { params });
}
/**
 * Sends modified customer info.
 *
 * @param customer passed as the `customer` query parameter
 * @returns the promise returned by axios for GET `<base>modify`
 */
export function setCusBasicInfo(customer) {
  const params = { customer };
  return axios.get(`${base}modify`, { params });
}
/**
 * Calls the follow endpoint for a customer.
 *
 * @param cusId sent as the `cusId` query parameter
 * @returns the promise returned by axios for GET `<base>follow`
 */
export function setCusFollow(cusId) {
  const params = { cusId };
  return axios.get(`${base}follow`, { params });
}
/**
 * Requests the feature info of a customer.
 *
 * @param cusId sent as the `cusId` query parameter
 * @returns the promise returned by axios for GET `<base>feature`
 */
export function getCusFeatureInfo(cusId) {
  const params = { cusId };
  return axios.get(`${base}feature`, { params });
}
/**
 * Requests one page of a customer's activity ("dynamic") entries.
 *
 * @param cusId sent as the `cusId` query parameter
 * @param page sent as the `page` query parameter
 * @param pageSize sent as the `pageSize` query parameter
 * @returns the promise returned by axios for GET `<base>dynamic`
 */
export function getCusSelfDynamic(cusId, page, pageSize) {
  const params = { cusId, page, pageSize };
  return axios.get(`${base}dynamic`, { params });
}
/**
 * Calls the follow-check endpoint for a customer.
 *
 * @param cusId sent as the `cusId` query parameter
 * @returns the promise returned by axios for GET `<base>chefollow`
 */
export function checkCusFollow(cusId) {
  const params = { cusId };
  return axios.get(`${base}chefollow`, { params });
}
================================================
FILE: front/src/main.js
================================================
import Vue from 'vue'
import './plugins/axios'
import App from './App.vue'
import ElementUI from 'element-ui'
// import VueBlu from 'vue-blu'
import locale from 'element-ui/lib/locale/lang/zh-CN'
import './styles.scss'
// import 'vue-blu/dist/css/vue-blu.min.css'
import router from './router'
// Register Element UI globally with the zh-CN locale imported above.
Vue.use(ElementUI, { locale });
// Vue.use(VueBlu);
// Disable the production-mode tip Vue prints on startup.
Vue.config.productionTip = false;
// Create the root instance with the router, render App, and mount it on #app.
new Vue({
router,
render: h => h(App)
}).$mount('#app');
================================================
FILE: front/src/plugins/axios.js
================================================
"use strict";
import Vue from 'vue';
import axios from "axios";
// Full config: https://github.com/axios/axios#request-config
// axios.defaults.baseURL = process.env.baseURL || process.env.apiUrl || '';
// axios.defaults.headers.common['Authorization'] = AUTH_TOKEN;
// axios.defaults.headers.post['Content-Type'] = 'application/x-www-form-urlencoded';
// Options passed to axios.create(); every option is currently commented out,
// so the instance is created from an empty config object.
let config = {
// baseURL: process.env.baseURL || process.env.apiUrl || ""
// timeout: 60 * 1000, // Timeout
// withCredentials: true, // Check cross-site Access-Control
};
// Dedicated axios instance; the interceptors below are attached to this
// instance only.
const _axios = axios.create(config);
// Request interceptor: currently a pass-through (returns the config as-is and
// re-rejects request errors).
_axios.interceptors.request.use(
function(config) {
// Do something before request is sent
return config;
},
function(error) {
// Do something with request error
return Promise.reject(error);
}
);
// Add a response interceptor
// Currently a pass-through as well (returns the response as-is and re-rejects
// response errors).
_axios.interceptors.response.use(
function(response) {
// Do something with response data
return response;
},
function(error) {
// Do something with response error
return Promise.reject(error);
}
);
// Declare the plugin object locally. Without this declaration the code only
// works because browsers expose a legacy global named `Plugin`; in any
// environment without that global (Node, SSR, unit tests) the assignment
// below throws a ReferenceError.
const Plugin = {};

/**
 * Vue plugin installer: exposes the shared `_axios` instance as
 * `Vue.axios`, `window.axios`, and as read-only `axios` / `$axios`
 * properties on every component instance.
 *
 * @param Vue the Vue constructor passed in by `Vue.use`
 */
Plugin.install = function(Vue) {
  Vue.axios = _axios;
  window.axios = _axios;
  Object.defineProperties(Vue.prototype, {
    axios: {
      get() {
        return _axios;
      }
    },
    $axios: {
      get() {
        return _axios;
      }
    },
  });
};

Vue.use(Plugin);

export default Plugin;
================================================
FILE: front/src/router/index.js
================================================
import Vue from 'vue'
import VueRouter from 'vue-router'
// import Home from '../views/Home.vue'
import IndexView from '../views/IndexView'
import SearchView from '../views/SearchView'
import ArticleView from "../views/ArticleView"
import SelfView from '../views/SelfView'
import PortView from "../views/PortView";
import EditView from "../views/EditView";
Vue.use(VueRouter);
// Route table. The port (login/register) view is reachable at both '/' and
// '/port'. It is declared once with an alias instead of as two records that
// share the name 'PortView': vue-router warns about duplicate route names and
// ignores the second record for name-based lookups.
const routes = [
  {
    path: '/',
    alias: '/port',
    name: 'PortView',
    component: PortView
  },
  {
    path: '/index',
    name: 'IndexView',
    component: IndexView
  },
  {
    path: '/search/:key',
    name: 'SearchView',
    component: SearchView
  },
  {
    path: '/article/:artId',
    name: 'ArticleView',
    component: ArticleView
  },
  {
    path: '/self/:cusId',
    name: 'SelfView',
    component: SelfView
  },
  {
    path: '/edit',
    name: 'EditView',
    component: EditView
  }
  // {
  //   path: '/about',
  //   name: 'about',
  //   // route level code-splitting
  //   // this generates a separate chunk (about.[hash].js) for this route
  //   // which is lazy-loaded when the route is visited.
  //   component: () => import(/* webpackChunkName: "about" */ '../views/About.vue')
  // }
];

// History-mode router rooted at the build-time BASE_URL.
const router = new VueRouter({
  mode: 'history',
  base: process.env.BASE_URL,
  routes
});

export default router
================================================
FILE: front/src/styles.scss
================================================
/* theme color */
$--color-primary: teal;
/* icon font path, required */
$--font-path: '~element-ui/lib/theme-chalk/fonts';
@import "~element-ui/packages/theme-chalk/src/index";
================================================
FILE: front/src/util/PageJump.js
================================================
import router from '@/router/index.js'
/**
 * Resolves a route location through the router and opens it in the current
 * browsing context (`_self`).
 *
 * @param road route location passed to `router.resolve`
 */
export function jumpInCurPage(road) {
  const { href } = router.resolve(road);
  window.open(href, '_self');
}
/**
 * Resolves a route location through the router and opens it in a new
 * browsing context (`_blank`).
 *
 * @param road route location passed to `router.resolve`
 */
export function jumpInNewPage(road) {
  const { href } = router.resolve(road);
  window.open(href, '_blank');
}
================================================
FILE: front/src/util/TimeHandler.js
================================================
/**
 * Parses a date string and formats it with the runtime's default locale.
 *
 * @param time value handed to `Date.parse`
 * @returns the result of `toLocaleString()` on the parsed date
 *          (`"Invalid Date"` when parsing fails)
 */
export function transUTCtoLocal(time) {
  const epochMillis = Date.parse(time);
  const parsed = new Date(epochMillis);
  return parsed.toLocaleString();
}
================================================
FILE: front/src/views/About.vue
================================================
This is an about page
================================================
FILE: front/src/views/ArticleView.vue
================================================
================================================
FILE: front/src/views/EditView.vue
================================================
================================================
FILE: front/src/views/Home.vue
================================================
================================================
FILE: front/src/views/IndexView.vue
================================================
================================================
FILE: front/src/views/PortView.vue
================================================
================================================
FILE: front/src/views/SearchView.vue
================================================
================================================
FILE: front/src/views/SelfView.vue
================================================
================================================
FILE: front/vue.config.js
================================================
// Vue CLI configuration: dev-server host/port and the /api proxy.
module.exports = {
devServer: {
// Paths
// assetsSubDirectory: 'static',
// assetsPublicPath: '/',
// Forward every request whose path starts with /api to the target below.
proxy: {
'/api': {
target: 'http://0.0.0.0:8080',
changeOrigin: true,
pathRewrite: {
'^/api': '' // strip the /api prefix before forwarding
}
}
},
// // Various Dev Server settings
host: '0.0.0.0', // can be overwritten by process.env.HOST
port: 8071, // can be overwritten by process.env.PORT, if port is in use, a free one will be determined
// autoOpenBrowser: false,
// errorOverlay: true,
// notifyOnErrors: true,
// poll: false, // https://webpack.js.org/configuration/dev-server/#devserver-watchoptions-
//
//
// /**
// * Source Maps
// */
//
// // https://webpack.js.org/configuration/devtool/#development
// devtool: 'cheap-module-eval-source-map',
//
// // If you have problems debugging vue-files in devtools,
// // set this to false - it *may* help
// // https://vue-loader.vuejs.org/en/options.html#cachebusting
// cacheBusting: true,
//
// cssSourceMap: true
},
}
================================================
FILE: spider/Main.py
================================================
import model.ArticleModel as ArtMod
import model.ReplyModel as RepMod
import model.CommentModel as ComMod
import model.CustomerModel as CusMod
import dao.ArticleDao as ArtDao
import dao.ReplyDao as RepDao
import dao.CommentDao as ComDao
import dao.CustomerDao as CusDao
import process.ArticleProcess as ArtPro
import process.ReplyProcess as RepPro
import process.CommentProcess as ComPro
import process.CustomerProcess as CusPro
import util.MySql as MySql
import util.Json as Json
import util.Time as Time
import os.path
import logging
import random
# Root-logger setup: every run writes INFO-and-above records to its own file
# under ./log, named after the local time returned by Time.Time.get_local_time().
# logging.FileHandler does not create missing directories, so make sure the
# log directory exists before opening the file.
os.makedirs('log', exist_ok=True)
log_file_name = os.path.join('log', '%s.txt' % Time.Time.get_local_time())
logger = logging.getLogger()
logger.setLevel(logging.INFO)
formatter = logging.Formatter('%(levelname)s - %(module)s - %(funcName)s : \t%(message)s')
handler = logging.FileHandler(filename=log_file_name, mode='a', encoding='utf-8')
handler.setFormatter(formatter)
logger.addHandler(handler)
# Crawl driver: wires the database wrapper, the four "process" helpers and the
# four DAOs together, then walks category -> article -> comment -> reply.
# NOTE(review): every handler below is a bare `except:`, which also traps
# KeyboardInterrupt/SystemExit; consider `except Exception:`.
class Major:
# Reads the database settings from the JSON file at `path` (keys: name, user,
# pass, host, charset) and builds the MySql wrapper, processors and DAOs.
def __init__(self, path):
db = Json.Json.read_json_file(path)
self.__base = MySql.MySql(db_name=db['name'], user=db['user'], password=db['pass'],
host=db['host'], charset=db['charset'])
self.__art_pro = ArtPro.ArticleProcess()
self.__rep_pro = RepPro.ReplyProcess()
self.__com_pro = ComPro.CommentProcess()
self.__cus_pro = CusPro.CustomerProcess()
self.__cus_dao = CusDao.CustomerDao(self.__base)
self.__art_dao = ArtDao.ArticleDao(self.__base)
self.__com_dao = ComDao.CommentDao(self.__base)
self.__rep_dao = RepDao.ReplyDao(self.__base)
# Runs the whole crawl over the fixed category list. A failure at any stage is
# printed, logged with traceback, and the current item is skipped via `continue`.
def major(self):
categories = ['news_society', 'news_entertainment', 'news_tech', 'news_military', 'news_sports', 'news_car',
'news_finance', 'news_world', 'news_fashion', 'news_travel', 'news_discovery', 'news_baby',
'news_regimen', 'news_story', 'news_essay', 'news_game', 'news_history', 'news_food']
# categories = ['news_society']
for category in categories:
print("当前类别: %s" % category)
logging.info("当前类别: %s" % category)
# Stage: fetch the article briefs of this category.
""" 处理 art
"""
try:
arts_brief_json = self.__art_pro.get_arts_brief_json_by_category(category)
logging.info('%s arts_brief_json 获取 成功' % category)
except:
print('%s arts_brief_json 获取 失败' % category)
logging.exception('%s arts_brief_json 获取 失败' % category)
continue
for art_i, art_brief_json in enumerate(arts_brief_json):
print("当前新闻: %d/%d %s" % (art_i, len(arts_brief_json), category))
logging.info("当前新闻: %d/%d %s" % (art_i, len(arts_brief_json), category))
# Stage: article author (insert/fetch the customer, update its feature counters).
""" 新闻作者
"""
art_cus_mod = CusMod.CustomerModel()
try:
self.__cus_pro.set_art_cus(art_brief_json, art_cus_mod)
self.__cus_dao.insert_then_get_cus(art_cus_mod)
self.__cus_dao.update_cus_feature(category, art_cus_mod.cus_id, flag=True)
logging.info("%s-%d art_cus 处理 成功" % (category, art_i))
except:
print("%s-%d art_cus 处理 失败" % (category, art_i))
logging.exception("%s-%d art_cus 处理 失败" % (category, art_i))
continue
# Stage: the article itself; already-stored articles are skipped entirely.
""" 新闻
"""
art_mod = ArtMod.ArticleModel()
try:
self.__art_pro.set_art(art_brief_json, category, art_cus_mod.cus_id, art_mod)
if not self.__art_dao.is_art_exist(art_mod.art_spider):
# the article is not stored yet
self.__art_dao.insert_art(art_mod)
else:
print("art 已存在")
continue
art_mod.art_id = self.__art_dao.search_art_id_by_spider(art_mod.art_spider)
# art_mod.art_time = self.__art_dao.search_art_time_by_spider(art_mod.art_spider)
logging.info("%s-%d art 操作 成功" % (category, art_i))
except:
print("%s-%d art 操作 失败" % (category, art_i))
logging.exception("%s-%d art 操作 失败" % (category, art_i))
continue
# Stage: behavior records linking the author to the article (behavior codes 1 and 2).
""" 新闻 用户 行为
"""
try:
if self.__art_dao.check_art_cus_relationship(art_mod.art_id, art_cus_mod.cus_id):
self.__cus_dao.insert_cus_behavior(
art_cus_mod.cus_id, art_cus_mod.cus_id, 1, art_mod.art_id, 1,
art_mod.art_id, cbr_time=art_mod.art_time
)
self.__cus_dao.insert_cus_behavior(
art_cus_mod.cus_id, art_cus_mod.cus_id, 2, art_mod.art_id, 1,
art_mod.art_id
)
self.__cus_dao.update_cus_feature(category, art_cus_mod.cus_id)
self.__art_dao.update_art_feature(1, art_mod.art_id, art_mod.art_time)
else:
pass
# NOTE(review): "rt-cus" in the three messages below looks like a typo for "art-cus" — confirm.
logging.info("%s-%d rt-cus 行为 1 数据库操作 成功" % (category, art_i))
except:
print("%s-%d rt-cus 行为 1 数据库操作 失败" % (category, art_i))
logging.exception("%s-%d rt-cus 行为 1 数据库操作 失败" % (category, art_i))
continue
# Stage: fetch the comments of this article; None means nothing to process.
""" 评论与回复处理
"""
try:
coms_json = self.__com_pro.get_coms_json(art_brief_json)
if coms_json is None:
continue
logging.info("%s-%d coms_json 获取 成功" % (category, art_i))
except:
print("\t%s-%d coms_json 获取 失败" % (category, art_i))
logging.exception("%s-%d coms_json 获取 失败" % (category, art_i))
continue
for com_i, com_json in enumerate(coms_json):
print("\t当前评论: %d/%d" % (com_i, len(coms_json)))
logging.info("当前评论: %d/%d" % (com_i, len(coms_json)))
# Stage: comment author.
""" 评论用户
"""
com_cus_mod = CusMod.CustomerModel()
try:
self.__cus_pro.set_com_cus(com_json, com_cus_mod)
self.__cus_dao.insert_then_get_cus(com_cus_mod)
self.__cus_dao.update_cus_feature(category, com_cus_mod.cus_id, flag=True)
# self.__cus_dao.cus_watch_other_same_category_art(com_cus_mod.cus_id, art_mod.art_id, category)
# NOTE(review): this is the success path but the message says "错误" (error) — confirm and fix.
logging.info("%s-%d-%d com_cus 处理 错误" % (category, art_i, com_i))
except:
print("\t%s-%d-%d com_cus 处理 错误" % (category, art_i, com_i))
logging.exception("%s-%d-%d com_cus 处理 错误" % (category, art_i, com_i))
continue
# Stage: the comment itself; already-stored comments are skipped.
""" 评论
"""
com_mod = ComMod.CommentModel()
try:
self.__com_pro.set_com(com_json, art_mod.art_id, com_cus_mod.cus_id, com_mod)
if not self.__com_dao.is_com_exist(com_mod.com_spider):
# the comment is not stored yet
self.__com_dao.insert_com(com_mod)
else:
print("com 已存在")
continue
com_mod.com_id = self.__com_dao.search_com_id_by_spider(com_mod.com_spider)
# NOTE(review): this is the success path but the message says "失败" (failed) — confirm and fix.
logging.info("%s-%d-%d com 处理 失败" % (category, art_i, com_i))
except:
print("\t%s-%d-%d com 处理 失败" % (category, art_i, com_i))
logging.exception("%s-%d-%d com 处理 失败" % (category, art_i, com_i))
continue
# Stage: behavior records for the commenter (behavior codes 5 and 2).
""" 评论 用户 行为
"""
try:
if self.__com_dao.check_com_cus_relationship(art_mod.art_id, com_mod.com_id, com_cus_mod.cus_id):
self.__cus_dao.insert_cus_behavior(
com_cus_mod.cus_id, art_cus_mod.cus_id, 5, art_mod.art_id, 2,
com_mod.com_id, cbr_time=com_mod.com_time
)
self.__cus_dao.insert_cus_behavior(
com_cus_mod.cus_id, art_cus_mod.cus_id, 2, art_mod.art_id, 1,
art_mod.art_id
)
self.__cus_dao.update_cus_feature(category, com_cus_mod.cus_id)
self.__art_dao.update_art_feature(4, art_mod.art_id, art_mod.art_time)
else:
pass
logging.info("%s-%d-%d art-cus 行为 4 数据库操作 成功" % (category, art_i, com_i))
except:
print("\t%s-%d-%d art-cus 行为 4 数据库操作 失败" % (category, art_i, com_i))
logging.exception("%s-%d-%d art-cus 行为 4 数据库操作 失败" % (category, art_i, com_i))
continue
# Stage: simulated browsing for the commenter — picks 1 or 2 random categories
# and records behavior code 2 on articles returned by get_same_category_art.
# A failure here is logged but does not skip the reply stage (no `continue`).
""" 评论用户 模拟浏览
"""
try:
result_list = None
rand_category_num = random.randint(1, 2)
rand_cates = random.sample(categories, rand_category_num)
for rand_cate in rand_cates:
result_list = self.__art_dao.get_same_category_art(art_mod.art_id, rand_cate)
if result_list is not None:
for back_art in result_list:
try:
self.__cus_dao.insert_cus_behavior(
com_cus_mod.cus_id, back_art[1], 2, back_art[0], 1, back_art[0]
)
self.__cus_dao.update_cus_feature(rand_cate, com_cus_mod.cus_id, update_num=1)
self.__art_dao.update_art_feature(6, back_art[0], art_mod.art_time)
except:
continue
print("\t%d 用户模拟浏览操作 数量 %d 完成" % (com_cus_mod.cus_id, len(result_list)))
logging.info("%d 模拟浏览操作 数量 %d 完成" % (com_cus_mod.cus_id, len(result_list)))
except:
print("\t%d 用户模拟浏览操作 失败" % com_cus_mod.cus_id)
logging.exception("%d 用户模拟浏览操作 失败" % com_cus_mod.cus_id)
# Stage: fetch the replies of this comment; None means nothing to process.
""" 回复处理
"""
try:
reps_json = self.__rep_pro.get_reps_json(com_json)
if reps_json is None:
continue
logging.info("%s-%d-%d reps_json 获取 成功" % (category, art_i, com_i))
except:
print("\t\t%s-%d-%d reps_json 获取 失败" % (category, art_i, com_i))
logging.exception("%s-%d-%d reps_json 获取 失败" % (category, art_i, com_i))
continue
for rep_i, rep_json in enumerate(reps_json):
# Stage: reply author.
""" 回复用户
"""
rep_cus_mod = CusMod.CustomerModel()
try:
self.__cus_pro.set_rep_cus(rep_json, rep_cus_mod)
self.__cus_dao.insert_then_get_cus(rep_cus_mod)
self.__cus_dao.update_cus_feature(category, rep_cus_mod.cus_id, flag=True)
logging.info("%s-%d-%d-%d rep_cus 处理 成功" % (category, art_i, com_i, rep_i))
except:
print("\t\t%s-%d-%d-%d rep_cus 处理 失败" % (category, art_i, com_i, rep_i))
logging.exception("%s-%d-%d-%d rep_cus 处理 失败" % (category, art_i, com_i, rep_i))
continue
# Stage: the reply itself; already-stored replies are skipped.
""" 回复
"""
rep_mod = RepMod.ReplyModel()
try:
self.__rep_pro.set_rep(rep_json, art_mod.art_id,
com_mod.com_id, rep_cus_mod.cus_id, rep_mod)
if not self.__rep_dao.is_rep_exist(rep_mod.rep_spider):
self.__rep_dao.search_rep_rep_by_spyder(rep_json, rep_mod)
self.__rep_dao.insert_rep(rep_mod)
else:
print("rep 已存在")
continue
rep_mod.rep_id = self.__rep_dao.search_rep_id_by_spider(rep_mod.rep_spider)
logging.info("%s-%d-%d-%d rep 处理 成功" % (category, art_i, com_i, rep_i))
except:
print("\t\t%s-%d-%d-%d rep 处理 失败" % (category, art_i, com_i, rep_i))
logging.exception("%s-%d-%d-%d rep 处理 失败" % (category, art_i, com_i, rep_i))
continue
# Stage: behavior records for the replier (behavior codes 8 and 2).
""" 回复 用户 行为
"""
try:
if self.__rep_dao.check_rep_cus_relationship(art_mod.art_id, rep_mod.rep_id,
rep_cus_mod.cus_id):
self.__cus_dao.insert_cus_behavior(
rep_cus_mod.cus_id, art_cus_mod.cus_id, 8, art_mod.art_id, 3,
rep_mod.rep_id, cbr_time=rep_mod.rep_time
)
self.__cus_dao.insert_cus_behavior(
rep_cus_mod.cus_id, art_cus_mod.cus_id, 2, art_mod.art_id, 1,
art_mod.art_id
)
self.__cus_dao.update_cus_feature(category, rep_cus_mod.cus_id)
self.__art_dao.update_art_feature(5, art_mod.art_id, art_mod.art_time)
else:
pass
logging.info("%s-%d-%d-%d art-cus 行为 5 数据库操作 成功" % (category, art_i, com_i, rep_i))
except:
print("\t\t%s-%d-%d-%d art-cus 行为 5 数据库操作 失败" % (category, art_i, com_i, rep_i))
logging.exception("%s-%d-%d-%d art-cus 行为 5 数据库操作 失败" % (category, art_i, com_i, rep_i))
continue
# Stage: simulated browsing for the replier; same procedure as for the commenter.
""" 回复用户 模拟浏览
"""
try:
result_list = None
rand_category_num = random.randint(1, 2)
rand_cates = random.sample(categories, rand_category_num)
for rand_cate in rand_cates:
result_list = self.__art_dao.get_same_category_art(art_mod.art_id, rand_cate)
if result_list is not None:
for back_art in result_list:
try:
self.__cus_dao.insert_cus_behavior(
rep_cus_mod.cus_id, back_art[1], 2, back_art[0], 1, back_art[0]
)
self.__cus_dao.update_cus_feature(rand_cate, rep_cus_mod.cus_id, update_num=1)
self.__art_dao.update_art_feature(6, back_art[0], art_mod.art_time)
except:
continue
print("\t\t%d 用户模拟浏览操作 数量 %d 完成" % (rep_cus_mod.cus_id, len(result_list)))
logging.info("%d 用户模拟浏览操作 数量 %d 完成" % (rep_cus_mod.cus_id, len(result_list)))
except:
print("\t\t%d 用户模拟浏览操作 失败" % rep_cus_mod.cus_id)
logging.exception("%d 用户模拟浏览操作 失败" % rep_cus_mod.cus_id)
# Script entry point: build Major from properties/database.json and run the crawl.
if __name__ == '__main__':
Major(os.path.join('properties', 'database.json')).major()
================================================
FILE: spider/dao/ArticleDao.py
================================================
import util.MySql as MySql
import model.ArticleModel as ArtMod
import random
import logging
class ArticleDao:
    """Database access object for articles (news items).

    Every method builds a SQL string and runs it through the ``base`` helper
    given to the constructor, using its ``execute_sql``, ``get_result_one``,
    ``get_result_all`` and ``commit_transactions`` methods.

    NOTE(review): all statements are assembled with ``%`` string formatting, so
    a value containing a quote (e.g. a scraped title or article HTML) can break
    or inject into the SQL. Moving to parameterized queries requires checking
    the ``MySql`` helper's interface, which is not visible from this class.

    # 20-04-17 Updated for the new SQL schema.
    """

    def __init__(self, base: "MySql.MySql"):
        """Keep the database helper that every query of this DAO goes through."""
        self.__base = base

    def is_art_exist(self, art_spider):
        """Check whether an article with the given spider id is already stored.

        # 20-04-17 Updated.
        :param art_spider: value compared against the ``art_spider`` column
        :return: ``False`` when the row count is 0, otherwise ``True``
        :raises Exception: any error from the query is logged and re-raised
        """
        try:
            search_sql = "select count(*) from Article where art_spider = '%s'" % art_spider
            self.__base.execute_sql(search_sql)
            result = self.__base.get_result_one()
            if result[0] == 0:
                logging.info("新闻 art_spider=%s 数据库查询 不存在" % art_spider)
                return False
            logging.info("新闻 art_spider=%s 数据库查询 已存在" % art_spider)
            return True
        except:
            logging.exception("新闻 art_spider=%s 数据库查询 失败" % art_spider)
            raise

    def insert_art(self, art_mod: "ArtMod.ArticleModel"):
        """Insert one article row and commit.

        # 20-04-17 Updated.
        # 20-04-23 Rollback bug fix.
        :param art_mod: article model; its title, spider id, type, image url,
            content, tags, author id, time and legal flag are written
        :return: None
        :raises Exception: any error is logged and re-raised (no rollback is
            issued here)
        """
        try:
            insert_sql = "insert into Article(art_title, art_spider, art_type, art_image_url, " \
                         "art_content, art_tags, " \
                         "art_cus_id, art_time, art_legal)" \
                         " values ('%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', %d)" \
                         % (art_mod.art_title, art_mod.art_spider, art_mod.art_type, art_mod.art_image_url,
                            art_mod.art_content, art_mod.art_tags,
                            art_mod.art_cus_id, art_mod.art_time, art_mod.art_legal)
            self.__base.execute_sql(insert_sql)
            self.__base.commit_transactions()
            logging.info("新闻 art_spider=%s 数据库插入 成功" % art_mod.art_spider)
        except:
            logging.exception("新闻 art_spider=%s 数据库插入 失败" % art_mod.art_spider)
            raise

    def search_art_id_by_spider(self, art_spider):
        """Look up an article's ``art_id`` by its spider id.

        # 20-04-17 Updated.
        :param art_spider: value compared against the ``art_spider`` column
        :return: first column of the first result row
        :raises Exception: any error (including a missing row) is logged with
            its traceback and re-raised
        """
        try:
            search_sql = "select art_id from Article where art_spider = '%s'" % art_spider
            self.__base.execute_sql(search_sql)
            result = self.__base.get_result_one()
            logging.info("新闻 art_spider=%s 数据库查询: art_id 值: %s" % (art_spider, result[0]))
            return result[0]
        except:
            # Failure path: log with traceback, like the sibling methods do.
            logging.exception("新闻 art_spider=%s 数据库查询 art_id 失败" % art_spider)
            raise

    def search_art_time_by_spider(self, art_spider):
        """Look up an article's ``art_time`` by its spider id.

        # 20-04-17 Updated.
        :param art_spider: value compared against the ``art_spider`` column
        :return: first column of the first result row
        :raises Exception: any error (including a missing row) is logged with
            its traceback and re-raised
        """
        try:
            search_sql = "select art_time from Article where art_spider = '%s'" % art_spider
            self.__base.execute_sql(search_sql)
            result = self.__base.get_result_one()
            logging.info("新闻 art_spider=%s art_time 数据库查询: art_id 值: %s" % (art_spider, result[0]))
            return result[0]
        except:
            # Failure path: log with traceback, like the sibling methods do.
            logging.exception("新闻 art_spider=%s art_time 数据库查询 art_id 失败" % art_spider)
            raise

    def check_art_cus_relationship(self, art_id, cus_id):
        """Check whether the article ``art_id`` is authored by customer ``cus_id``.

        # 20-04-17 Updated.
        :param art_id: integer article id (formatted with ``%d``)
        :param cus_id: integer customer id (formatted with ``%d``)
        :return: ``False`` when no matching row exists, otherwise ``True``
        :raises Exception: any error is logged and re-raised
        """
        try:
            search_sql = "select count(*) from Article where art_id=%d and art_cus_id=%d" % (art_id, cus_id)
            self.__base.execute_sql(search_sql)
            result = self.__base.get_result_one()
            if result[0] == 0:
                logging.info("关系 新闻 art_id=%s 用户 cus_id=%s 数据库查询 不存在" % (art_id, cus_id))
                return False
            logging.info("关系 新闻 art_id=%s 用户 cus_id=%s 数据库查询 存在" % (art_id, cus_id))
            return True
        except:
            logging.exception("关系 新闻 art_id=%s 用户 cus_id=%s 数据库查询 错误" % (art_id, cus_id))
            raise

    def update_art_com_number(self, art_id):
        """Deprecated: increment ``art_comment_num`` of one article and commit.

        The method used to be decorated with ``@DeprecationWarning``, which
        replaced it by an exception instance and made it impossible to call.
        It now emits a real ``DeprecationWarning`` and still runs.

        NOTE(review): the statement targets table ``Articles`` while the rest of
        this class uses ``Article`` — presumably a leftover of the old schema;
        confirm before relying on this method.

        :param art_id: integer article id (formatted with ``%d``)
        :return: None
        :raises Exception: any error is logged and re-raised
        """
        import warnings  # local import so the module's import block stays untouched
        warnings.warn("ArticleDao.update_art_com_number is deprecated",
                      DeprecationWarning, stacklevel=2)
        try:
            update_sql = "update Articles set art_comment_num = art_comment_num + 1 where art_id = %d" % art_id
            self.__base.execute_sql(update_sql)
            self.__base.commit_transactions()
            logging.info("update_art_com_number art=%s 评论数 数据库更新 成功" % art_id)
        except:
            logging.exception("update_art_com_number art=%s 评论数 数据库更新 失败" % art_id)
            raise

    def update_art_feature(self, behavior, art_id, art_time):
        """Create or update the ``ArtFeatureCount`` statistics row of an article.

        * ``behavior == 1`` inserts the row with ``afc_art_id`` and ``afc_art_time``.
        * ``behavior == 6`` only increments ``afc_read_num`` by one.
        * ``behavior`` 2-5 increments the matching counter (like, dislike,
          comment, reply) and ``afc_read_num`` by one each.

        The original condition ``behavior != 1 or behavior != 6`` was always
        true, so behavior 6 never reached the read-only branch and produced a
        statement that assigned ``afc_read_num`` twice.

        This method executes the statement but does not call
        ``commit_transactions`` itself.

        # 20-04-17 Updated.
        # 20-04-23 Rollback bug fix.
        # 20-05-15 Behavior 6 only adds one to the read count.
        :param behavior: statistics code 1-6 (not the same thing as the
            customer-behavior code)
        :param art_id: integer article id (formatted with ``%d``)
        :param art_time: article time, only used when ``behavior == 1``
        :return: None
        :raises KeyError: when ``behavior`` is not one of the known codes
        """
        behavior_dict = {
            1: 'afc_art_time',
            2: 'afc_like_num',
            3: 'afc_dislike_num',
            4: 'afc_com_num',
            5: 'afc_rep_num',
            6: 'afc_read_num'
        }
        # "afc_art_time=afc_art_time" is kept from the original statements;
        # presumably it stops an auto-updating timestamp column from changing
        # — TODO confirm against the table definition.
        if behavior == 1:
            update_sql = "insert into ArtFeatureCount(afc_art_id, afc_art_time) values(%d, '%s')" % (art_id, art_time)
        elif behavior == 6:
            update_sql = "update ArtFeatureCount set afc_read_num=afc_read_num+1, afc_art_time=afc_art_time" \
                         " where afc_art_id=%d" % art_id
        else:
            column = behavior_dict[behavior]
            update_sql = ("update ArtFeatureCount set {0}={1}+1, afc_read_num=afc_read_num+1, "
                          "afc_art_time=afc_art_time where afc_art_id=%d").format(column, column) % art_id
        self.__base.execute_sql(update_sql)

    def get_same_category_art(self, cur_art_id, category):
        """Pick a random handful of recent articles of one category.

        Used to generate extra simulated browsing behavior. Randomness enters
        twice: the number of rows requested is drawn from [1, 40], and the rows
        themselves are ordered by ``rand()``. Only articles other than
        ``cur_art_id`` whose ``art_time`` is less than 240 hours old qualify.

        # 20-05-15 Created.
        # 20-05-19 Bug fix, random logic added.
        :param cur_art_id: integer id of the current article, excluded from the result
        :param category: value compared against the ``art_type`` column
        :return: whatever ``get_result_all`` yields for the query; each row
            holds ``(art_id, art_cus_id)``
        """
        rand_num = random.randint(1, 40)
        select_sql = "select art_id, art_cus_id from Article " \
                     "where art_type = '%s' and art_id != %d and timestampdiff(HOUR, art_time, now()) < 240 " \
                     "order by rand() limit %d" % \
                     (category, cur_art_id, rand_num)
        self.__base.execute_sql(select_sql)
        return self.__base.get_result_all()
================================================
FILE: spider/dao/CommentDao.py
================================================
import util.MySql as MySql
import model.CommentModel as ComMod
import logging
class CommentDao:
    """Database access object for comments.

    All statements are built as SQL strings and executed through the ``base``
    helper supplied to the constructor.

    NOTE(review): values are spliced into the SQL with ``%`` formatting; a
    comment text containing a quote would break the statement. Confirm whether
    the ``MySql`` helper offers parameter binding.

    # 20-04-17 Updated for the new SQL schema.
    """

    def __init__(self, base: "MySql.MySql"):
        """Remember the database helper used for every query."""
        self.__base = base

    def __fetch_first(self, sql):
        """Run ``sql`` and hand back the first column of the first result row."""
        self.__base.execute_sql(sql)
        row = self.__base.get_result_one()
        return row[0]

    def is_com_exist(self, com_spider):
        """Tell whether a comment with this spider id is already stored.

        # 20-04-17 Updated.
        :param com_spider: value compared against the ``com_spider`` column
        :return: ``False`` when the row count is 0, otherwise ``True``
        """
        query = "select count(*) from Comment where com_spider = '%s'" % com_spider
        return self.__fetch_first(query) != 0

    def insert_com(self, com_mod: "ComMod.CommentModel"):
        """Insert one comment row and commit.

        # 20-04-17 Updated.
        # 20-04-23 Rollback bug fix.
        :param com_mod: comment model; its content, customer id, article id,
            time, spider id and legal flag are written
        :return: None
        """
        values = (
            com_mod.com_content,
            com_mod.com_cus_id,
            com_mod.com_art_id,
            com_mod.com_time,
            com_mod.com_spider,
            com_mod.com_legal,
        )
        template = ("insert into Comment(com_content, com_cus_id,"
                    " com_art_id, com_time, com_spider, com_legal)"
                    " values ('%s', %d, %d, '%s', '%s', %d)")
        self.__base.execute_sql(template % values)
        self.__base.commit_transactions()

    def search_com_id_by_spider(self, com_spider):
        """Look up a comment's ``com_id`` by its spider id.

        # 20-04-17 Updated.
        :param com_spider: value compared against the ``com_spider`` column
        :return: first column of the first result row
        """
        query = "select com_id from Comment where com_spider = '%s'" % com_spider
        return self.__fetch_first(query)

    def check_com_cus_relationship(self, art_id, com_id, cus_id):
        """Check that comment ``com_id`` belongs to article ``art_id`` and customer ``cus_id``.

        # 20-04-17 Updated.
        :param art_id: integer article id (formatted with ``%d``)
        :param com_id: integer comment id (formatted with ``%d``)
        :param cus_id: integer customer id (formatted with ``%d``)
        :return: ``False`` when no matching row exists, otherwise ``True``
        """
        ids = (art_id, com_id, cus_id)
        query = ("select count(*) from Comment "
                 "where com_art_id=%d and com_id=%d and com_cus_id=%d") % ids
        return self.__fetch_first(query) != 0
================================================
FILE: spider/dao/CustomerDao.py
================================================
import util.MySql as MySql
import model.CustomerModel as CusMod
import logging
class CustomerDao:
    """Database access object for customers (users) and their behavior records.

    All statements are built as SQL strings and executed through the ``base``
    helper supplied to the constructor.

    NOTE(review): values are spliced into the SQL with ``%`` formatting; a user
    name containing a quote would break the statement. Confirm whether the
    ``MySql`` helper offers parameter binding.

    # 20-04-17 Updated for the new SQL schema.
    """

    def __init__(self, base: "MySql.MySql"):
        """Remember the database helper used for every query."""
        self.__base = base

    def is_cus_name_exist(self, cus_name):
        """Check whether a customer with this name is already stored.

        # 20-04-17 Created.
        :param cus_name: value compared against the ``cus_name`` column
        :return: ``False`` when the row count is 0, otherwise ``True``
        :raises Exception: any error is logged and re-raised
        """
        try:
            search_sql = "select count(*) from Customer where cus_name = '%s'" % cus_name
            self.__base.execute_sql(search_sql)
            result = self.__base.get_result_one()
            if result[0] == 0:
                logging.info("用户 cus_name=%s 数据库查询 不存在" % cus_name)
                return False
            logging.info("用户 cus_name=%s 数据库查询 已存在" % cus_name)
            return True
        except:
            logging.exception("用户 cus_name=%s 数据库查询 失败" % cus_name)
            raise

    def get_cus_by_name(self, cus_mod: "CusMod.CustomerModel"):
        """Load a customer by name and copy the stored columns onto ``cus_mod``.

        ``cus_name`` itself is left as the caller set it; id, password, spider
        id, avatar url, style and legal flag are overwritten from the first
        result row.

        # 20-04-17 Created.
        :param cus_mod: model whose ``cus_name`` must already be filled in
        :return: None (``cus_mod`` is updated in place)
        :raises Exception: any error (including no matching row) is logged and
            re-raised
        """
        try:
            search_sql = "select cus_id, cus_name, cus_pass, cus_spider, cus_avatar_url, cus_style, cus_legal" \
                         " from Customer where cus_name = '%s'" % cus_mod.cus_name
            self.__base.execute_sql(search_sql)
            result = self.__base.get_result_all()
            cus_mod.cus_id = result[0][0]
            # result[0][1] is cus_name, which the caller already provided.
            cus_mod.cus_pass = result[0][2]
            cus_mod.cus_spider = result[0][3]
            cus_mod.cus_avatar_url = result[0][4]
            cus_mod.cus_style = result[0][5]
            cus_mod.cus_legal = result[0][6]
            logging.info("用户 cus_name=%s 数据库查询 完成" % cus_mod.cus_name)
        except:
            logging.exception("用户 cus_name=%s 数据库查询 失败" % cus_mod.cus_name)
            raise

    def insert_then_get_cus(self, cus_mod: "CusMod.CustomerModel"):
        """Insert the customer if the name is unknown, then reload it from the database.

        # 20-04-17 Created.
        :param cus_mod: model to insert when missing; refreshed in place afterwards
        :return: None
        :raises Exception: any error is logged and re-raised
        """
        try:
            # Insert only when no customer with this name exists yet.
            if not self.is_cus_name_exist(cus_mod.cus_name):
                self.insert_cus(cus_mod)
            # Always refresh the model from the stored row (this fills cus_id).
            self.get_cus_by_name(cus_mod)
            logging.info("数据库处理成功")
        except:
            logging.exception("数据库处理失败")
            raise

    def insert_cus(self, cus_mod: "CusMod.CustomerModel"):
        """Insert one customer row and commit.

        # 20-04-17 Checked.
        # 20-04-23 Rollback bug fix.
        :param cus_mod: model whose name, password, spider id, avatar url,
            style and legal flag are written
        :return: None
        :raises Exception: any error is logged and re-raised
        """
        try:
            insert_sql = "insert into Customer(cus_name, cus_pass, cus_spider, cus_avatar_url, " \
                         "cus_style, cus_legal)" \
                         " values ('%s', '%s', '%s', '%s', '%s', %d)" \
                         % (cus_mod.cus_name, cus_mod.cus_pass, cus_mod.cus_spider, cus_mod.cus_avatar_url,
                            cus_mod.cus_style, cus_mod.cus_legal)
            self.__base.execute_sql(insert_sql)
            self.__base.commit_transactions()
            logging.info("用户 cus_spider=%s 数据库插入 成功" % cus_mod.cus_spider)
        except:
            logging.exception("用户 cus_spider=%s 数据库插入 失败" % cus_mod.cus_spider)
            raise

    def insert_cus_behavior(self, cbr_cus_id_from, cbr_cus_id_to, cbr_behavior, cbr_art_id, cbr_type, cbr_target_id, cbr_time=None):
        """Insert one ``CusBehaviorRecord`` row and commit.

        # 20-04-17 Updated.
        # 20-04-19 Second insert removed; the behavior time may now be omitted.
        # 20-04-23 Rollback bug fix.
        :param cbr_cus_id_from: integer id of the acting customer
        :param cbr_cus_id_to: integer id of the customer acted upon
        :param cbr_behavior: integer behavior code
        :param cbr_art_id: integer article id
        :param cbr_type: integer target-type code
        :param cbr_target_id: integer id of the target
        :param cbr_time: behavior time; when ``None`` the ``cbr_time`` column is
            left out of the statement
        :return: None
        :raises Exception: any error is logged and re-raised
        """
        try:
            if cbr_time is not None:
                insert_sql = "insert into CusBehaviorRecord(cbr_cus_id_from, cbr_cus_id_to, cbr_behavior, cbr_time, cbr_art_id, cbr_type, cbr_target_id) " \
                             "values (%d, %d, %d, '%s', %d, %d, %d)" \
                             % (cbr_cus_id_from, cbr_cus_id_to, cbr_behavior, cbr_time, cbr_art_id, cbr_type, cbr_target_id)
            else:
                insert_sql = "insert into CusBehaviorRecord(cbr_cus_id_from, cbr_cus_id_to, cbr_behavior, cbr_art_id, cbr_type, cbr_target_id) " \
                             "values (%d, %d, %d, %d, %d, %d)" \
                             % (cbr_cus_id_from, cbr_cus_id_to, cbr_behavior, cbr_art_id, cbr_type, cbr_target_id)
            self.__base.execute_sql(insert_sql)
            self.__base.commit_transactions()
            logging.info("用户 cus_id=%s 与用户 cus_id=%s 行为 %s 数据库插入 成功" % (cbr_cus_id_from, cbr_cus_id_to, cbr_behavior))
        except:
            logging.exception("用户 cus_id=%s 与用户 cus_id=%s 行为 %s 数据库插入 失败" % (cbr_cus_id_from, cbr_cus_id_to, cbr_behavior))
            raise

    def update_cus_feature(self, category, cus_id, update_num=2, flag=False):
        """Create or update the ``CusFeatureCount`` row of a customer.

        Behavior, as implemented:

        * no row for ``cus_id`` and ``flag`` true: insert a row holding only
          ``cfc_cus_id`` (``category`` and ``update_num`` are ignored);
        * no row and ``flag`` false: insert a row with column
          ``cfc_<category>`` set to ``update_num``;
        * row exists: add ``update_num`` to column ``cfc_<category>``
          (``flag`` has no effect here).

        This method executes the statement but does not call
        ``commit_transactions`` itself.

        # 20-04-17 Updated.
        # 20-04-18 Bug fix: the customer feature must grow in step with the
        #          article feature, i.e. by 2 rather than 1.
        # 20-04-23 Interface change: ``flag`` added.
        # 20-04-23 Rollback bug fix.
        # 20-05-15 The increment can now be chosen by the caller.
        :param category: category name; ``'cfc_' + category`` is the column used
        :param cus_id: integer customer id (formatted with ``%d``)
        :param update_num: amount to add; sometimes 1, sometimes 2, default 2
        :param flag: when true, a missing row is merely initialised
        :return: None
        """
        search_sql = "select count(*) from CusFeatureCount where cfc_cus_id=%d" % cus_id
        self.__base.execute_sql(search_sql)
        result = self.__base.get_result_one()
        if result[0] == 0:
            if flag:
                # Row missing and the caller only wants it created.
                update_sql = "insert into CusFeatureCount(cfc_cus_id) value (%d)" % cus_id
            else:
                # Row missing: create it and seed the category counter at once.
                update_sql = "insert into CusFeatureCount(cfc_cus_id, {0}) values(%d, %d)" \
                                 .format('cfc_' + category) % (cus_id, update_num)
        else:
            update_sql = "update CusFeatureCount set {0}={1}+{2} where cfc_cus_id=%d" \
                             .format('cfc_' + category, 'cfc_' + category, update_num) % cus_id
        self.__base.execute_sql(update_sql)

    def is_cus_exist(self, cus_spider):
        """Deprecated: check whether a customer with this spider id is stored.

        The method used to be decorated with ``@DeprecationWarning``, which
        replaced it by an exception instance and made it impossible to call.
        It now emits a real ``DeprecationWarning`` and still runs.

        :param cus_spider: value compared against the ``cus_spider`` column
        :return: ``False`` when the row count is 0, otherwise ``True``
        """
        import warnings  # local import so the module's import block stays untouched
        warnings.warn("CustomerDao.is_cus_exist is deprecated",
                      DeprecationWarning, stacklevel=2)
        search_sql = "select count(*) from Customer where cus_spider = '%s'" % cus_spider
        self.__base.execute_sql(search_sql)
        result = self.__base.get_result_one()
        if result[0] == 0:
            return False
        return True

    def search_cus_id_by_spider(self, cus_spider):
        """Deprecated: look up a customer's ``cus_id`` by spider id.

        The method used to be decorated with ``@DeprecationWarning``, which
        replaced it by an exception instance and made it impossible to call.
        It now emits a real ``DeprecationWarning`` and still runs.

        :param cus_spider: value compared against the ``cus_spider`` column
        :return: first column of the first result row
        """
        import warnings  # local import so the module's import block stays untouched
        warnings.warn("CustomerDao.search_cus_id_by_spider is deprecated",
                      DeprecationWarning, stacklevel=2)
        search_sql = "select cus_id from Customer where cus_spider = '%s'" % cus_spider
        self.__base.execute_sql(search_sql)
        result = self.__base.get_result_one()
        return result[0]
================================================
FILE: spider/dao/ReplyDao.py
================================================
import util.MySql as MySql
import model.ReplyModel as RepMod
import logging
class ReplyDao:
    """Database access object for replies (including replies to replies).

    All statements are built as SQL strings and executed through the ``base``
    helper supplied to the constructor.

    NOTE(review): values are spliced into the SQL with ``%`` formatting; a
    reply text containing a quote would break the statement. Confirm whether
    the ``MySql`` helper offers parameter binding.

    # 20-04-17 Updated for the new SQL schema.
    """

    def __init__(self, base: "MySql.MySql"):
        """Remember the database helper used for every query."""
        self.__base = base

    def is_rep_exist(self, rep_spider):
        """Check whether a reply with this spider id is already stored.

        # 20-04-17 Updated.
        :param rep_spider: value compared against the ``rep_spider`` column
        :return: ``False`` when the row count is 0, otherwise ``True``
        """
        search_sql = "select count(*) from Reply where rep_spider = '%s'" % rep_spider
        self.__base.execute_sql(search_sql)
        result = self.__base.get_result_one()
        if result[0] == 0:
            return False
        return True

    def insert_rep(self, rep_mod: "RepMod.ReplyModel"):
        """Insert one reply row and commit.

        The ``rep_rep_id`` column is only included when the model carries a
        parent reply id. Every field is coerced with ``str``/``int`` before it
        is formatted into the statement.

        # 20-04-17 Updated.
        # 20-04-23 Rollback bug fix.
        :param rep_mod: reply model to store
        :return: None
        """
        if rep_mod.rep_rep_id is None:
            insert_sql = "insert into Reply(rep_content, rep_type, rep_time," \
                         " rep_cus_id, rep_art_id, rep_com_id, " \
                         " rep_spider, rep_legal)" \
                         " values ('%s', %d, '%s', %d, %d, %d, '%s', %d)" \
                         % (str(rep_mod.rep_content), int(rep_mod.rep_type), str(rep_mod.rep_time),
                            int(rep_mod.rep_cus_id), int(rep_mod.rep_art_id), int(rep_mod.rep_com_id),
                            str(rep_mod.rep_spider), int(rep_mod.rep_legal))
        else:
            insert_sql = "insert into Reply(rep_content, rep_type, rep_time," \
                         " rep_cus_id, rep_art_id, rep_com_id, " \
                         "rep_rep_id, rep_spider, rep_legal)" \
                         " values ('%s', %d, '%s', %d, %d, %d, %d, '%s', %d)" \
                         % (str(rep_mod.rep_content), int(rep_mod.rep_type), str(rep_mod.rep_time),
                            int(rep_mod.rep_cus_id), int(rep_mod.rep_art_id), int(rep_mod.rep_com_id),
                            int(rep_mod.rep_rep_id), str(rep_mod.rep_spider), int(rep_mod.rep_legal))
        self.__base.execute_sql(insert_sql)
        self.__base.commit_transactions()

    def search_rep_id_by_spider(self, rep_spider):
        """Look up a reply's ``rep_id`` by its spider id.

        # 20-04-17 Updated.
        :param rep_spider: value compared against the ``rep_spider`` column
        :return: first column of the first result row
        """
        search_sql = "select rep_id from Reply where rep_spider = '%s'" % rep_spider
        self.__base.execute_sql(search_sql)
        result = self.__base.get_result_one()
        return result[0]

    def check_rep_cus_relationship(self, art_id, rep_id, cus_id):
        """Check that reply ``rep_id`` belongs to article ``art_id`` and customer ``cus_id``.

        # 20-04-17 Updated.
        :param art_id: integer article id (formatted with ``%d``)
        :param rep_id: integer reply id (formatted with ``%d``)
        :param cus_id: integer customer id (formatted with ``%d``)
        :return: ``False`` when no matching row exists, otherwise ``True``
        """
        search_sql = "select count(*) from Reply " \
                     "where rep_art_id=%d and rep_id=%d and rep_cus_id=%d" % (art_id, rep_id, cus_id)
        self.__base.execute_sql(search_sql)
        result = self.__base.get_result_one()
        if result[0] == 0:
            return False
        return True

    def search_rep_rep_by_spyder(self, rep_json, rep_mod: "RepMod.ReplyModel"):
        """Resolve the parent reply of a reply-to-reply, best effort.

        When ``rep_json['reply_to_comment']['id']`` is present and a stored
        reply with that spider id is found, ``rep_mod.rep_rep_id`` is set to its
        id and ``rep_mod.rep_type`` to 1. On any ordinary failure (missing key,
        no such row, database error) the reply is treated as a plain reply:
        ``rep_rep_id`` becomes ``None`` and ``rep_type`` 0.

        Only ``Exception`` is swallowed; the previous bare ``except`` also hid
        ``KeyboardInterrupt`` and ``SystemExit``.

        # 20-04-17 Updated.
        :param rep_json: scraped reply dict
        :param rep_mod: reply model updated in place
        :return: None
        """
        try:
            rep_rep_spider = str(rep_json['reply_to_comment']['id'])
            rep_mod.rep_rep_id = self.search_rep_id_by_spider(rep_rep_spider)
            rep_mod.rep_type = 1
        except Exception:
            rep_mod.rep_rep_id = None
            rep_mod.rep_type = 0
================================================
FILE: spider/dao/__init__.py
================================================
================================================
FILE: spider/model/ArticleModel.py
================================================
class ArticleModel:
    """Plain mutable record for one article.

    Every field starts as ``None`` except ``art_legal``, which starts as 1.
    """

    def __init__(self):
        # Fields are created in this exact order; all of them begin unset.
        unset_fields = (
            "art_id",
            "art_title",
            "art_content",
            "art_spider",
            "art_type",
            "art_tags",
            "art_image_url",
            "art_time",
            "art_cus_id",
        )
        for field_name in unset_fields:
            setattr(self, field_name, None)
        self.art_legal = 1
================================================
FILE: spider/model/CommentModel.py
================================================
class CommentModel:
    """Plain mutable record for one comment.

    Every field starts as ``None`` except ``com_legal``, which starts as 1.
    """

    def __init__(self):
        # Chained assignment binds the targets left to right, so the
        # attributes are created in the order they are written here.
        self.com_id = self.com_content = self.com_time = None
        self.com_cus_id = self.com_art_id = self.com_spider = None
        self.com_legal = 1
================================================
FILE: spider/model/CustomerModel.py
================================================
class CustomerModel:
    """Plain mutable record for one customer (user)."""

    def __init__(self):
        """Create an empty customer.

        Every field starts as ``None`` except ``cus_legal``, which starts as 1.
        The original note says cus_gender defaults to 0 and cus_time defaults
        to the insertion time; neither is set here — presumably both are
        database column defaults (TODO confirm).
        # 20-04-17 Updated.
        """
        unset_fields = ("cus_id", "cus_name", "cus_pass", "cus_spider",
                        "cus_avatar_url", "cus_style")
        vars(self).update(dict.fromkeys(unset_fields))
        self.cus_legal = 1
================================================
FILE: spider/model/ReplyModel.py
================================================
class ReplyModel:
    """Plain mutable record for one reply.

    Every field starts as ``None`` except ``rep_legal``, which starts as 1.
    """

    def __init__(self):
        # Fields are created in this exact order; all of them begin unset.
        for field_name in ("rep_id", "rep_content", "rep_type", "rep_time",
                           "rep_spider", "rep_cus_id", "rep_art_id",
                           "rep_com_id", "rep_rep_id"):
            setattr(self, field_name, None)
        self.rep_legal = 1
================================================
FILE: spider/model/__init__.py
================================================
================================================
FILE: spider/process/ArticleProcess.py
================================================
import util.Request as Request
import util.Time as Time
import util.Driver as Driver
import model.ArticleModel as ArtMod
import model.CustomerModel as CusMod
import logging
class ArticleProcess:
    """Fetches article data and fills article models from it.

    # 20-04-17 Updated for the new SQL schema.
    """

    def get_arts_brief_json_by_category(self, category):
        """Fetch the brief article list of one category (no article bodies).

        # 20-04-17 Checked.
        Reference endpoint:
        http://m.toutiao.com/list/?tag=__all__&ac=wap&count=20&format=json_raw&as=A17538D54D106FF&cp=585DF0A65F0F1E1&min_behot_time=1482491618

        :param category: channel tag placed in the ``tag`` query parameter.
            Known tags: news_society, news_entertainment, news_tech,
            news_military, news_sports, news_car, news_finance, news_world,
            news_fashion, news_travel, news_discovery, news_baby,
            news_regimen, news_story, news_essay, news_game, news_history,
            news_food.
        :return: the ``data`` member of what ``Request(...).more()`` returns, or
            ``None`` when anything goes wrong (the failure is not logged).
            Per the reference endpoint this is a list of dicts such as
            (abridged to the fields this project reads):
            [
                {
                    "title": "...",
                    "item_id": "6775056296904229390",
                    "group_id": "6775056296904229390",
                    "publish_time": 1577527020,
                    "tag": "news_story",
                    "image_list": [
                        {"url": "http://.../a.jpg", "width": 640, "height": 360}
                    ],
                    "media_info": {
                        "avatar_url": "http://p1.pstatp.com/large/...",
                        "media_id": 1629031487078411,
                        "name": "...",
                        "user_verified": true
                    }
                },
                ...
            ]
        """
        try:
            url = 'http://m.toutiao.com/list/?tag={0}&ac=wap&count=20&format=json_raw&as=A17538D54D106FF&cp=585DF0A65F0F1E1&min_behot_time=1482491618'.format(category)
            headers = {
                "Host": "m.toutiao.com",
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36'
            }
            # NOTE(review): these look like cookies captured from one browser
            # session; presumably they expire — confirm how they are refreshed.
            cookies = {
                "tt_webid": "6754560229981750791",
                "WEATHER_CITY": "%E5%8C%97%E4%BA%AC",
                "csrftoken": "4e76bad8185f77ea8b647e50e3bb0e26",
                "_ga": "GA1.2.268549673.1572668703",
                "__utmz": "24953151.1578051975.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none)",
                "__utma": "24953151.268549673.1572668703.1578051975.1578221568.2",
                "SLARDAR_WEB_ID": "60060dd2-e5b2-470c-b7f4-09b8d877a031",
                "ttcid": "3f7ad0f028c54aa584367115a3fa7cb028",
                "__ac_nonce": "05ecba43c0082bbdfb3e",
                "__ac_signature": "O2auwAAgEBACmHx7dlnWxztnr9AAGWz3dGBVQL.r0H1se1MOmPcu.0Mxo.Y9Zem1qLCx5rV13rttFjnqiBP7d4KfTBbgN8Az4ip.Po5Ht9XBlX2CZW8ZIUZV9blZ9smX461",
                "s_v_web_id": "verify_kamdklmd_rcCyRT2X_ZLBe_4OQI_Brjy_Q7HFKjeCzPbt",
                "__tasessionId": "gx98j8z1x1590404157654",
                "tt_scid": "nLlc9z1mWQgXRhJFHC7i29KXmnHM9sZynN1Ue86iBmH5DPSaDONSaQYQz7mqHMcMf924"
            }
            result = Request.Request(url, headers, cookies).more()['data']
            logging.info("获取新闻缩率信息 %s 成功" % url)
            return result
        except Exception:
            # Best effort: callers get None on failure. Only ordinary errors are
            # swallowed so Ctrl-C / interpreter exit still propagate.
            return None

    def set_art(self, art_brief_json, category, art_cus_id, art_mod: "ArtMod.ArticleModel"):
        """Fill ``art_mod`` from a brief article dict plus the rendered article page.

        The article page ``https://www.toutiao.com/a<item_id>/`` is loaded in a
        Chrome driver and the inner HTML of the element with class
        ``syl-page-article`` becomes the article content. The driver is closed
        whether or not filling succeeds.

        NOTE(review): ``driver.close()`` is kept from the original; whether the
        driver returned by ``Driver.get_chrome_driver`` also needs ``quit()``
        cannot be told from here.

        # 20-04-17 Updated.
        :param art_brief_json: one entry of the brief article list; ``item_id``,
            ``publish_time`` and ``title`` are required, ``image_list`` and
            ``keywords`` are optional
        :param category: stored as ``art_type``
        :param art_cus_id: stored as ``art_cus_id``
        :param art_mod: article model updated in place
        :return: None
        :raises Exception: any error from the driver or a missing required key
            propagates to the caller
        """
        driver = Driver.Driver.get_chrome_driver()
        try:
            url = "https://www.toutiao.com/a{0}/".format(art_brief_json['item_id'])
            driver.implicitly_wait(3)
            driver.get(url)
            art_mod.art_spider = art_brief_json['item_id']
            # Cover image: first entry of image_list, or '' when the list is
            # empty, missing or malformed.
            try:
                image_list = art_brief_json['image_list']
                if len(image_list) == 0:
                    art_mod.art_image_url = ''
                else:
                    art_mod.art_image_url = image_list[0]['url']
            except Exception:
                art_mod.art_image_url = ''
            art_mod.art_legal = 1
            art_mod.art_time = Time.Time.time_trans(art_brief_json['publish_time'])
            art_mod.art_cus_id = art_cus_id
            art_mod.art_id = None
            # Tags are optional in the brief data.
            try:
                art_mod.art_tags = art_brief_json['keywords']
            except Exception:
                art_mod.art_tags = ''
            art_mod.art_type = category
            art_mod.art_title = art_brief_json['title']
            art_mod.art_content = driver.find_element_by_class_name("syl-page-article").get_attribute('innerHTML')
            logging.info("设置新闻数据 url=%s 成功" % url)
        finally:
            driver.close()

    def get_art_json(self, art_brief_json):
        """Deprecated: fetch the full content record of one article.

        The method used to be decorated with ``@DeprecationWarning``, which
        replaced it by an exception instance and made it impossible to call.
        It now emits a real ``DeprecationWarning`` and still runs.

        Reference endpoint:
        http://m.toutiao.com/i6364969235889783298/info/

        :param art_brief_json: one entry of the brief article list; ``item_id``
            is used to build the URL
        :return: the ``data`` member of what ``Request(...).more()`` returns, or
            ``None`` when anything goes wrong (the failure is not logged).
            Per the reference endpoint the record looks like (abridged):
            {
                "detail_source": "...",
                "media_user": {"screen_name": "...", "avatar_url": "...", "id": "52681187308"},
                "publish_time": 1574065021,
                "title": "...",
                "url": "http://toutiao.com/group/6760557790046978567/",
                "content": "... article body as HTML ...",
                "source": "...",
                "comment_count": 326,
                "creator_uid": 52408555030
            }
        """
        import warnings  # local import so the module's import block stays untouched
        warnings.warn("ArticleProcess.get_art_json is deprecated",
                      DeprecationWarning, stacklevel=2)
        try:
            art_url = 'http://m.toutiao.com/i{0}/info/'.format(art_brief_json['item_id'])
            headers = {
                "Host": "m.toutiao.com",
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36'
            }
            result = Request.Request(art_url, headers).more()['data']
            logging.info("获取新闻内容数据 %s 成功" % art_url)
            return result
        except Exception:
            # Best effort: callers get None on failure.
            return None
================================================
FILE: spider/process/CommentProcess.py
================================================
import util.Request as Request
import util.Time as Time
import model.CommentModel as ComMod
import logging
class CommentProcess:
    """Fetches comment data and fills comment models from it.

    # 20-04-17 Updated for the new SQL schema.
    """

    def get_coms_json(self, art_brief_json):
        """Fetch the first page (20 entries) of comments for an article.

        # 20-04-17 Checked.
        # 20-04-28 Page size raised from 10 to 20.
        # 20-05-02 Log text changed; the ``aid`` URL parameter, whose purpose
        #          is unclear, was lowered.
        Reference endpoint:
        https://www.toutiao.com/article/v2/tab_comments/?aid=24&app_name=toutiao-web&group_id=6732655510039822860&item_id=6732655510039822860&offset=0&count=5

        :param art_brief_json: brief article dict; ``group_id`` and ``item_id``
            are placed in the request URL
        :return: the ``data`` member of what ``Request(...).more()`` returns, or
            ``None`` after logging when anything goes wrong. Per the reference
            endpoint each entry looks like (abridged to the fields this
            project reads):
            {
                "comment": {
                    "id": 6732812439848665099,
                    "text": "...",
                    "create_time": 1567605056,
                    "user_id": 1684053931865459,
                    "user_name": "...",
                    "user_profile_image_url": "http://.../x.image"
                },
                "id": 6732812439848665099,
                "cell_type": 1
            }
        """
        try:
            url_template = ('https://www.toutiao.com/api/pc/article/v4/tab_comments/?'
                            'aid=1&app_name=toutiao-web&group_id={0}&item_id={1}&offset=0&count={2}')
            com_url = url_template.format(art_brief_json['group_id'], art_brief_json['item_id'], 20)
            request_headers = {
                "Host": "www.toutiao.com",
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36'
            }
            response = Request.Request(com_url, request_headers).more()
            comments = response['data']
            logging.info("获取评论页数据 %s 成功" % com_url)
            return comments
        except:
            logging.exception("获取评论页数据 失败")
            return None

    def set_com(self, com_json, art_id, cus_id, com_mod: "ComMod.CommentModel"):
        """Fill ``com_mod`` from one scraped comment entry.

        # 20-04-17 Updated.
        :param com_json: one entry of the comment list; its ``comment`` dict
            supplies ``create_time``, ``text`` and ``id``
        :param art_id: stored as ``com_art_id``
        :param cus_id: stored as ``com_cus_id``
        :param com_mod: comment model updated in place
        :return: None
        :raises Exception: any error is logged and re-raised
        """
        try:
            com_mod.com_cus_id = cus_id
            convert_time = Time.Time.time_trans
            comment = com_json['comment']
            com_mod.com_time = convert_time(comment['create_time'])
            com_mod.com_content = comment['text']
            com_mod.com_art_id = art_id
            com_mod.com_id = None
            com_mod.com_legal = 1
            com_mod.com_spider = str(comment['id'])
            logging.info("设置评论数据 成功")
        except:
            logging.exception("设置评论数据 失败")
            raise
================================================
FILE: spider/process/CustomerProcess.py
================================================
import model.CustomerModel as CusMod
import util.Md5 as Md5
import logging
class CustomerProcess:
    """Fills customer (user) models from scraped article, comment and reply data.

    Every customer created here gets the password produced by
    ``Md5.Md5.set_cus_pass("123456")`` and ``cus_id`` reset to ``None``.

    # 20-04-17 Updated for the new SQL schema.
    """

    # Profile text used when the source provides no description.
    _DEFAULT_STYLE = '这个用户很懒啥也没写'

    def set_art_cus(self, art_brief_json, cus_mod: "CusMod.CustomerModel"):
        """Fill ``cus_mod`` with the author of an article.

        # 20-04-17 Updated.
        :param art_brief_json: brief article dict; its ``media_info`` supplies
            ``media_id``, ``avatar_url`` and ``name``
        :param cus_mod: customer model updated in place
        :return: None
        :raises Exception: a missing key or a hashing error propagates; fields
            assigned before the failure keep their new values
        """
        cus_mod.cus_spider = str(art_brief_json['media_info']['media_id'])
        cus_mod.cus_legal = 1
        cus_mod.cus_style = self._DEFAULT_STYLE
        cus_mod.cus_avatar_url = art_brief_json['media_info']['avatar_url']
        cus_mod.cus_name = art_brief_json['media_info']['name']
        cus_mod.cus_pass = Md5.Md5.set_cus_pass("123456")
        cus_mod.cus_id = None

    def set_com_cus(self, com_json, cus_mod: "CusMod.CustomerModel"):
        """Fill ``cus_mod`` with the author of a comment.

        # 20-04-17 Updated.
        :param com_json: one comment entry; its ``comment`` dict supplies
            ``user_id``, ``user_profile_image_url`` and ``user_name``
        :param cus_mod: customer model updated in place
        :return: None
        :raises Exception: a missing key or a hashing error propagates; fields
            assigned before the failure keep their new values
        """
        cus_mod.cus_spider = str(com_json['comment']['user_id'])
        cus_mod.cus_legal = 1
        cus_mod.cus_style = self._DEFAULT_STYLE
        cus_mod.cus_avatar_url = com_json['comment']['user_profile_image_url']
        cus_mod.cus_name = com_json['comment']['user_name']
        cus_mod.cus_pass = Md5.Md5.set_cus_pass("123456")
        cus_mod.cus_id = None

    def set_rep_cus(self, rep_json, cus_mod: "CusMod.CustomerModel"):
        """Fill ``cus_mod`` with the author of a reply.

        The user's ``description`` becomes the profile text; when it is empty,
        missing or unreadable the default text is used instead.

        # 20-04-17 Updated.
        :param rep_json: one reply entry; its ``user`` dict supplies
            ``user_id``, ``description`` (optional), ``avatar_url`` and ``name``
        :param cus_mod: customer model updated in place
        :return: None
        :raises Exception: a missing required key or a hashing error propagates;
            fields assigned before the failure keep their new values
        """
        cus_mod.cus_spider = str(rep_json['user']['user_id'])
        cus_mod.cus_legal = 1
        try:
            cus_mod.cus_style = rep_json['user']['description']
            if cus_mod.cus_style == '':
                cus_mod.cus_style = self._DEFAULT_STYLE
        except Exception:
            # Description is optional; only ordinary errors fall back to the
            # default so Ctrl-C / interpreter exit still propagate.
            cus_mod.cus_style = self._DEFAULT_STYLE
        cus_mod.cus_avatar_url = rep_json['user']['avatar_url']
        cus_mod.cus_name = rep_json['user']['name']
        cus_mod.cus_pass = Md5.Md5.set_cus_pass("123456")
        cus_mod.cus_id = None
================================================
FILE: spider/process/ReplyProcess.py
================================================
import util.Request as Request
import util.Time as Time
import model.ReplyModel as RepMod
import logging
class ReplyProcess:
    """Fetches reply data for a comment and fills reply models with it.

    # Revised to follow the new SQL schema (original note is dated "20-17-04",
    # presumably 20-04-17).
    """

    def get_reps_json(self, com_json, count=20, offset=0):
        """Download the replies (including replies to replies) of one comment.

        # 20-04-17 code check OK
        # 20-05-07 changed: raise when the fetched content is None

        Endpoint reference noted by the original author:
        https://www.toutiao.com/2/comment/v2/reply_list/?aid=24&app_name=toutiao-web&id=6733175468666748931&offset=0&count=20&repost=0
        (the code below calls the ``/api/pc/2/comment/v4/reply_list/`` variant).

        :param com_json: comment payload; only ``com_json['id']`` is read.
        :param count: value sent as the ``count`` query parameter (default 20,
            the value that used to be hard-coded).
        :param offset: value sent as the ``offset`` query parameter (default 0,
            the value that used to be hard-coded).
        :return: the object found at ``['data']['data']`` of the JSON response;
            in the sample captured by the original author this is a list of
            reply dicts.
        :raises RuntimeError: if ``['data']['data']`` is ``None``.

        Abridged sample of one returned element (in the original sample the
        ``reply_to_comment`` key is present only on replies that answer
        another reply):
            [
                {
                    "id":6777509244712976384,
                    "id_str":"6777509244712976384",
                    "create_time":1578011843,
                    "text":"现在我也是半个半个买了,太贵了",
                    "content":"现在我也是半个半个买了,太贵了",
                    "content_rich_span":"{"links":[]}",
                    "digg_count":1,
                    "forward_count":0,
                    "user_digg":false,
                    "is_owner":false,
                    "has_author_digg":0,
                    "thumb_image_list":[],
                    "large_image_list":[],
                    "user":{
                        "user_id":5943146542,
                        "name":"灵辉72330603",
                        "screen_name":"灵辉72330603",
                        "avatar_url":"http://p3.pstatp.com/thumb/5ac8001ee3bc186542d0",
                        "description":"",
                        "user_verified":false,
                        "verified_reason":"",
                        "user_auth_info":"",
                        "is_following":false,
                        "is_followed":false,
                        "is_blocking":false,
                        "is_blocked":false,
                        "author_badge":[],
                        "author_badge_night":[],
                        "interact_style":0,
                        "is_pgc_author":false,
                        "user_relation":0,
                        "user_decoration":"",
                        "band_url":"",
                        "band_name":""
                    },
                    "group":null,
                    "repost_params":null,
                    "reply_to_comment":{
                        "id":6777317683702185995,
                        "id_str":"6777317683702185995",
                        "text":"西瓜整个买的 好像只有中锅和美锅 其他都是切开卖的",
                        "content_rich_span":"{"links":[]}",
                        "status":1,
                        "user_id":4540648983,
                        "user_name":"拔吊无情3344",
                        "user_profile_image_url":"http://sf6-ttcdn-tos.pstatp.com/img/tos-cn-i-0022/ec4a0856405d4b3c92febe148fbe26e5~120x256.image",
                        "user_verified":false,
                        "verified_reason":"",
                        "user_auth_info":"",
                        "is_pgc_author":false,
                        "is_followed":false,
                        "is_following":false,
                        "user_relation":0,
                        "large_image_list":[],
                        "thumb_image_list":[]
                    }
                }
            ]
        """
        reply_url = (
            'https://www.toutiao.com/api/pc/2/comment/v4/reply_list/?'
            'aid=24&app_name=toutiao-web&id={0}&offset={1}&count={2}&repost=0'
        ).format(com_json['id'], offset, count)
        headers = {
            "Host": "www.toutiao.com",
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36'
        }
        result = Request.Request(reply_url, headers).more()['data']['data']
        if result is None:
            # A bare ``raise`` with no active exception only produces
            # "RuntimeError: No active exception to reraise"; raise the same
            # exception type explicitly, with a message that names the URL.
            raise RuntimeError("reply list is None for %s" % reply_url)
        logging.info("获取回复信息 %s 成功", reply_url)
        return result

    def set_rep(self, rep_json, rep_art_id, rep_com_id, rep_cus_id, rep_mod: RepMod.ReplyModel):
        """Fill ``rep_mod`` from one reply payload.

        # 20-04-17 revision completed

        :param rep_json: reply payload; ``create_time``, ``text`` and ``id``
            are read from it.
        :param rep_art_id: value stored in ``rep_mod.rep_art_id``.
        :param rep_com_id: value stored in ``rep_mod.rep_com_id``.
        :param rep_cus_id: value stored in ``rep_mod.rep_cus_id``.
        :param rep_mod: reply model that is populated in place.
        :return: None
        :raises KeyError: if ``rep_json`` lacks one of the keys listed above.
        """
        rep_mod.rep_rep_id = None
        rep_mod.rep_legal = 1
        rep_mod.rep_cus_id = rep_cus_id
        # ``create_time`` is converted to a formatted string by Time.time_trans.
        rep_mod.rep_time = Time.Time.time_trans(rep_json['create_time'])
        rep_mod.rep_content = rep_json['text']
        rep_mod.rep_com_id = rep_com_id
        rep_mod.rep_id = None
        rep_mod.rep_art_id = rep_art_id
        # The source reply id is stored as text in ``rep_spider``.
        rep_mod.rep_spider = str(rep_json['id'])
================================================
FILE: spider/process/__init__.py
================================================
================================================
FILE: spider/util/Driver.py
================================================
import selenium.webdriver as webdriver
import selenium.webdriver.chrome.options as options
class Driver:
    """Factory for the Selenium-controlled browser."""

    def __init__(self):
        pass

    @staticmethod
    def get_chrome_driver():
        """Create a Chrome WebDriver with custom content-setting prefs.

        :return: a new ``selenium.webdriver.Chrome`` instance.
        """
        chrome_options = options.Options()
        # NOTE(review): value 2 for "images" presumably blocks image loading
        # to speed up crawling - confirm against Chrome's content settings.
        chrome_prefs = {
            "profile.default_content_settings": {"images": 2},
            "profile.managed_default_content_settings": {"images": 2},
        }
        chrome_options.add_experimental_option("prefs", chrome_prefs)
        # chrome_options.add_argument('--headless')
        # ``chrome_options=`` is the deprecated spelling of this keyword and is
        # rejected by newer Selenium releases; ``options=`` is the replacement.
        return webdriver.Chrome(options=chrome_options)
================================================
FILE: spider/util/Json.py
================================================
import json
class Json:
    """Helpers for reading JSON documents."""

    def __init__(self):
        pass

    @staticmethod
    def read_json_file(path, encoding='utf-8'):
        """Parse the JSON file at ``path`` and return the decoded object.

        :param path: location of the JSON file.
        :param encoding: text encoding used to open the file (default UTF-8).
        :return: whatever ``json.load`` produces for the file content.
        :raises OSError: if the file cannot be opened.
        :raises json.JSONDecodeError: if the content is not valid JSON.
        """
        # The context manager closes the handle even when parsing fails;
        # previously the file object was never closed.
        with open(path, encoding=encoding) as js:
            return json.load(js)
================================================
FILE: spider/util/Md5.py
================================================
import hashlib
class Md5:
    """MD5 helper used for customer passwords."""

    @staticmethod
    def set_cus_pass(password):
        """Return the MD5 digest of a customer password.

        :param password: plain-text password (a ``str``); it is encoded as
            UTF-8 before hashing.
        :return: the digest as a 32-character lowercase hexadecimal string.
        """
        encoded = password.encode('utf-8')
        return hashlib.md5(encoded).hexdigest()
================================================
FILE: spider/util/MySql.py
================================================
import pymysql
import logging
class MySql:
    """Small wrapper around one PyMySQL connection and a single cursor."""

    def __init__(self, db_name, user, password, host="localhost", charset="utf8"):
        """Open the connection and create the cursor used by every method.

        :param db_name: name of the database to use.
        :param user: login name.
        :param password: login password.
        :param host: server host name (default ``"localhost"``).
        :param charset: connection character set (default ``"utf8"``).
        """
        # ``database`` is the documented keyword; ``db`` is a deprecated alias.
        self.__db_handle = pymysql.connect(host=host,
                                           user=user,
                                           password=password,
                                           database=db_name,
                                           charset=charset)
        self.__cursor = self.__db_handle.cursor()

    def execute_sql(self, sql, args=None):
        """Run one statement on the shared cursor.

        :param sql: SQL text; may contain ``%s`` / ``%(name)s`` placeholders
            when ``args`` is given.
        :param args: optional tuple, list or dict of values bound to the
            placeholders by the driver. Prefer this over formatting values
            into ``sql`` by hand, which is open to SQL injection.
        :return: None
        """
        # logging.info("%s" % sql)
        self.__cursor.execute(sql, args)

    def commit_transactions(self):
        """Commit the current transaction on the connection."""
        self.__db_handle.commit()

    def commit_rollback(self):
        """Roll back the current transaction on the connection."""
        self.__db_handle.rollback()

    def get_result_all(self):
        """Return all remaining rows of the last executed statement."""
        result = self.__cursor.fetchall()
        return result

    def get_result_one(self):
        """Return the next row of the last executed statement."""
        result = self.__cursor.fetchone()
        return result

    def close(self):
        """Close the cursor and the connection (added; previously neither could be released)."""
        self.__cursor.close()
        self.__db_handle.close()
================================================
FILE: spider/util/Request.py
================================================
import requests
class Request:
    """Builds and sends HTTP GET requests, keeping the cookies between calls."""

    def __init__(self, url, headers=None, cookies=None, timeout=None):
        """Store the request settings.

        :param url: address to fetch.
        :param headers: optional dict of HTTP headers.
        :param cookies: optional cookies sent with the request.
        :param timeout: optional timeout in seconds passed to ``requests.get``;
            ``None`` (the default) waits indefinitely, as before.
        """
        self.__url = url
        self.__headers = headers
        self.__cookie = cookies
        self.__timeout = timeout

    def set_url(self, url):
        """Replace the address used by the next request."""
        self.__url = url

    def set_headers(self, headers):
        """Replace the headers used by the next request."""
        self.__headers = headers

    def set_cookie(self, cookie):
        """Replace the cookies used by the next request."""
        self.__cookie = cookie

    def more(self):
        """Send the GET request and return the decoded JSON body.

        The cookies of the response are remembered and sent with the next call.

        :return: the object produced by ``resp.json()``.
        """
        resp = requests.get(self.__url,
                            headers=self.__headers,
                            cookies=self.__cookie,
                            timeout=self.__timeout)
        # 'utf8mb4' is a MySQL character-set name, not a Python codec; the
        # body is UTF-8 text, so name the real codec.
        resp.encoding = 'utf-8'
        self.__cookie = resp.cookies
        return resp.json()
================================================
FILE: spider/util/Time.py
================================================
import time
class Time:
    """Timestamp formatting helpers."""

    @staticmethod
    def time_trans(time_data):
        """Format a Unix timestamp as local time.

        :param time_data: seconds since the epoch, as accepted by
            ``time.localtime``.
        :return: a string shaped like ``YYYY-mm-dd HH-MM-SS``; the time parts
            are separated by hyphens, not colons.
        """
        local_struct = time.localtime(time_data)
        return time.strftime("%Y-%m-%d %H-%M-%S", local_struct)

    @staticmethod
    def get_local_time():
        """Return the current local time in the ``time_trans`` format."""
        now = time.time()
        return Time.time_trans(now)
================================================
FILE: spider/util/__init__.py
================================================