Files
SpiderWithGUI/utils.py
2020-03-25 11:34:46 +08:00

87 lines
3.0 KiB
Python

import re
import os
import json
class _resInTextForTaskB():
    """Plain record of the fields extracted from one page.

    Attributes (all initialised to the empty string): ``id``, ``title``,
    ``en_title``, ``college``, ``info`` and ``source``.
    """

    def __init__(self):
        # Create the attributes in a fixed order, each starting out empty.
        for field in ("id", "title", "en_title", "college", "info", "source"):
            setattr(self, field, "")

    def __str__(self):
        """Return the same text as :meth:`display`."""
        return self.display()

    def display(self):
        """Return the record as ``name:value`` lines joined by newlines.

        The last line (``source``) carries no trailing newline.
        """
        rows = [
            "id:%s" % self.id,
            "title:%s" % self.title,
            "en_title:%s" % self.en_title,
            "college:%s" % self.college,
            "info:%s" % self.info,
            "source:%s" % self.source,
        ]
        return "\n".join(rows)
class ReForTaskB():
    """Regex-based field extractor for one program page (task B).

    The compiled patterns are class attributes so they are built once and
    can be reused by every instance.
    """

    # Program id taken from the quoted ``program.do?id=<digits>`` URL.
    reId = re.compile(r'"http://win\.bupt\.edu\.cn/program\.do\?id=(\d+)"')
    # Title: content of the inline <h2>, surrounding whitespace trimmed.
    reTitle = re.compile(r'<h2 style="display:inline">\s*(.*?)\s*</h2>')
    # English title: content of the single-line, ellipsis-truncated <div>.
    reEnTitle = re.compile(r'<div style="margin-top:-7px;overflow: hidden;white-space: nowrap;text-overflow: ellipsis;">\s*(.*?)\s*</div>')
    # College: content of the inline <h3>.
    reCollege = re.compile(r'<h3 style="display:inline;">\s*(.*?)\s*</h3>')
    # Info: text of the 17px <div> that is followed by seven closing </div>
    # tags; re.S lets the text span several lines.
    reInfo = re.compile(r'<br>\s*<div style="font-size:17px;line-height:25px;">\s*(.*?)(\s*</div>){7}', re.S)
    # Source: the "name" value of the first object of an embedded JSON array
    # whose objects carry score/type/time/name keys.
    reSource = re.compile(r'\[\{"score":".*?","type":".*?","time":".*?","name":"(.*?)"\}')

    @staticmethod
    def _optionalGroup(pattern, text):
        """Return group 1 of the first match of ``pattern``, or ``""``."""
        found = pattern.search(text)
        if found is None:
            return ""
        return found.group(1)

    @staticmethod
    def _decodeSource(raw):
        """Decode the JSON string escapes (``\\uXXXX`` etc.) in ``raw``.

        The text is captured from inside a JSON string literal, so it is
        decoded as one.  Unlike a ``unicode_escape`` round trip this keeps
        characters that are already non-ASCII intact and joins surrogate
        pairs.  Text that is not a valid JSON string body is returned
        unchanged instead of raising.
        """
        try:
            # strict=False tolerates raw control characters such as tabs.
            return json.loads('"' + raw + '"', strict=False)
        except ValueError:
            return raw

    def search(self, text):
        """Extract all known fields from the page source ``text``.

        Args:
            text: page source as ``str``.

        Returns:
            A ``_resInTextForTaskB`` record.  ``en_title``, ``college``,
            ``info`` and ``source`` are ``""`` when their pattern does not
            match.

        Raises:
            AttributeError: if the id or the title pattern does not match;
                these two fields are treated as mandatory.
        """
        res = _resInTextForTaskB()
        # Mandatory fields: a missing match surfaces as AttributeError.
        res.id = self.reId.search(text).group(1)
        res.title = self.reTitle.search(text).group(1)

        res.en_title = self._optionalGroup(self.reEnTitle, text)
        res.college = self._optionalGroup(self.reCollege, text)

        # Flatten the multi-line description into a single line.
        info = self._optionalGroup(self.reInfo, text)
        res.info = info.replace("\r\n", " ").replace("\n", " ").replace("\r", " ")

        res.source = self._decodeSource(self._optionalGroup(self.reSource, text))
        return res
class ReForTaskA():
    """Placeholder class with no behaviour yet.

    NOTE(review): presumably the task A counterpart of ``ReForTaskB`` -
    confirm before relying on it.
    """

    def __init__(self):
        """Create an instance; there is no state to initialise."""
class OutPutFile():
    """Append-only text file whose records are separated by newlines."""

    def __init__(self, fileName, encoding):
        """Remember the target path and the text encoding used for writes.

        Args:
            fileName: path of the output file; it is created on first write.
            encoding: encoding name passed to ``open``.
        """
        self.fileName = fileName
        self.encoding = encoding

    def write(self, text):
        """Append ``text`` to the file as a new record.

        A newline is written before ``text`` only when the file already
        holds data, so neither a fresh file nor an existing empty one
        starts with a blank line.

        Args:
            text: the record to append (no trailing newline is added).
        """
        try:
            has_content = os.path.getsize(self.fileName) > 0
        except OSError:
            # Missing (or unreadable) file: nothing to separate from yet.
            has_content = False
        if has_content:
            text = "\n" + text
        with open(self.fileName, mode='a', encoding=self.encoding) as f:
            f.write(text)
class CheckPointAndConfig():
    """JSON-backed settings and checkpoint store kept in ``self.data``."""

    def __init__(self, fileName):
        """Bind the store to ``fileName`` and load (or create) its contents.

        Args:
            fileName: path of the JSON file backing this store.
        """
        self.fileName = fileName
        self.data = {}
        self.loadData()

    def loadData(self):
        """Fill ``self.data`` from the file, creating it when it is missing.

        When the file does not exist, a built-in default configuration is
        stored in ``self.data`` and written to disk straight away.
        """
        if not os.path.exists(self.fileName):
            # First run: start from the defaults and persist them.
            # NOTE(review): "totle" looks like a misspelling of "total"; the
            # key is part of the on-disk format, so it is left untouched.
            defaults = {
                "number": 2017210281,
                "outputFile": "out.txt",
                "totle": 1,
                "no": 1,
                "checkpoint": 0
            }
            self.data = defaults
            self.saveData()
            return

        with open(self.fileName, 'r') as source:
            loaded = json.load(source)
        self.data = loaded

    def saveData(self):
        """Overwrite the backing file with ``self.data`` serialised as JSON."""
        with open(self.fileName, "w") as target:
            json.dump(self.data, target)