87 lines
3.0 KiB
Python
87 lines
3.0 KiB
Python
import re
|
|
import os
|
|
import json
|
|
class _resInTextForTaskB:
    """Plain record holding the six text fields filled in by ``ReForTaskB.search``.

    Every field starts out as an empty string. ``str(obj)`` and
    ``obj.display()`` produce the same ``name:value`` listing.
    """

    def __init__(self):
        """Initialise every field to the empty string."""
        for fieldName in ("id", "title", "en_title", "college", "info", "source"):
            setattr(self, fieldName, "")

    def __str__(self):
        """Return the same text as :meth:`display`."""
        return self.display()

    def display(self):
        """Return the fields as ``name:value`` lines.

        The lines are separated by ``"\\n"``; there is no newline after the
        final ``source`` line.
        """
        # Each row is formatted on its own so that ``%`` sees exactly one
        # operand per field, as in a field-by-field concatenation.
        rows = [
            "id:%s" % self.id,
            "title:%s" % self.title,
            "en_title:%s" % self.en_title,
            "college:%s" % self.college,
            "info:%s" % self.info,
            "source:%s" % self.source,
        ]
        return "\n".join(rows)
|
|
|
|
class ReForTaskB():
    """Regex-based extractor that fills a ``_resInTextForTaskB`` from page text.

    The compiled patterns are class attributes, so they are built once and
    shared by every instance.
    """

    # Numeric id taken from a quoted ".../program.do?id=<digits>" URL.
    reId = re.compile(r'"http://win\.bupt\.edu\.cn/program\.do\?id=(\d+)"')
    # Whitespace-trimmed content of the inline <h2> element.
    reTitle = re.compile(r'<h2 style="display:inline">\s*(.*?)\s*</h2>')
    # Whitespace-trimmed content of the single-line, ellipsis-styled <div>.
    reEnTitle = re.compile(r'<div style="margin-top:-7px;overflow: hidden;white-space: nowrap;text-overflow: ellipsis;">\s*(.*?)\s*</div>')
    # Whitespace-trimmed content of the inline <h3> element.
    reCollege = re.compile(r'<h3 style="display:inline;">\s*(.*?)\s*</h3>')
    # Body of the 17px <div>, up to a run of seven closing </div> tags.
    # re.S lets the body span several lines.
    reInfo = re.compile(r'<br>\s*<div style="font-size:17px;line-height:25px;">\s*(.*?)(\s*</div>){7}', re.S)
    # "name" value of the first object in a [{"score":...,"type":...,"time":...,"name":...}] list
    # (presumably JSON embedded in the page -- confirm against real input).
    reSource = re.compile(r'\[\{"score":".*?","type":".*?","time":".*?","name":"(.*?)"\}')

    @staticmethod
    def _firstGroup(pattern, text):
        """Return group 1 of the first match of ``pattern`` in ``text``, or ``""``."""
        found = pattern.search(text)
        return found.group(1) if found is not None else ""

    @staticmethod
    def _decodeJsonString(raw):
        """Decode the escape sequences of a captured JSON string body.

        The previous ``raw.encode('utf-8').decode('unicode_escape')`` round
        trip reads the UTF-8 bytes as Latin-1, so any character that was
        already non-ASCII came out as mojibake, and a ``\\uD83D\\uDE00`` style
        surrogate pair was left as two lone surrogates. Parsing the capture
        as a JSON string literal handles both cases.

        Input that is not a valid JSON string body (for example an unescaped
        quote or a ``\\x41`` escape) falls back to the previous decoding, so
        such captures behave as they did before.
        """
        try:
            return json.loads('"%s"' % raw)
        except ValueError:  # json.JSONDecodeError is a ValueError subclass
            return raw.encode('utf-8').decode('unicode_escape')

    def search(self, text):
        """Extract all known fields from ``text``.

        Args:
            text: The page text to scan.

        Returns:
            A ``_resInTextForTaskB`` whose ``en_title``, ``college``, ``info``
            and ``source`` are ``""`` when their pattern does not match.

        Raises:
            AttributeError: If the id or the title pattern does not match;
                no fallback value is provided for these two fields.
        """
        res = _resInTextForTaskB()
        # Deliberately unguarded: a miss on id/title surfaces as the
        # AttributeError raised by calling .group() on None.
        res.id = self.reId.search(text).group(1)
        res.title = self.reTitle.search(text).group(1)
        res.en_title = self._firstGroup(self.reEnTitle, text)
        res.college = self._firstGroup(self.reCollege, text)
        # Flatten every kind of line break inside the info body to a space.
        info = self._firstGroup(self.reInfo, text)
        res.info = info.replace("\r\n", " ").replace("\n", " ").replace("\r", " ")
        res.source = self._decodeJsonString(self._firstGroup(self.reSource, text))
        return res
|
|
|
|
class ReForTaskA:
    """Placeholder class: it defines no patterns, attributes or methods yet."""

    def __init__(self):
        """Create an instance; nothing is initialised."""
|
|
|
|
class OutPutFile():
    """Appends text records to a file, separating them with a newline."""

    def __init__(self, fileName, encoding):
        """Remember the target path and the encoding used when writing.

        Args:
            fileName: Path of the file to append to.
            encoding: Text encoding passed to ``open``.
        """
        self.fileName = fileName
        self.encoding = encoding

    def write(self, text):
        """Append ``text`` to the file, creating the file if necessary.

        A ``"\\n"`` is written before ``text`` only when the file already has
        content. Checking the size rather than mere existence means an
        existing but empty file does not get a leading blank line.

        Args:
            text: The string to append.
        """
        try:
            hasContent = os.path.getsize(self.fileName) > 0
        except OSError:
            # Missing (or unreadable) file: treat it as having no content.
            hasContent = False
        if hasContent:
            text = "\n" + text
        with open(self.fileName, mode='a', encoding=self.encoding) as f:
            f.write(text)
|
|
|
|
class CheckPointAndConfig():
    """Dictionary of settings and checkpoint state persisted as a JSON file.

    The values live in ``self.data``. Callers mutate that dict and call
    :meth:`saveData` to persist it.
    """

    def __init__(self, fileName):
        """Bind to ``fileName`` and load it, creating it with defaults if absent.

        Args:
            fileName: Path of the JSON file backing this object.
        """
        self.fileName = fileName
        self.data = {}
        self.loadData()

    def loadData(self):
        """Populate ``self.data`` from the file, or from defaults.

        When the file does not exist, ``self.data`` is set to the built-in
        defaults and immediately written out via :meth:`saveData`.

        Raises:
            ValueError: If the file exists but does not contain valid JSON
                (``json.JSONDecodeError`` is a ``ValueError``).
        """
        # Open directly instead of testing for existence first, so there is
        # no gap in which the file can disappear between check and open.
        try:
            with open(self.fileName, 'r', encoding="utf-8") as load_f:
                self.data = json.load(load_f)
        except FileNotFoundError:
            # Keys are reproduced exactly (including "totle") because they
            # are part of the on-disk format.
            self.data = {
                "number": 2017210281,
                "outputFile": "out.txt",
                "totle": 1,
                "no": 1,
                "checkpoint": 0
            }
            self.saveData()

    def saveData(self):
        """Write ``self.data`` to the file as JSON, replacing it atomically.

        The JSON is first written to ``<fileName>.tmp`` and then moved over
        the real file with ``os.replace``. A crash or a serialisation error
        part-way through therefore leaves the previous file intact instead
        of a truncated one.

        Raises:
            TypeError: If ``self.data`` contains a value that ``json`` cannot
                serialise. The existing file is left untouched.
            OSError: If the temporary file cannot be written or moved.
        """
        tmpName = self.fileName + ".tmp"  # same directory, so the replace stays on one filesystem
        try:
            with open(tmpName, "w", encoding="utf-8") as dump_f:
                json.dump(self.data, dump_f)
            os.replace(tmpName, self.fileName)
        except BaseException:
            # Do not leave a half-written temp file behind; then re-raise.
            try:
                os.remove(tmpName)
            except OSError:
                pass
            raise