import re
import os
import json


class _resInTextForTaskB():
    """Plain record holding the fields extracted by ``ReForTaskB.search``.

    Every field starts out as an empty string and is filled in by the caller.
    """

    # Field order used by display(); it fixes the order of the output lines.
    _FIELDS = ("id", "title", "en_title", "college", "info", "source")

    def __init__(self):
        self.id = ""
        self.title = ""
        self.en_title = ""
        self.college = ""
        self.info = ""
        self.source = ""

    def __str__(self):
        return self.display()

    def display(self):
        """Return the record as ``name:value`` lines joined by newlines.

        The last line has no trailing newline.
        """
        return "\n".join(
            "%s:%s" % (name, getattr(self, name)) for name in self._FIELDS
        )


class ReForTaskB():
    """Regex-based extractor that turns a page's text into a result record.

    NOTE(review): the title / en_title / college / info patterns below consist
    only of newlines and whitespace around the capture group, and the title
    and college patterns are identical. It looks like their original anchors
    (presumably HTML tags) were lost before this file reached review --
    confirm against the upstream source before relying on them.
    """

    # Captures the numeric id from a quoted win.bupt.edu.cn program.do URL.
    reId = re.compile(r'"http://win\.bupt\.edu\.cn/program\.do\?id=(\d+)"')
    reTitle = re.compile(r'\n\n\s*(.*?)\s*\n\n')
    reEnTitle = re.compile(r'\n\s*(.*?)\s*\n')
    reCollege = re.compile(r'\n\n\s*(.*?)\s*\n\n')
    # re.S lets the capture group span several lines.
    reInfo = re.compile(r'\n\s*\n\s*(.*?)(\s*\n){7}', re.S)
    # Captures the "name" value of the first object in a JSON-looking array.
    reSource = re.compile(
        r'\[\{"score":".*?","type":".*?","time":".*?","name":"(.*?)"\}'
    )

    @staticmethod
    def _first_group(pattern, text):
        """Return group 1 of the first match of *pattern*, or "" if none."""
        match = pattern.search(text)
        return match.group(1) if match else ""

    @staticmethod
    def _decode_json_string(raw):
        r"""Decode the escapes in *raw*, the body of a JSON string literal.

        ``json.loads`` resolves ``\uXXXX`` escapes while leaving characters
        that are already non-ASCII untouched. The previous approach,
        ``encode('utf-8').decode('unicode_escape')``, re-read such characters
        byte by byte as Latin-1 and produced mojibake.

        If *raw* is not valid JSON string content (for example it uses
        ``\x41``-style escapes), fall back to the legacy codec so inputs that
        used to decode still do. The fallback may raise UnicodeDecodeError on
        a malformed escape.
        """
        try:
            # strict=False tolerates raw control characters such as tabs.
            return json.loads('"%s"' % raw, strict=False)
        except ValueError:
            return raw.encode('utf-8').decode('unicode_escape')

    def search(self, text):
        """Extract all known fields from *text*.

        Args:
            text: the page content to scan.

        Returns:
            A result record. ``en_title``, ``college``, ``info`` and
            ``source`` are "" when their pattern does not match.

        Raises:
            AttributeError: if the id or the title pattern does not match;
                both are treated as mandatory.
        """
        res = _resInTextForTaskB()

        # Mandatory fields: a missing match surfaces as AttributeError.
        res.id = self.reId.search(text).group(1)
        res.title = self.reTitle.search(text).group(1)

        # Optional fields default to the empty string.
        res.en_title = self._first_group(self.reEnTitle, text)
        res.college = self._first_group(self.reCollege, text)

        # Flatten the multi-line description onto a single line.
        info = self._first_group(self.reInfo, text)
        res.info = (
            info.replace("\r\n", " ").replace("\n", " ").replace("\r", " ")
        )

        raw_source = self._first_group(self.reSource, text)
        res.source = self._decode_json_string(raw_source) if raw_source else ""
        return res


class ReForTaskA():
    """Placeholder; no behaviour is implemented yet."""

    def __init__(self):
        pass


class OutPutFile():
    """Appends text records to a file, separated by newlines."""

    def __init__(self, fileName, encoding):
        """Remember the target path and the encoding used when writing."""
        self.fileName = fileName
        self.encoding = encoding

    def write(self, text):
        """Append *text* to the file.

        A newline is written before *text* when the file already exists, so
        records are newline-separated without a trailing newline.
        """
        if os.path.exists(self.fileName):
            text = "\n" + text
        with open(self.fileName, mode='a', encoding=self.encoding) as f:
            f.write(text)


class CheckPointAndConfig():
    """JSON-backed settings and checkpoint store.

    The data is loaded on construction; if the file does not exist, a default
    configuration is created and written immediately.
    """

    def __init__(self, fileName):
        self.fileName = fileName
        self.data = {}
        self.loadData()

    def loadData(self):
        """Load ``self.data`` from disk, or create and save the defaults."""
        if os.path.exists(self.fileName):
            # Explicit encoding so the result does not depend on the locale.
            with open(self.fileName, 'r', encoding='utf-8') as load_f:
                self.data = json.load(load_f)
        else:
            # Keys are part of the persisted format; "totle" is kept as
            # spelled so existing files and callers keep working.
            self.data = {
                "number": 2017210281,
                "outputFile": "out.txt",
                "totle": 1,
                "no": 1,
                "checkpoint": 0,
            }
            self.saveData()

    def saveData(self):
        """Write ``self.data`` to the configured file as JSON."""
        with open(self.fileName, "w", encoding='utf-8') as dump_f:
            json.dump(self.data, dump_f)