Discuss / Python / 学长说,抽象编程就是保证可读性的情况下,代码量越少越好

学长说,抽象编程就是保证可读性的情况下,代码量越少越好

Topic source

Spencer

#1 Created at ... [Delete] [Delete and Lock User]

解析器部分偷了个懒:直接利用页面中标签的 class 属性特征来定位数据

class MyHTMLparser(HTMLParser):
    """Collect event records from HTML and print each one once complete.

    A record is the ``info`` dict with three fields: ``'event-title'``,
    ``'datetime'`` and ``'event-location'``. ``'datetime'`` is read from
    the ``datetime`` attribute of a ``<time>`` tag; the other fields are
    taken from the first non-blank text that follows a tag whose ``class``
    attribute equals the field name. As soon as every field is filled the
    record is printed and all fields are reset to ``None``.
    """

    def __init__(self) -> None:
        super().__init__()
        self.info = {
            'event-title': None,
            'datetime': None,
            'event-location': None,
        }
        # Key of ``info`` that the next text chunk should be stored under,
        # or None when no field is currently waiting for its text.
        self.__parserdata = None

    def handle_starttag(self, tag: str, attrs: list) -> None:
        """Record the ``datetime`` attribute and note class-marked fields.

        ``attrs`` is the list of ``(name, value)`` pairs supplied by
        ``HTMLParser``.
        """
        if tag == 'time':
            # Look the attribute up by name rather than by position, so a
            # <time> tag with extra (or no) attributes neither crashes nor
            # stores an unrelated value.
            stamp = dict(attrs).get('datetime')
            if stamp is not None:
                self.info['datetime'] = stamp
        # Every key of ``info`` doubles as the class name marking its tag.
        for field in self.info:
            if ('class', field) in attrs:
                self.__parserdata = field

    def handle_data(self, data: str) -> None:
        """Store ``data`` in the pending field, if one is waiting."""
        if self.__parserdata is None:
            return
        # Whitespace between the marked tag and a nested tag (for example
        # a newline before an inner <a>) is layout, not the field value;
        # keep waiting for real text.
        if not data.strip():
            return
        self.info[self.__parserdata] = data
        self.__parserdata = None

    def handle_endtag(self, tag: str) -> None:
        """Print the record and reset it once every field is filled."""
        if None not in self.info.values():
            print(self.info)
            # Same keys, all values back to None, ready for the next record.
            self.info = dict.fromkeys(self.info)

每次运行都联网抓取太慢了,所以首次运行时把整个页面下载到本地,之后直接读取本地文件

if __name__ == '__main__':
    # Download the events page once and cache it on disk; later runs parse
    # the cached copy instead of fetching it again.
    url = 'https://www.python.org/events/python-events/'
    cache_path = Path('pythonEvent.html')

    # ``exists`` must be *called*: the bare bound method is always truthy,
    # which would skip the download even when no cache file is present.
    if not cache_path.exists():
        req = request.Request(url)
        # Send a browser-like User-Agent with the request.
        req.add_header('User-Agent','Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36 Edg/113.0.1774.50')

        with request.urlopen(req, timeout=30) as page:
            html_doc = page.read().decode('utf-8')
            print(f'status: {page.status} {page.reason}')
        # Write the whole document in one call; iterating a str yields
        # single characters, so a per-item write is one call per character.
        cache_path.write_text(html_doc, encoding='utf-8')
    else:
        html_doc = cache_path.read_text(encoding='utf-8')

    parser = MyHTMLparser()
    parser.feed(html_doc)
    # Flush any text the parser is still buffering at end of input.
    parser.close()

  • 1

Reply