# -*- coding: utf-8 -*-
"""Scrapy pipeline that writes scraped mailing-list items as wiki markup.

Two files are produced in the current working directory: ``heading.txt``
(page title, date stamp and a one-line overview per list) and ``body.txt``
(one section per list with its members).
"""
import time

from scrapy import signals
from scrapy.xlib.pydispatch import dispatcher
from scrapy.contrib.exporter import BaseItemExporter

from belwue.settings import admindomain, emaildomain


class WikiItemExporter(BaseItemExporter):
    """Base exporter providing small helpers that emit wiki markup.

    All helpers build the complete text first and encode it to UTF-8 exactly
    once, right before it is handed to the (binary-mode) output file.
    """

    def __init__(self, file, **kwargs):
        self._configure(kwargs, dont_fail=True)
        self.output = file

    def _write_text(self, text, output):
        """Encode *text* as UTF-8 and write it to *output*."""
        output.write(text.encode('utf8'))

    def _write_heading(self, string, output, which=1):
        """Write *string* as a heading wrapped in *which* ``=`` on each side."""
        marker = u"=" * which
        self._write_text(marker + string + marker + u"\n", output)

    def _write_bullet(self, string, output, indent=1):
        """Write *string* as a ``*`` bullet preceded by *indent* spaces."""
        self._write_text(u" " * indent + u"* " + string + u"\n", output)

    def _encode_wikilink(self, linkstring, string="Link"):
        """Return ``[[linkstring|string]]``; *string* is the link caption."""
        return u"[[" + linkstring + u"|" + string + u"]]"


class MyWikiItemExporter(WikiItemExporter):
    """Exporter that splits its output into an overview and a detail file.

    ``header`` receives the page title, the date stamp and one overview
    bullet per item; ``body`` receives one section per item listing its
    members.
    """

    def __init__(self, header, body, **kwargs):
        self._configure(kwargs, dont_fail=True)
        self.body = body
        self.header = header

    def start_exporting(self):
        """Write the page title, the current date and the overview heading."""
        self._write_heading(
            u"Aktueller Stand der Verteilerlisten", self.header, 6)
        # NOTE(review): '%b' is locale dependent; under Python 2 a non-ASCII
        # month abbreviation would fail on the concatenation below - confirm
        # the locale the crawler runs with.
        stamp = time.strftime('%d.%b %Y')
        # Encode explicitly like every other write instead of relying on the
        # implicit conversion performed when text reaches a binary file.
        self._write_text(u"**Stand: " + stamp + u"**\n", self.header)
        self._write_heading(u"Alle Verteilerlisten", self.header, 5)

    def finish_exporting(self):
        """Nothing to finalise; the pipeline closes the files."""
        pass

    def export_item(self, item):
        """Write one overview bullet to the header and one section to the body.

        Reads ``groupname``, ``groupprettyname`` (first element is used),
        ``groupsize``, ``grouplink`` and ``members`` from *item*.
        ``groupsize`` is concatenated as text, so it is presumably a string -
        verify against the spider.
        """
        pretty_name = item['groupprettyname'][0]

        # Overview line in the header file.
        list_link = self._encode_wikilink(
            item['groupname'] + emaildomain, pretty_name)
        admin_link = self._encode_wikilink(
            u"https://mbox1.belwue.de:9010/DomainAdmin/" + admindomain + "/"
            + item['grouplink'],
            u"Administratorlink zur Verwaltung")
        overview = (list_link + u": Mitglieder " + item['groupsize']
                    + u" - " + admin_link)
        self._write_bullet(overview, self.header)

        # Detail section in the body file.
        self._write_heading(pretty_name, self.body, 5)
        for member in item['members']:
            # Empty caption: the link is emitted as ``[[address|]]``.
            member_link = self._encode_wikilink(member + emaildomain, u"")
            self._write_bullet(member_link, self.body)
        # Blank line between sections; bytes because the file is binary.
        self.body.write(b"\n")


class BelwuePipeline(object):
    """Item pipeline that feeds every scraped item to MyWikiItemExporter."""

    def __init__(self):
        self.header = None
        self.body = None
        self.exporter = None
        dispatcher.connect(self.spider_opened, signal=signals.spider_opened)
        dispatcher.connect(self.spider_closed, signal=signals.spider_closed)

    def _close_files(self):
        """Close both output files, attempting the second even if the first fails."""
        try:
            if self.header is not None:
                self.header.close()
        finally:
            if self.body is not None:
                self.body.close()

    def spider_opened(self, spider):
        """Open the output files and write the static page header."""
        self.header = open('heading.txt', 'w+b')
        try:
            self.body = open('body.txt', 'w+b')
            self.exporter = MyWikiItemExporter(self.header, self.body)
            self.exporter.start_exporting()
        except Exception:
            # Do not leave already-opened files dangling when set-up fails.
            self._close_files()
            raise

    def spider_closed(self, spider):
        """Finish the export and close both files, even if finishing fails."""
        try:
            self.exporter.finish_exporting()
        finally:
            self._close_files()

    def process_item(self, item, spider):
        """Export *item* and return it unchanged."""
        self.exporter.export_item(item)
        return item