User:Thurallor-bot/template to xml.py
Jump to navigation
Jump to search
"""Export the parameters of a wiki template, page by page, to ``output.xml``.

Every page that includes the template named by ``template_to_find`` is
fetched and parsed. For the first matching template found in the page's own
wikitext, a ``<page name=...>`` element is written containing one
``<template name=...>`` element with a ``<param name=...>`` child per template
parameter. Pages whose own text does not contain the template are skipped.
"""
import mwparserfromhell
import xml.etree.ElementTree as xmllib
import re
import pywikibot
from pywikibot import textlib

# Name of the template to export, without the "Template:" prefix.
template_to_find = 'Deed'
# When true, the wikitext before/after the template is stored as the
# <template> element's text/tail.
want_context = False


def _find_template(parsed, name):
    """Return the first template in *parsed* whose name matches *name*, else None."""
    for template in parsed.filter_templates():
        if template.name.matches(name):
            return template
    return None


def _append_template(page_elmt, template, name):
    """Add a <template> child describing *template* to *page_elmt* and return it."""
    template_elmt = xmllib.SubElement(page_elmt, 'template')
    template_elmt.set('name', name)
    for param in template.params:
        param_elmt = xmllib.SubElement(template_elmt, 'param')
        # Store plain strings so the tree holds no parser objects; the
        # text is kept exactly as it appears in the wikitext.
        param_elmt.set('name', str(param.name))
        param_elmt.text = str(param.value)
    return template_elmt


def _split_context(parsed, found_template):
    """Return the wikitext of the top-level nodes before and after the template.

    Nodes that compare equal to *found_template* are left out of both strings.
    NOTE(review): only top-level nodes are scanned; if the template was found
    nested inside another node, everything ends up in the "before" string.
    """
    before_parts = []
    after_parts = []
    current = before_parts
    for node in parsed.nodes:
        if node == found_template:
            current = after_parts
        else:
            current.append(str(node))
    return ''.join(before_parts), ''.join(after_parts)


def main():
    """Collect the template data from every including page and write output.xml."""
    root_elmt = xmllib.Element('data')
    site = pywikibot.Site()
    template_page = pywikibot.Page(site, "Template:" + template_to_find)

    for page in template_page.getReferences(only_template_inclusion=True):
        title = page.title()
        print('Processing: ' + title)

        # Extract the specified template from the article
        parsed = mwparserfromhell.parse(page.text)
        found_template = _find_template(parsed, template_to_find)
        if found_template is None:
            # Template not present in the article; must be transcluded.
            # Skip this article (no <page> element is emitted for it).
            continue

        page_elmt = xmllib.SubElement(root_elmt, 'page')
        page_elmt.set('name', title)
        template_elmt = _append_template(page_elmt, found_template,
                                         template_to_find)

        # Get the text before and after the template
        if want_context:
            before_text, after_text = _split_context(parsed, found_template)
            template_elmt.text = before_text
            template_elmt.tail = after_text

    data = xmllib.tostring(root_elmt)
    # The context manager guarantees the file is flushed and closed.
    with open('output.xml', 'wb') as output_file:
        output_file.write(data)


if __name__ == '__main__':
    main()