# User:Thurallor-bot/template to xml.py
#
# From Lotro-Wiki.com
import mwparserfromhell
import xml.etree.ElementTree as xmllib
import re
import pywikibot
from pywikibot import textlib
# --- Configuration ---------------------------------------------------------
# Name of the template to extract, without the "Template:" namespace prefix.
template_to_find = 'Deed'
# When True, the wikitext before and after the template is recorded as well.
want_context = False

# --- Shared state ----------------------------------------------------------
# Root of the output document; one <page> child is added per processed article.
root_elmt = xmllib.Element('data')
site = pywikibot.Site()

# Generator over the pages that reference the template page; the
# only_template_inclusion flag is passed through to pywikibot unchanged.
gen = pywikibot.Page(
    site, "Template:" + template_to_find
).getReferences(only_template_inclusion=True)
# Walk every page yielded by ``gen`` and record the first occurrence of the
# target template (and, optionally, the wikitext around it) under ``root_elmt``
# as:
#
#   <page name="...">
#     <template name="...">
#       <param name="...">value</param>
#       ...
#     </template>
#   </page>
for page in gen:
    title = page.title()
    print('Processing: ' + title)

    # Extract the specified template from the article.  Only the first
    # matching template is used; any later occurrences are ignored.
    parsed = mwparserfromhell.parse(page.text)
    found_template = None
    for candidate in parsed.filter_templates():
        if candidate.name.matches(template_to_find):
            found_template = candidate
            break
    if found_template is None:
        # Template not present in the article; must be transcluded.  Skip this
        # article.  No <page> element has been created yet, so there is
        # nothing to undo.
        continue

    page_elmt = xmllib.SubElement(root_elmt, 'page')
    page_elmt.set('name', title)
    template_elmt = xmllib.SubElement(page_elmt, 'template')
    template_elmt.set('name', template_to_find)
    for param in found_template.params:
        param_elmt = xmllib.SubElement(template_elmt, 'param')
        # param.name is a parser object, not a str; XML attribute values must
        # be plain strings.  The padding that wikitext layout puts around the
        # name ("| name   = value") is stripped so the attribute holds just
        # the parameter name.
        param_elmt.set('name', str(param.name).strip())
        param_elmt.text = str(param.value)

    # Get the text before and after the template
    if want_context:
        before_parts = []
        after_parts = []
        passed_template = False
        for node in parsed.nodes:
            # Identity, not equality: parser nodes compare by their text, so
            # ``==`` would also match (and silently drop) any other top-level
            # node whose wikitext is identical to the found template.
            if node is found_template:
                passed_template = True
            elif passed_template:
                after_parts.append(str(node))
            else:
                before_parts.append(str(node))
        # If the found template is not one of the top-level nodes (e.g. it is
        # nested inside another node), nothing matches above and all of the
        # wikitext ends up in ``text`` with an empty ``tail``.
        template_elmt.text = ''.join(before_parts)
        template_elmt.tail = ''.join(after_parts)

    # Debugging aid: uncomment to stop after a specific article.
    #if title == 'Enmity of the Dead':
    #    break

# Serialize the collected tree and write it to output.xml in the current
# working directory.  The file is opened in binary mode and written through a
# context manager so the handle is flushed and closed even if the write fails.
data = xmllib.tostring(root_elmt)
with open('output.xml', 'wb') as output_file:
    output_file.write(data)