本文通过实例讲述了使用Python解析XML文件的实现方法,分享给大家供大家参考。具体方法如下:

XML文件(simple.xml)内容如下:

<?xml version="1.0" ?>
<!--Simple xml document__chapter 8-->
<book>
  <title>
    sample xml thing
  </title>
  <author>
    <name>
      <first>
        ma
      </first>
      <last>
        xiaoju
      </last>
    </name>
    <affiliation>
      Springs Widgets, Inc.
    </affiliation>
  </author>
  <chapter number="1">
    <title>
      First
    </title>
    <para>
      I think widgets are great. You should buy lots of them from
      <company>
        Springy Widgets, Inc
      </company>
    </para>
  </chapter>
</book>

Python代码如下:

from xml.dom import minidom, Node
import re, textwrap 

class SampleScanner:
  """Walk a parsed ``<book>`` document and print a summary of its contents.

  Constructing an instance performs the whole scan: every top-level
  ``<book>`` element of the document is passed to ``handle_book``, which
  prints the title, author and chapter details to standard output.

  Each report line is built as one pre-formatted string and passed to
  ``print(...)``, so the output is the same under Python 2 and Python 3.
  """

  # Any run of whitespace, including the newlines and indentation that
  # pretty-printed XML leaves inside text nodes.
  _WHITESPACE = re.compile(r"\s+")

  # Node types whose ``data`` attribute carries character content.
  _TEXT_NODE_TYPES = (Node.TEXT_NODE, Node.CDATA_SECTION_NODE)

  def __init__(self, doc):
    """Scan ``doc`` and report on each top-level ``<book>`` element.

    doc -- a ``minidom.Document``; any other object fails the assertion.
    """
    assert isinstance(doc, minidom.Document)
    for child in doc.childNodes:
      if child.nodeType == Node.ELEMENT_NODE and child.tagName == "book":
        self.handle_book(child)

  def handle_book(self, node):
    """Print the title of a ``<book>`` element and dispatch its children.

    Direct ``<title>`` children are printed here; ``<author>`` and
    ``<chapter>`` children are delegated to their own handlers. Other
    elements and non-element nodes are ignored.
    """
    for child in node.childNodes:
      if child.nodeType != Node.ELEMENT_NODE:
        continue
      if child.tagName == "title":
        print("Book titile is: %s" % self.gettext(child.childNodes))
      elif child.tagName == "author":
        self.handle_author(child)
      elif child.tagName == "chapter":
        self.handle_chapter(child)

  def handle_chapter(self, node):
    """Print a chapter's ``number`` attribute and title, then its paragraphs.

    The title text is gathered from every ``<title>`` descendant of the
    chapter. Each direct ``<para>`` child is passed to
    ``handle_chapter_para``.
    """
    print("number: %s" % node.getAttribute("number"))
    print("title: %s" % self.gettext(node.getElementsByTagName("title")))

    for child in node.childNodes:
      if child.nodeType != Node.ELEMENT_NODE:
        continue
      if child.tagName == "para":
        self.handle_chapter_para(child)

  def handle_chapter_para(self, node):
    """Print the text of all ``<company>`` descendants of a ``<para>``."""
    company = self.gettext(node.getElementsByTagName("company"))
    print("chapter:para:company %s" % company)

  def handle_author(self, node):
    """Print the author's name and affiliation from an ``<author>`` element.

    ``<name>`` children are delegated to ``handle_author_name``;
    ``<affiliation>`` children are printed directly.
    """
    for child in node.childNodes:
      if child.nodeType != Node.ELEMENT_NODE:
        continue
      if child.tagName == "name":
        self.handle_author_name(child)
      elif child.tagName == "affiliation":
        print("affiliation: %s" % self.gettext(child.childNodes))

  def handle_author_name(self, node):
    """Print the ``<first>`` and ``<last>`` parts of a ``<name>`` element.

    A missing part is reported as an empty string. If a part occurs more
    than once, the last occurrence is the one printed.
    """
    first = ""
    last = ""
    for child in node.childNodes:
      if child.nodeType != Node.ELEMENT_NODE:
        continue
      if child.tagName == "first":
        first = self.gettext(child.childNodes)
      elif child.tagName == "last":
        last = self.gettext(child.childNodes)

    print("firstname:%s,lastname:%s" % (first, last))

  def gettext(self, nodelist):
    """Return the character content of ``nodelist`` as a single string.

    Text and CDATA nodes contribute their own data, and nodes that have
    children are searched recursively. Every run of whitespace in the
    result is collapsed to one space; leading and trailing whitespace is
    collapsed but not stripped.

    nodelist -- an iterable of DOM nodes, such as ``childNodes`` or the
                result of ``getElementsByTagName``.
    """
    parts = []
    for node in nodelist:
      if node.nodeType in self._TEXT_NODE_TYPES:
        # Use the node's own data, not ``wholeText``: ``wholeText`` already
        # includes adjacent text/CDATA siblings, so appending it for each
        # sibling would repeat the same characters.
        parts.append(node.data)
      elif node.hasChildNodes():
        parts.append(self.gettext(node.childNodes))

    return self._WHITESPACE.sub(" ", "".join(parts))

if __name__ == "__main__":
  # Constructing the scanner is what walks the parsed document and prints
  # the report, so the instance itself does not need to be kept.
  SampleScanner(minidom.parse("simple.xml"))

希望本文所述对大家的Python程序设计有所帮助。

声明:本站所有文章,如无特殊说明或标注,均为本站原创发布。任何个人或组织,在未征得本站同意时,禁止复制、盗用、采集、发布本站内容到任何网站、书籍等各类媒体平台。如若本站内容侵犯了原著者的合法权益,可联系我们进行处理。