话不多说,直接上代码,没有注释!
1、xml解析代码(保存为 xml_parser.py,第二部分通过 import xml_parser as xp 引用)
import copy
import re
class TinyXmlPraser:
    """Minimal regex-based XML parser that builds a tree of ``XmlNode`` objects.

    The input is split into tokens (tags, comments, text runs) and the tokens
    are then folded into a tree.  Limitations: CDATA sections and DOCTYPE
    internal subsets are not understood, entities in text are not decoded,
    and a later text run inside an element replaces an earlier one.
    """

    # Alternatives are tried in this order at every position:
    #   1. a whole comment (which may itself contain '>'),
    #   2. any other ``<...>`` markup,
    #   3. a text run between '>' and '<' holding at least one non-space char.
    # Text is matched with ``[^<]`` so it can never swallow a following tag.
    _TOKEN_RE = re.compile(
        r"<!--.*?-->"
        r"|<[^>]*>"
        r"|(?<=>)[^<]*[^<\s][^<]*(?=<)",
        re.S,
    )
    # Tag name stops at whitespace, '/' or '>' so ``<br/>`` yields ``br``.
    _TAG_NAME_RE = re.compile(r"<\s*([^\s/>]+)")
    # name = "value" or name = 'value'; the closing quote must match the
    # opening one, so a value may contain the other quote character.
    _ATTR_RE = re.compile(r"""([^\s=<>/"']+)\s*=\s*(?:"([^"]*)"|'([^']*)')""")
    _SELF_CLOSING_RE = re.compile(r"/\s*>\Z")

    def __init__(self):
        self.version = ''   # raw ``<?...?>`` token of the last parsed document
        self.nlist = []     # token list of the last parsed document
        self.root = None    # root XmlNode, or None if no element was seen
        self.node = None    # element currently open while parsing

    def from_file(self, xmlpath):
        """Parse the UTF-8 file at ``xmlpath`` and return the root node."""
        return self.from_str(self.filetostr(xmlpath))

    def from_str(self, filestr):
        """Parse XML text ``filestr`` and return the root node (or None)."""
        # Start from a clean state so a reused parser is not affected by a
        # previous (possibly malformed) document.
        self.version = ''
        self.root = None
        self.node = None
        self.nlist = self.strtolist(filestr)
        self.ana_xml(self.nlist)
        return self.root

    def filetostr(self, xmlpath):
        """Return the whole content of ``xmlpath`` decoded as UTF-8."""
        with open(xmlpath, 'r', encoding='utf-8') as fin:
            return fin.read()

    def strtolist(self, filestr):
        """Split ``filestr`` into a list of markup and text tokens."""
        return self._TOKEN_RE.findall(filestr)

    @classmethod
    def _parse_tag(cls, li):
        """Return ``(tag_name, attributes)`` for one opening-tag token."""
        name_match = cls._TAG_NAME_RE.match(li)
        tag = name_match.group(1) if name_match is not None else ''
        attr = {}
        for key, dq_value, sq_value in cls._ATTR_RE.findall(li):
            # Exactly one of the two value groups took part in the match.
            attr[key] = dq_value or sq_value
        return tag, attr

    def ana_xml(self, nlist):
        """Fold the token list ``nlist`` into the node tree.

        Updates ``self.version``, ``self.root`` and ``self.node`` in place.
        """
        for li in nlist:
            if li.startswith('<?'):
                # XML declaration / processing instruction.
                self.version = li
            elif li.startswith('<!'):
                # Comment or declaration: carries no element.
                continue
            elif li.startswith('</'):
                # Closing tag: step back to the parent, if any is open.
                if self.node is not None:
                    self.node = self.node.father
            elif li.startswith('<'):
                tag, attr = self._parse_tag(li)
                tnode = XmlNode(tag, attr, self.node)
                if self.node is None:
                    self.root = tnode
                else:
                    self.node.add_child(tnode)
                # Only a non-self-closing element becomes the open element.
                if not self._SELF_CLOSING_RE.search(li):
                    self.node = tnode
            elif self.node is not None:
                # Text run; text outside any element is ignored.
                self.node.text = li

    def show_tree(self):
        """Print the parsed tree; prints only the header if nothing was parsed."""
        print("The tree is :")
        if self.root is not None:
            self.root.show_node()
class XmlNode:
    """One element of the parsed XML tree.

    Attributes:
        tag: element name.
        attr: dict of attribute name -> value.
        father: parent node, or None for the root.
        text: text content ('' when the element has none).
        children: child nodes in document order.
        indent: string of ``indent`` spaces added per level by ``show_node``.
    """

    def __init__(self, tag, attr, father, indent=2):
        self.tag = tag
        self.attr = attr
        self.father = father
        self.text = ''
        self.children = []
        self.indent = ' ' * indent

    def add_child(self, node):
        """Append ``node`` to this node's children."""
        self.children.append(node)

    def iter(self, tag):
        """Return all descendants named ``tag`` in breadth-first order.

        The node itself is never part of the result.
        """
        result = []
        # Work on a private queue so ``self.children`` is never modified.
        pending = list(self.children)
        position = 0
        while position < len(pending):
            node = pending[position]
            position += 1
            if node.tag == tag:
                result.append(node)
            pending.extend(node.children)
        return result

    def get(self, key, value=None):
        """Return attribute ``key``, or ``value`` when it is not available."""
        try:
            return self.attr[key]
        except (KeyError, TypeError):
            # Missing key, unhashable key, or ``attr`` is not a mapping.
            return value

    def show_node(self, prefix='', no=0):
        """Print this node and, recursively, its children.

        ``prefix`` is printed before the node; each level of children gets
        ``self.indent`` appended to it.  ``no`` receives the 1-based position
        among several siblings but is not used by this method.
        """
        attr_part = str(self.attr) if self.attr else ''
        text_part = (self.text + ' ') if self.text else ''
        print('%s<%s> %s %s' % (prefix, self.tag, attr_part, text_part))
        child_prefix = prefix + self.indent
        if len(self.children) == 1:
            self.children[0].show_node(child_prefix)
        else:
            for position, child in enumerate(self.children, 1):
                child.show_node(child_prefix, position)
2、json与xml互转代码
import json
import os
import traceback as tb
import xmltodict
import xml_parser as xp
def is_valid_json(text):
    """Check whether ``text`` is a valid JSON document.

    Returns:
        ``(True, parsed)`` on success, where duplicate keys were merged by
        ``obj_pairs_hook``; otherwise ``(False, {'error': traceback_text})``.
    """
    try:
        j_res = json.loads(text, object_pairs_hook=obj_pairs_hook)
    except Exception:
        # Report any parsing failure, but let KeyboardInterrupt/SystemExit
        # propagate instead of swallowing them.
        return False, {'error': tb.format_exc()}
    return True, j_res
def reg_search(rule, s):
    """Search ``s`` for the regular expression ``rule``.

    Returns the tuple of captured groups of the first match, or an empty
    tuple when nothing matches.
    """
    import re
    found = re.compile(rule).search(s)
    return found.groups() if found else ()
def json_format(text, indent=2):
    """Pretty-print JSON ``text`` with sorted keys and ``indent`` spaces.

    If ``text`` is not valid JSON, the last line of the parser's error
    message is returned instead.  When that message names a character
    offset, up to 20 characters of context on either side of it are printed
    with a caret marking the offending position.
    """
    flag, j_res = is_valid_json(text)
    if flag:
        return json.dumps(j_res, sort_keys=True, indent=indent)

    # The traceback text ends with a newline, so [-2] is its last real line.
    error = j_res['error'].split('\n')[-2]
    group = reg_search(r'.*char (\d+).*', error)
    if not group:
        return error

    idx = int(group[0])
    delta = 20
    # Clamp the context window to the bounds of the text.
    start = max(idx - delta, 0)
    end = min(idx + delta, len(text))
    print('{}'.format(text[start:end]))
    print(' ' * (idx-start) + '^')
    print(' ' * (idx-start-2) + 'error')
    return error
def trans_json_get(text, indent=2):
    """Convert between a JSON object and URL GET parameters.

    If ``text`` parses as a JSON object, its items are returned as a
    percent-encoded ``k=v&k=v`` string.  Otherwise ``text`` is treated as a
    URL or bare query string and its parameters are returned as a JSON
    object indented by ``indent``.
    """
    import re
    import urllib.parse

    try:
        # JSON -> GET
        j_data = json.loads(text, object_pairs_hook=obj_pairs_hook)
        pairs = [
            # Quote keys and values so special characters survive the trip.
            '{}={}'.format(urllib.parse.quote(str(k)), urllib.parse.quote(str(v)))
            for k, v in j_data.items()
        ]
    except (ValueError, TypeError, AttributeError):
        # Not JSON (ValueError/TypeError) or not a JSON object
        # (AttributeError on .items()): fall through to GET -> JSON.
        pass
    else:
        return '&'.join(pairs)

    # GET -> JSON
    reg = re.compile(r'.*\?(.*=.*&)+(.*=.*&?)')
    if not re.match(reg, text):
        # Bare query string: give urlsplit a path to split it from.  The
        # text is NOT decoded here; parse_qsl decodes each key and value
        # once, so an encoded '&' or '=' inside a value stays in the value.
        text = 'api?' + text
    params = dict(urllib.parse.parse_qsl(urllib.parse.urlsplit(text).query))
    return json.dumps(params, indent=indent)
def obj_pairs_hook(lst):
    """Hook for ``json.loads`` that keeps every value of a repeated key.

    ``lst`` is the sequence of ``(key, value)`` pairs of one JSON object.
    A key that occurs once maps to its value; a key that occurs several
    times maps to the list of its values in order of appearance,
    e.g. ``{"a":1,"a":2}`` -> ``{"a":[1,2]}``.
    """
    grouped = {}
    for key, val in lst:
        grouped.setdefault(key, []).append(val)
    return {
        key: (vals[0] if len(vals) == 1 else vals)
        for key, vals in grouped.items()
    }
def handle_s(s):
    """Escape the XML special characters ``&``, ``<`` and ``>`` in ``s``."""
    # '&' must be replaced first, otherwise the ampersands introduced by
    # the other two replacements would be escaped a second time.
    return s.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
# Indentation unit; parse_list/parse_dict repeat it once per nesting level.
idt = ' '
def parse_list(pk, lst, indent):
    """Render the list ``lst`` as an XML ``<pk class="array">`` element.

    Each item becomes an ``<e>`` child: strings and numbers carry a
    ``type`` attribute, nested lists and dicts are rendered recursively.
    Items of any other type (for example bool or None) produce no output.
    ``indent`` is the nesting level of the array element itself.
    """
    outer = idt * indent
    inner = idt * (indent + 1)
    parts = [outer + '<%s class="array">\n' % pk]
    for item in lst:
        kind = type(item)
        if kind is str:
            parts.append(inner + '<e type="string">%s</e>\n' % handle_s(item))
        elif kind is int or kind is float:
            parts.append(inner + '<e type="number">%s</e>\n' % item)
        elif kind is list:
            parts.append(parse_list('e', item, indent + 1))
        elif kind is dict:
            parts.append(inner + '<e class="object">\n')
            parts.append(parse_dict(item, indent + 1))
            parts.append('%s</e>\n' % inner)
    parts.append('%s</%s>\n' % (outer, pk))
    return ''.join(parts)
def parse_dict(d, indent):
    """Render the items of dict ``d`` as XML child elements.

    Every key becomes an element name (string keys pass through
    ``handle_s``).  Strings and numbers carry a ``type`` attribute, lists
    are delegated to ``parse_list`` and dicts are rendered recursively.
    Values of any other type (for example bool or None) produce no output.
    Children are written one level deeper than ``indent``.
    """
    inner = idt * (indent + 1)
    parts = []
    for key, value in d.items():
        kind = type(value)
        if isinstance(key, str):
            key = handle_s(key)
        if kind is str:
            parts.append(inner + '<{0} type="string">{1}</{0}>\n'.format(key, handle_s(value)))
        elif kind is int or kind is float:
            parts.append(inner + '<{0} type="number">{1}</{0}>\n'.format(key, value))
        elif kind is list:
            parts.append(parse_list(key, value, indent + 1))
        elif kind is dict:
            parts.append(inner + '<%s class="object">\n' % key)
            parts.append(parse_dict(value, indent + 1))
            parts.append('%s</%s>\n' % (inner, key))
    return ''.join(parts)
def json_to_xml(j_text):
    """Convert the JSON object text ``j_text`` to an XML document string.

    The items of the object are rendered by ``parse_dict`` inside an
    ``<xml_root>`` element.  If conversion fails, the traceback is printed
    and a document with an empty ``<xml_root>`` is returned.
    """
    res = '<?xml version="1.0" encoding="utf-8"?>\n<xml_root>\n{0}</xml_root>'
    tmp = ''
    try:
        # JSON -> XML body
        j_data = json.loads(j_text, object_pairs_hook=obj_pairs_hook)
        tmp = parse_dict(j_data, 0)
    except Exception:
        # Best effort: report the problem and fall back to an empty root.
        tb.print_exc()
    return res.format(tmp)
def trans_xmlnode(node):
    """Render ``node`` as a JSON fragment followed by a trailing comma.

    The node's ``class`` attribute selects array or object rendering and
    its ``type`` attribute selects number or string rendering; a node with
    none of these renders as just the comma.  Object fragments keep the
    comma after their last member (``{"k": 1,}``); callers are expected to
    strip the trailing commas.
    """
    node_class = node.attr.get('class', '')
    node_type = node.attr.get('type', '')
    if node_class == 'array':
        items = ''.join(trans_xmlnode(nc) for nc in node.children)
        # items[:-1] drops the comma after the last item; for an empty
        # array it stays '' so the result is '[]' rather than ']'.
        res = '[' + items[:-1] + ']'
    elif node_class == 'object':
        members = ''.join(
            '"%s": %s' % (nc.tag, trans_xmlnode(nc)) for nc in node.children
        )
        res = '{' + members + '}'
    elif node_type == 'number':
        res = '%s' % node.text
    elif node_type == 'string':
        # json.dumps escapes quotes, backslashes and control characters so
        # the fragment stays valid JSON; non-ASCII text is kept as is.
        res = json.dumps(node.text, ensure_ascii=False)
    else:
        res = ''
    return res + ','
def xml_to_json(xml):
    """Convert XML produced by ``json_to_xml`` back to a JSON object string.

    ``xml`` is either the path of an existing ``.xml`` file or the XML text
    itself.  Each child of the root becomes one key: a child with text maps
    to that text, any other child maps to the JSON fragment rendered by
    ``trans_xmlnode`` (as a string).  Returns ``'{}'`` when the input holds
    no element at all.
    """
    parser = xp.TinyXmlPraser()
    if os.path.exists(xml) and xml.endswith('.xml'):
        root = parser.from_file(xml)
    else:
        root = parser.from_str(xml)

    d_res = {}
    if root is None:
        return json.dumps(d_res)

    for c in root.children:
        if c.text:
            # Literal text is stored untouched.
            d_res[c.tag] = c.text
        else:
            # Drop the fragment's trailing comma, then the commas left
            # before closing brackets.  Only generated fragments are
            # cleaned, so text values containing ',]' or ',}' are kept.
            fragment = trans_xmlnode(c)[:-1]
            d_res[c.tag] = fragment.replace(',]', ']').replace(',}', '}')
    return json.dumps(d_res)
def json_to_xml2(j_text):
    """Convert JSON text ``j_text`` to XML using ``xmltodict``.

    An ``xml_root`` element is added whenever the JSON would not yield
    exactly one root element.  If conversion fails, the traceback is
    printed and ``''`` is returned.
    """
    res = ''
    try:
        j_data = json.loads(j_text, object_pairs_hook=obj_pairs_hook)
        # A document needs exactly one root: wrap when the JSON has zero or
        # several top-level keys ...
        need_root = len(j_data) != 1
        if not need_root and isinstance(j_data, dict):
            # ... or a single key holding several items, which xmltodict
            # would otherwise emit as several root elements.
            sole_value = next(iter(j_data.values()))
            need_root = isinstance(sole_value, list) and len(sole_value) > 1
        if need_root:
            j_data = {'xml_root': j_data}
        res = xmltodict.unparse(j_data, encoding='utf-8')
    except Exception:
        # Best effort: report the problem and return the empty string.
        tb.print_exc()
    return res
def xml_to_json2(xml, indent=2):
    """Convert XML to a JSON string indented by ``indent`` using ``xmltodict``.

    ``xml`` is either the path of an existing ``.xml`` file or the XML text
    itself.
    """
    if xml.endswith('.xml') and os.path.exists(xml):
        # Read as UTF-8 explicitly instead of relying on the platform
        # default encoding, matching how the rest of this code reads XML.
        with open(xml, 'r', encoding='utf-8') as f:
            xml = f.read()
    j_data = xmltodict.parse(xml)
    return json.dumps(j_data, indent=indent)
if __name__ == '__main__':
    # Demo: run one sample document through every converter in turn.
    text = '{"a": 123, "a":1.2, "a":"<a></a>", "ee": {"c": {"d": "123"}, "cc":[1,3]}, "f": "&ff=f", "g":999, "h":[1,[2,3],{"s":[4,5], "b":333}]}'
    print(text)

    # Pretty-print the sample.
    res = json_format(text)
    print('json format result:\n', res, end='\n\n')

    # JSON -> GET parameters, then feed that result back for GET -> JSON.
    res = trans_json_get(text)
    print('trans json to get:\n', res, end='\n\n')
    #text = 'a=%3Ca%3E%3C/a%3E&b=%5B%27abc%27%2C%201%2C%203%2C%205%5D&e=%7B%27c%27%3A%20%7B%27d%27%3A%20%27123%27%7D%2C%20%27cc%27%3A%20%5B1%2C%203%5D%7D&f=%26ff%3Df&g=999&h=%5B1%2C%202%2C%20%7B%27s%27%3A%20%5B4%2C%205%5D%2C%20%27b%27%3A%20333%7D%5D'
    res = trans_json_get(res)
    print('trans get to json:\n', res, end='\n\n')

    # JSON -> XML -> JSON, then pretty-print the round-tripped JSON.
    res = json_to_xml(text)
    print('json to xml:\n', res, end='\n\n')
    res = xml_to_json(res)
    print('xml to json:\n', res, end='\n\n')
    res = json_format(res)
    print('json format result:\n', res, end='\n\n')

    # JSON -> XML once more from the original sample.
    res = json_to_xml(text)
    print('json to xml:\n', res, end='\n\n')

    # Disabled xmltodict-based variant, kept as an inert string literal.
    '''
res = json_to_xml2(text)
print(res)
print()
res = xml_to_json2(res)
print(res)
print()
'''
版权声明:本文为weixin_44152831原创文章,遵循CC 4.0 BY-SA版权协议,转载请附上原文出处链接和本声明。