python day 9: xml module, configparser module, subprocess module

Keywords: Python xml encoding Attribute

Catalog

python day 9

2019/10/12

Learning materials come from the education of older boys

1. xml module

The xml module handles data in XML format, which was the common way to exchange and process data between programs written in different languages before JSON appeared.

1.1 First Understanding xml

import requests

from xml.etree import ElementTree as ET

# Send an HTTP request with the third-party requests module; the response body is XML text
response = requests.get('http://www.webxml.com.cn//webservices/qqOnlineWebService.asmx/qqCheckOnline?qqCode=375132083')
result = response.text  # The response body as a str

# Parse the XML text with ElementTree from the xml package:
# ET.XML takes a string and returns the root Element built from it
node = ET.XML(result)

# Read the text content of the root element
if node.text =='Y':
    print('On-line')
import requests

response2 = requests.get('http://www.webxml.com.cn/WebServices/TrainTimeWebService.asmx/getDetailInfoByTrainCode?TrainCode=K234&UserID=starpinlan')
result2 = response2.text  # The response body as a str

from xml.etree import ElementTree as ET

root = ET.XML(result2)  #<class 'xml.etree.ElementTree.Element'>
# print(type(root))

for node in root.iter('TrainDetailInfo'):  # iter() walks the tree under root and yields every element whose tag is TrainDetailInfo
    print(node.tag,node.attrib)  # Print the element's tag name and its attribute dict
    print(node.find('TrainStation').text) # find() returns the first direct child with the given tag name

1.2 Traversing specified nodes of xml documents

  • Create an xml document and write it as follows:
s1 = '''
<data>
    <country name="Liechtenstein">
        <rank updated="yes">2</rank>
        <year id="year">2024</year>
        <gdppc>141100</gdppc>
        <neighbor direction="E" name="Austria" />
        <neighbor direction="W" name="Switzerland" />
    </country>
    <country name="Singapore">
        <rank updated="yes">5</rank>
        <year id="year">2027</year>
        <gdppc>59900</gdppc>
        <neighbor direction="N" name="Malaysia" />
    </country>
    <country name="Panama">
        <rank updated="yes">69</rank>
        <year id="year">2027</year>
        <gdppc>13600</gdppc>
        <neighbor direction="W" name="Costa Rica" />
        <neighbor direction="E" name="Colombia" />
    </country>
</data>
'''

# Write the sample document to disk. Plain 'w' is enough because the file is
# only written, and leaving the with-block flushes and closes it.
with open('xmltest.xml', 'w', encoding='utf-8') as f:
    f.write(s1)
  • Read the xml document without modifying the original file
from xml.etree import ElementTree as ET
# Read the file's text and let ET.XML build an Element tree from the string.
# ET.XML only parses text and keeps no link to the file, so the changes below
# stay in memory until they are written out explicitly.
# The with-block guarantees the file handle is closed after reading.
with open('xmltest.xml', encoding='utf-8') as f:
    root = ET.XML(f.read())
print(root.tag)
for node in root.iter('country'):
    print(node.tag, node.attrib, node.find('year').text)
    child_node = node.find('year')  # find() returns the first direct child tagged 'year'
    new_year = int(child_node.text) + 1  # Increase the value of the year tag by 1
    child_node.text = str(new_year)  # Element text must be a str
    child_node.set('name', 'year')  # Set the attribute 'name' to the value 'year'
    # del child_node.attrib['name']  # Delete the name attribute

# To save the result, wrap the root Element in an ElementTree and write it out
tree = ET.ElementTree(root)  # Create an ElementTree object whose root is root
tree.write('newxml.xml', encoding='utf-8')  # Writing to a file goes through ElementTree.write
  • Use the parse method to parse the file directly; writing the tree back to the same path then updates the original file.
#Parsing files using ElementTree's parse method

from xml.etree import ElementTree as ET

tree = ET.parse('xmltest.xml')  # parse() opens the file and parses its contents into an ElementTree
# print(tree,type(tree))  # <class 'xml.etree.ElementTree.ElementTree'>
root = tree.getroot()   # getroot() returns the top-level root element
# print(root.tag)  # data

for node in root.iter('year'):
    print(node.tag,node.text)
    new_year = int(node.text) + 1
    node.text = str(new_year)
    node.set('id','year')  # Set the id attribute to the value 'year'
    print(node.attrib)    # attrib holds the element's attributes in dictionary form

tree.write('xmltest.xml')  # Writing the tree back to the same path overwrites the original file
  • How to list the attributes and methods of a node
from xml.etree  import ElementTree as ET

tree = ET.parse('xmltest.xml')
root = tree.getroot()
print(dir(root))  # dir() lists the attributes and methods available on the element

'''
['__class__', '__copy__', '__deepcopy__', '__delattr__', '__delitem__', '__dir__', '__doc__', 
 '__eq__', '__format__', '__ge__', '__getattribute__', '__getitem__', '__getstate__', '__gt__', 
 '__hash__', '__init__', '__init_subclass__', '__le__', '__len__', '__lt__', '__ne__', '__new__', 
 '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__setitem__', '__setstate__', '__sizeof__', 
 '__str__', '__subclasshook__', 'append', 'attrib', 'clear', 'extend', 'find', 'findall', 'findtext', 
 'get', 'getchildren', 'getiterator', 'insert', 'items', 'iter', 'iterfind', 'itertext', 'keys', 'makeelement', 
 'remove', 'set', 'tag', 'tail', 'text']
 #tag,attrib,find,text,set,iter,get,append,clear,extend,findall,getchildren,insert,makeelement
 '''

1.3 Manual creation of xml documents through python

from xml.etree import ElementTree as ET

root = ET.Element('namelist')  # Create an XML element (object) with the tag 'namelist' through the Element class
print(root,type(root))  # <Element 'namelist' at 0x00000184AD6C21D8> <class 'xml.etree.ElementTree.Element'>
name1 = ET.SubElement(root,'name',attrib={'enrolled':'yes'})  # Create a child node of root with the tag 'name' and the attribute enrolled="yes"
name1.text = 'lanxing'  # Assign the text content
age1 = ET.SubElement(name1,'age',attrib={'checked':'no'})  # Create a child node of name1
age1.text = '18'
sex1 = ET.SubElement(name1,'sex',attrib={'sex':'male'})
sex1.text = 'male'

name2 = ET.SubElement(root,'name',attrib={'enrolled':'yes'})  # Create a second child node of root with the tag 'name' and the attribute enrolled="yes"
name2.text = 'LAN-STAR'  # Assign the text content
age2 = ET.SubElement(name2,'age',attrib={'checked':'no'})  # Create a child node of name2
age2.text = '28'  # Text must be a string so that it can be serialized
# NOTE(review): the attribute below says 'male' while the text is 'female' — confirm which is intended
sex2 = ET.SubElement(name2,'sex',attrib={'sex':'male'})
sex2.text = 'female'

tree = ET.ElementTree(root)  # Wrap the root element in an ElementTree document object
tree.write('xmltest2.xml',encoding='utf-8',xml_declaration=True)  # Write the document to a file; xml_declaration=True adds the <?xml ...?> declaration line at the top

1.4 Two Ways to Create Nodes

from xml.etree import ElementTree as ET

tree = ET.parse('xmltest.xml')

root = tree.getroot()
# The first way to create a new node: the makeelement method of an existing element
son = root.makeelement('son',{'sex':'male'})
print(son,type(son))  # <Element 'son' at 0x000002E1E1B10138> <class 'xml.etree.ElementTree.Element'>
# The second way to create a new node: call the Element class directly (the first way ends up creating an Element as well)
son2 = ET.Element('son2',{'sex':'male'})
son2.text = 'male'
# Neither way attaches the node to the tree; append them to root explicitly
root.append(son)
root.append(son2)

tree.write('xmltest.xml',encoding='utf-8',short_empty_elements=False) # short_empty_elements=False writes empty elements as <tag></tag> instead of the abbreviated <tag />

1.5 summary

XML:
    1,Parsing:
            str: ElementTree.XML(str)
            file: ElementTree.parse(file)
    2,Element Object operations:
            tag,text,find,iter,get,set,findall,append,insert,remove. 
    3,Rewrite the file:
            ElementTree(Element(tag,attrib)).write(filepath,encoding=None)
            Note: the file must be written through the write method of an ElementTree object.
    4,Establish XML: 
            root= Element(tag,attrib)
            ele = root.makeelement(tag,attrib)
            root.append(ele)
            ElementTree.SubElement(root,tag,attrib)
    5,indent
            from xml.dom import minidom

            def prettify(root):
                '''Convert nodes into strings and add indentation'''
                rough_string = ET.tostring(root,'utf-8')
                reparsed = minidom.parseString(rough_string)

                return reparsed.topprettyxml(indent='\t')
    6,Namespace
    7,Very important
            Everything is an object: type(obj) shows the class of the object, and dir(obj) lists the attributes and methods the object has.

2. configparser module

The configparser module is used to process configuration files, which typically end with .ini and look like the following.

# [xxx] is called a section (node)
[AutoUpdate]
# Each line below is a key = value pair: the left side is the key and the right side is the value
configfileurl = https://update.pan.baidu.com/autoupdate
autoupdatecheckdelay = 30
configfilekey1 = EDAD921681272C3E37F34020450A6963
configfilekey2 = 132150629469920000
lstm_autoupdate = 1570589347
isautoupdate = 0

[PackageInstallInfo]
default = 2
c6aa1078e4d92ff0573452220ca2d8ae = 4

import configparser

con = configparser.ConfigParser()  # Create a ConfigParser object
con.read('config.ini',encoding='utf-8') # Read and parse the file; its contents are then held in memory by con

# 1. Get all sections
# sections() returns the name of every [xxx] header that was read; each [xxx] is one section (node) of the configuration file
ret = con.sections() # Returns the section names as a list
print(ret)  # ['AutoUpdate', 'PackageInstallInfo']

# 2. Get all key-value pairs under the specified section: a list whose elements are (key, value) tuples
ret2 = con.items('AutoUpdate')
print(ret2)  # [('configfileurl', 'https://update.pan.baidu.com/autoupdate'), ('isautoupdate', '0'), ('autoupdatecheckdelay', '30'), ('configfilekey1', 'EDAD921681272C3E37F34020450A6963'), ('configfilekey2', '132150629469920000'), ('lstm_autoupdate', '1570589347')]

# 3. Get all keys of the specified section
ret3 = con.options('PackageInstallInfo')
print(ret3)  # ['default', 'c6aa1078e4d92ff0573452220ca2d8ae']

# 4. Get the value of the specified key under the specified section: get(section, key)
value1 = con.get('PackageInstallInfo','default')  # get() returns the value as a string
print(value1,type(value1))
# getint() converts the string value to an integer before returning it
value2 = con.getint('PackageInstallInfo','default')
print(value2,type(value2))
# getfloat() converts the string value to a floating-point number before returning it
value3 = con.getfloat('PackageInstallInfo','default')
print(value3,type(value3))
# If the value is Boolean, getboolean() converts the string to a Boolean value and returns it.
# value4 = con.getboolean('PackageInstallInfo','default')  # Raises an error because the value is not a Boolean value

# 5. Check, add, delete sections

has_sec = con.has_section('AutoUpdate')  # Returns True if the section exists
print(has_sec)

con.add_section('name')

con.remove_section('name')

# Additions and deletions only change the parser in memory; they must be
# written back to the file, otherwise they are not persisted.


# 6. Check, delete, and set the key-value pairs within the specified section

# Check whether the key exists
has_opt =con.has_option('AutoUpdate','IsAutoUpdate')

# Delete the key
con.remove_option('AutoUpdate','IsAutoUpdate')

# Set the key (the value must be a string)
con.set('AutoUpdate','IsAutoUpdate','0')

# write() takes an open text file object. The with-block closes the file so
# the data is flushed to disk, and the encoding matches the one used by read().
with open('config.ini', 'w', encoding='utf-8') as config_file:
    con.write(config_file)

4. shutil module

The shutil module provides high-level operations on files, folders, and compressed archives.

import shutil
# Copy the contents of one file into another.
# shutil.copyfileobj(fsrc, fdst[, length]) copies from one open file object to
# another; the optional length is the chunk size used while copying.
# The with-block closes both files even if the copy fails.
with open('1.txt','r',encoding='utf-8') as f1, open('2.txt','w',encoding='utf-8') as f2:
    shutil.copyfileobj(f1,f2)

# shutil.copyfile(src, dst) copies the file contents
shutil.copyfile('1.txt','3.txt')

# shutil.copymode(src, dst) copies only the permission bits; content, owner and group remain unchanged
shutil.copymode('1.txt','2.txt')  # dst must already exist
# shutil.copystat(src, dst) copies only state information, including mode bits, atime, mtime, flags
shutil.copystat('1.txt', '2.txt')
# shutil.copy(src, dst) copies the file contents and permission bits
shutil.copy('1.txt','2.txt')
# shutil.copy2(src, dst) copies the file contents and status information
shutil.copy2('1.txt','2.txt')
# shutil.ignore_patterns(*patterns) builds the ignore argument for copytree so that matching names are skipped
# shutil.copytree(src, dst, symlinks=False, ignore=None) recursively copies a folder
shutil.copytree('.','../day10',symlinks=True,ignore=shutil.ignore_patterns('*.py'))

# shutil.rmtree(path[, ignore_errors[, onerror]]) recursively deletes a folder and its files
shutil.rmtree('../day10')
# shutil.move(src, dst) recursively moves a file or folder, similar to the mv command; in the simple case it is a rename.
# shutil.make_archive(base_name, format[, root_dir[, owner[, group[, logger]]]])
# Creates the compressed package and returns its file path.
# base_name: the file name of the compressed package to create, optionally including a path.
# format: zip, tar, bztar, gztar.
# root_dir: the folder path to be compressed (default: current directory)
# owner: user, default current user
# group: group, default current group
# logger: used for logging, usually a logging.Logger object
shutil.make_archive('../test1','tar','.')

shutil processes compressed packages by calling the zipfile and tarfile modules.

import zipfile

# compress
# The with-block closes the archive, which is what finalizes the zip file on disk.
with zipfile.ZipFile('laxi.zip', 'w') as z:  # Create a zip archive object
    z.write('a.log')  # Add files to the archive
    z.write('data.data')

# decompress
with zipfile.ZipFile('laxi.zip', 'r') as z:
    z.extractall()  # Extract all files
    print(z.namelist())  # Get all file names in the archive
    # Extract one member by name; it must be a name that exists in the archive
    # (the archive built above contains 'a.log' and 'data.data').
    z.extract('a.log')


import tarfile

# compress
# The with-block closes the archive even if adding a file fails.
with tarfile.open('your.tar','w') as tar:  # Create an archive in tar format
    tar.add('/Users/wupeiqi/PycharmProjects/bbs2.log', arcname='bbs2.log')  # Add a file; arcname is the name it gets inside the archive
    tar.add('/Users/wupeiqi/PycharmProjects/cmdb.log', arcname='cmdb.log')

# decompress
with tarfile.open('your.tar','r') as tar:
    tar.extractall()  # Pass path=... to choose the destination directory
    print(tar.getnames())  # getnames() returns the member names; getmembers() would return TarInfo objects
    # extract() writes one member to disk. extractfile() only returns a file
    # object for reading and does not unpack anything. The name must exist in
    # the archive (built above with 'bbs2.log' and 'cmdb.log').
    tar.extract('bbs2.log')

5. subprocess module

Designed for python to execute system commands

import subprocess

ret = subprocess.call('ipconfig')  # call() runs the command, waits for it to finish, and returns its exit status
print(ret)  # An exit status of 0 means the command succeeded.

ret2 = subprocess.check_call('ipconfig')  # Like call(), but a non-zero exit status raises CalledProcessError instead of being returned
print(ret2)
ret3 = subprocess.check_output('ipconfig')  # Runs the command and returns its output as bytes
print(ret3)

'''
Popen() is used to execute more complex system commands.

Parameters:
    args: the command to run, either a string or a sequence type (e.g. list, tuple)
    bufsize: buffering policy. 0 = no buffering, 1 = line buffering, other positive values = buffer size, negative = system default buffer
    stdin, stdout, stderr: the standard input, output, and error handles of the program, respectively
    preexec_fn: only valid on Unix platforms; specifies a callable object that is called before the child process runs
    close_fds: on Windows, if close_fds is set to True, the newly created child process will not inherit the input, output, and error pipes of the parent process.
        So close_fds cannot be set to True while also redirecting the standard input, output and error (stdin, stdout, stderr) of the child process.
    shell: if True, the command string is executed through the system shell
    cwd: sets the current working directory of the child process
    env: the environment variables for the child process. If env is None, the child process inherits the environment variables of the parent process.
    universal_newlines: different systems use different newline characters; True -> uniformly use \n
    startupinfo and creationflags: only effective on Windows;
        they are passed to the underlying CreateProcess() function and set properties of the child process, such as the appearance of the main window, the priority of the process, and so on.
'''

subprocess.Popen('mkdir test',shell=True,cwd='.')  # With shell=True the string is handed to the shell and run as a command line
# A list is ONE command plus its arguments: this runs ipconfig with the
# argument 'dir'. It does not run two separate commands.
subprocess.Popen(['ipconfig','dir'])

# Start an interactive interpreter and talk to it through pipes.
obj = subprocess.Popen(["python"], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True)
# communicate() sends the input, closes stdin, reads stdout and stderr
# together, and waits for the process to exit. Reading stdout to the end
# before touching stderr can deadlock if the child fills the stderr pipe.
cmd_out, cmd_error = obj.communicate("print(1)\nprint(2)")

print(cmd_out)
print(cmd_error)

Posted by lakilevi on Sat, 12 Oct 2019 08:22:23 -0700