python day 8: re module supplement, import module, hashlib module, string formatting, module knowledge collection, initial knowledge of requests module

Keywords: Python encoding github pip

Catalog

python day 8

2019/10/11

Data from the education of older boys

1. re module supplement

import re

data = 'hello my name is lanxing and hello 30, i am very pleased to meet you guys.'
# Ab initio matching: match (pattern, string, flags = 0), flags: I (ignoring case), M (multiple lines), S(. can match all including newline characters), X (ignoring the annotation of pattern)
# Without grouping
ret = re.match('h\w+', data)  # If the match is successful, the matching object will be returned. If the match is not successful, the matching object will be returned. If the match is not successful, the matching object will be returned. If the match is not successful, the matching object will be returned. If the match is not successful, the group method will be returned to None.
print(ret.group())  # The group method returns all matched strings
print(ret.groups())  # groups method returns the grouping result in the matching result and presents the grouping result in tuple form.
print(ret.groupdict())  # The groupdict method returns all groups that execute key s in the grouping of matched results.
'''
hello
()
{}
'''
# In the case of grouping:
ret2 = re.match('(?P<n1>h)(?P<name2>\w+)', data)  # P < N1 > gives the group a name n1, and the group key is n1, which can be used to refer to the group result later.
print(ret2.group())  # The group method returns all matched strings and the result is a string.
print(ret2.groups())  # The group method returns the grouping result in the matching result, and each grouping result is returned as an element of the tuple.
print(ret2.groupdict())  # The groupdict method returns all groups that execute key s in the grouping of matched results. It's a dictionary.
'''
hello
('h', 'ello')
{'n1': 'h', 'name2': 'ello'}
'''
# search(pattern,str,flags=0), search the entire string, and match the first pattern that appears

ret3 = re.search('H(\w+).*(?P<num>\d)', data,flags=re.I)  # Without the native string flag r, you need to add a python escape\
print(ret3.group())  # The group method returns all matched strings and the result is a string.
print(ret3.groups())  # The group method returns the grouping result in the matching result, and each grouping result is returned as an element of the tuple.
print(ret3.groupdict())  # The groupdict method returns all groups that execute key s in the grouping of matched results. It's a dictionary.
'''
hello my name is lanxing and hello 30
('ello', '0')
{'num': '0'}
'''

#findall(pattern,str,flags=0), searches the entire string, matches all characters that match the pattern, and returns a list
#1. In the case of no grouping, all matching results that match the pattern are returned in the list, and the matching results cannot be overlapped.
ret4 = re.findall('\d+\w\d+','a2b3c4d5')  # Return a list of all non-overlapping matches in the string
print(ret4)
ret5 = re.findall('','a2b3c4d5')  # Matching results that are null will also be included in the list.
print(ret5)
#  ['', '', '', '', '', '', '', '', '']
# 2. In the case of only one grouping, the elements in the groups tuple are taken out separately and returned as elements of the list.
n = re.search('h(?P<name1>\w+).*\d','abhw23, hel54')
print(n.group())  # hw23, hel54
print(n.groups())  # ('w23',)
n1 = re.findall('h(?P<name1>\w+).*\d','abhw23, hel54')
print(n1)  # ['w23']


#3. In the case of more than one grouping, the matching result of the grouping is returned first, and the result of the grouping is put into the list for returning. It is useless to give the alias of the grouping in findall.
ret6 = re.findall('h(?P<name1>\w+).*(\d)','abhw23, hel54')
print(ret6)  #[('w23', '4')]
ret7 = re.findall('h(\w+(d)).*(\d)','abhwd23, hel54')  #The contents of each bracket are treated as elements of a tuple, which is then returned as elements of the list.
print(ret7)  #[('wd', 'd', '4')]
# #Search the entire string, match the pattern-compliant string, and return an iterator
ret8= re.finditer('h(\w+(?P<name1>d)).*(\d)','abhwd23, hel54')
print(ret8)
for i in ret8:
    print(i,i.group(),i.groups(),i.groupdict(),sep='\n')

# <callable_iterator object at 0x0000017BED3A8DA0>
# <re.Match object; span=(2, 14), match='hwd23, hel54'>
# hwd23, hel54
# ('wd', 'd', '4')
# {'name1': 'd'}
m1 = re.findall('(\w)(\w)(\w)(\w)','hello')  #How many elements are nested tuples in the list, depending on the number of actual groupings.
print(m1)  #[('h', 'e', 'l', 'l')]
m2 = re.findall('(\w){4}','hello')  #If only one grouping is actually displayed, the last required string is matched by default.
print(m2)  #['l']
m3 = re.findall('(\w)*','hello')  #Because greedy matching defaults to multiple matches, and * represents dispensability, when matched to o, * represents pattern as None, and matches again. Returns a null value.
print(m3)  # ['o', '']
m4 = re.findall('(\dasd)*','1asd2asdp3asd98132')  #When it's p's turn, * 0 appears, which means pattero is None. By 98132, the case of * zero also appeared, and all returned null values.
print(m4)
m5 = re.findall('(\dasd)+','1asd2asdp3asd98132')  #If it's + that's not going to happen. That is to say, if there are numeric representations *, +, {m,n}, by default matches the last pattern-compliant string.
print(m5)


#re.split(pattern, string, maxsplit=0, flags=0), first find all the matching characters, then divide them according to the matching characters and return the list. If there are grouping cases, the results of the grouping will also be in the list.
# 1. Without grouping:
data = 'hello my name is lanxing and hello 30, i am very pleased to meet you guys.'
print( re.findall('m\w',data) )  #  ['my', 'me', 'me']
li1 = re.split('m\w',data,maxsplit=2)  #You can limit how many elements in findall are used as splitting elements.
print(li1)  #  ['hello ', ' na', ' is lanxing and hello 30, i am very pleased to meet you guys.']

# 2. In the case of grouping:
print( re.findall('m(\w)',data) )
li2 = re.split('m(\w)',data,1)  # The grouping content is extracted and returned to the list as an element
print(li2)

#sub(pattern, repl, string, count=0, flags=0) replaces the matched content, and count if 0 represents replacing all matched content.
print(re.sub('\d','kk','1as23fe'))  #Returns the replaced string
print(re.subn('\d','kk','1as23fe'))  #Returns a tuple ('kkaskkkkfe', 3) and replaces it three times

# 1 - 2 *((60-30+(-40.0/5)*(9-2*5/3 +7/3*99/4*2998+10*568/14))-(-4*3)/(16-3*2))
# Firstly, match the innermost parentheses, calculate their values, and then match the outer parentheses.
num = '1 - 2 *((60-30+(-40.0/5)*(9-2*5/3 +7/3*99/4*2998+10*568/14))-(-4*3)/(16-3*2)) '
# match2 = re.findall(r'\([^()]+\)',num)
# print(eval(match2[0]))
# print(match2)
# match1 = re.split(r'\(([^()]+)\)',num,1)  #Matching in escape brackets, grouping brackets and grouping brackets will be retained in the split list
# print(match1)

while True:
    print(num)
    result = re.split(r'\(([^()]+)\)',num,maxsplit=1)  #Cut according to the first occurrence of the innermost parentheses. The contents of the innermost parentheses are kept in the list as grouping results.
    if len(result)==3:  #Equivalent to 3 represents parentheses
        before = result[0]   # For the first time, the string preceding the innermost bracket appears
        content = result[1]  #First occurrence of strings in innermost brackets
        after = result[2]    # For the first time, the string after the innermost bracket appears
        new_content = eval(content)
        print(new_content,type(new_content))
        num = ''.join([before,str(new_content),after])  #The results calculated by eval function are then spliced with the contents of the front and back parentheses and assigned to the variable num, thus num becomes a new string with a layer of parentheses missing.
    else:
        final = eval(num)
        print(final)
        break

2. import module import

Import is the keyword of the import module.
Modules are divided into three categories: built-in module, standard module and third-party module.
The import path of the module is the elements of sys.path. The python interpreter searches for the paths of sys.path. If there is this module, the import succeeds.

Import module name means importing a module.
From package import module ename as BBB means to import a module from a library and nickname the module bbb, then use BBB instead of modulename. Use from as much as possible to import modules.
from package import * means importing everything.

3. os module

sys module is about python interpreter module, while os module is about operating system function module.

import os
current_dir = os.getcwd()   #Gets the current working directory, which is the absolute directory path for the current python script to work on.
print(current_dir)
# os.chdir('..')  #Changing the current script working directory is equivalent to the cd under the shell, followed by a directory as a parameter, which can be either a relative path or an absolute path.
print(os.curdir)  # Returns the string name of the current directory: ('.')
print(os.pardir)  #Returns the parent directory string name of the current directory: ('.')
#os.makedirs(path,mode,exist_ok=False)
# os.makedirs('dir/dir2') #Multilayer recursive directories can be generated. If not absolute paths, directories are created under the current directory by default.
# os.removedirs('dir/dir2') #If the directory is empty, remove it and recurse to the upper directory. If it is empty, delete it, and so on.
# os.mkdir('dir')  #Create a single level directory. If it is not an absolute path, the directory will be created in the current directory by default.
# os.rmdir('dir')  #Delete single-level empty directory, if not empty directory can not be deleted, error reporting; equivalent to rmdir dirname in shell
print(os.listdir('dir'))  #Lists all files and directories in the specified directory, including hidden files, and returns them as a list
# os.remove('dir/New Text Document.txt')  #Delete the specified file
#os.rename('oldname','newname') renames files / directories
print(os.stat('dir'))  #Get file / directory information
print(os.sep)  #Get the operating system-specific path separator, window is\, linux is/
print(os.linesep) #The line terminator used by the current platform is'rn'under win and'n' under linux.
print(os.pathsep)  #A string used to split file paths, i.e.';'
print(os.name)  #Display the current platform, NT under win, posix under Linux
os.system('dir')  #Run the shell command to display it directly
# print(os.environ)  #Getting System Environment Variables
print(os.path.abspath('../..') ) #Returns the absolute path of the specified path, and the general parameters are relative paths such as'.','. '
path_list = os.path.split(r'F:\github\thestudyofpython\studyofpython\pycharm_lanxing\learn_python_from_oldboy\day8_os\osmodule.py')
print(path_list)  #Divide path into directory and file name binaries and return
abs_par_path=os.path.dirname(r'F:\github\thestudyofpython\studyofpython\pycharm_lanxing\learn_python_from_oldboy\day8_os\osmodule.py')
#Returns the absolute path value of the upper directory of the specified directory, which is actually the first element of split
os.path.basename(r'F:\github\thestudyofpython\studyofpython\pycharm_lanxing\learn_python_from_oldboy\day8_os\osmodule.py')
#Returns the file name of the given path, and if the path ends with/or\, returns a null value, which is the second element of split.
os.path.exists(r'F:\github\thestudyofpython\studyofpython\pycharm_lanxing\learn_python_from_oldboy\day8_os\osmodule.py')
#If a given path exists, return True or False
os.path.isabs(r'F:dsaf/adf')
#If the given path is an absolute path, return True.
os.path.isfile(r'f:\13.txt')  #If the path is an existing file, return True
os.path.isdir(r'f:/test1')  #If the path is an existing folder, return True
print(os.path.join('f:','test1','test2','123.txt'))  #Paths are spliced together to form a directory. F: test1 test2 123. TXT
time1 = os.path.getatime(r'F:\github\thestudyofpython\studyofpython\pycharm_lanxing\learn_python_from_oldboy\day3\int_str_set.py')
#Returns the last access timestamp of the file or directory pointed to by path
import time
print(time.ctime(time1))
time2= os.path.getmtime(r'F:\github\thestudyofpython\studyofpython\pycharm_lanxing\learn_python_from_oldboy\day3\int_str_set.py')
print(time2)
# Returns the last modification timestamp of the file or directory pointed to by path

4. hashlib module

Used for encryption related operations, instead of md5 module and Sha module, mainly provides SHA1,SHA224,SHA256,SHA384,SHA512,MD5 algorithm.
Now commonly used are MD5 and SHA256.

import hashlib


# md5 algorithm
'''
- update(data): Update the hash object with the bytes in data. Repeated calls
                 are equivalent to a single call with the concatenation of all
                 the arguments.
 - digest():     Return the digest of the bytes passed to the update() method
                 so far as a bytes object.
 - hexdigest():  Like digest() except the digest is returned as a string
                 of double length, containing only hexadecimal digits.
 - copy():       Return a copy (clone) of the hash object. This can be used to
                 efficiently compute the digests of datas that share a common
                 initial substring.
'''
hash1 = hashlib.md5()  #Create a hash object for md5
hash1.update(bytes('lanxing',encoding='utf-8'))  #The update method uses byte-type data to update hash objects
print(hash1,type(hash1))
print(hash1.digest(),type(hash1.digest()))  #Returns a byte type summary of the hash object
print(hash1.hexdigest()) #Returns the hexadecimal hash value


hash2 = hashlib.sha256()
hash2.update(bytes('lanxing',encoding='utf-8'))
print(hash2.hexdigest())
# 59b304df8872f6f0443d389574796e65160c8a8b4df6073e20168ae82a73187a
hash3 = hashlib.sha256(bytes('lanxing',encoding='utf-8'))  #Create a hash object for sha256, whose salt is lanxing
hash3.update(bytes('lanxing',encoding='utf-8'))  #After adding salt, the original hash value is different. It can prevent others from colliding with the bank.
print(hash3.hexdigest())
# 590ec4accc861c0feb928f62c221f5b943e17aeda50e43d6d894050cf42de48e
import hashlib

def register(username,passwd):
    '''
    register a new account to the database
    :param username: username , a str
    :param passwd:  password, a str
    :return: True
    '''
    with open('accounts.db','a+',encoding='utf-8') as f:
        ret = hash_passwd(passwd)
        account = '\n'+username+'$'+ret
        f.write(account)
        f.flush()

    return True

def login(username,passwd):
    '''
    check the account to see if they match the account in the database
    :param username: username,a str
    :param passwd: password,a str
    :return: True if matched,False if not.
    '''
    with open('accounts.db','r',encoding='utf-8') as f:
        for line in f:
            data = line.split('$')
            ret = hash_passwd(passwd)
            if data[0]==username and ret == data[1]:
                return True



def hash_passwd(passwd):
    '''
    change the passwd to hexdigest of hash object
    :param passwd: a str
    :return: hexdigest
    '''
    hash1 = hashlib.sha256(bytes('lanxing', encoding='utf-8'))
    hash1.update(bytes(passwd, encoding='utf-8'))
    return hash1.hexdigest()


if __name__=='__main__':
    username = input('enter your name:>>>').strip()
    passwd = input('enter your passwd').strip()
    choice = input('1 to register or 2 to log in:>>>').strip()
    if choice == '1':
        register(username,passwd)
        print('login was successful')
    elif choice=='2':
        if login(username,passwd):
            print('Login successfully')
        else:
            print('Login failed')

5. String format: percentile and format methods

The percentile method is the older one, while format is the newer one, and now both coexist in Python 3.

  • Percentage method

% [name] [flags] [width]. [precision] typecode, where name,flags,width,precision are optional.
name represents key, which can be referenced later in the form of key:value.
flags and width are often used together:
flags means left-right alignment, values with + means right alignment, whitespace means right alignment, - means left alignment, 0 means right alignment, and 0 means insufficient width.
Width expresses width, which is generally unnecessary. The value is a number.
precision denotes accuracy, and the value is a number.
typecode is commonly used for s (for strings), D (for integers), and f (for floating-point numbers). Required parameter.
If there are placeholders in the string, two must be used to represent native. '%%%. 2f'%. 3.689

s='i am %(name)s, and my age is %(age)d'%{'name':'lanxing','age':19}
print(s)
# i am lanxing, and my age is 19
s1 = 'i am %(name)+10s, and my age is %(age).2f'%{'name':'lanxing','age':19.6789}
print(s1)
# i am    lanxing, and my age is 19.68
  • format method

[[fill]align][sign][#][0][width][,][.precision][type]
Introduction of parameters:

s1 = ' i am {},age {},sex {}'.format('lanxing',18,'male')
s2 = 'i am {},age {},sex {}'.format(*['lanxing',18,'male'])
s3 = 'i am {0},age{1},really {0}'.format('rich',19)  #Index value of format parameter, starting from 0
s4 = 'i am {0},age{1},really {0}'.format(*('rich',18))
s5 = 'i am {name},age{age},reall {name}'.format(name='rich',age=18)
s6 = 'i am {name},age{age},reall {name}'.format(**{'name':'rich','age':18})
s7 = 'i am {0[0]},age {0[1]},really{0[2]}'.format([11,22,33],[44,55,66])
s8 = 'i am {:s},age {:d},money {:.2f}'.format('lanxing',18,2689.222)
s9 = "i am {name:s}, age {age:d}".format(**{"name": "seven", "age": 18})
s10 =  "numbers: {:b},{:o},{:d},{:x},{:X}, {:%}".format(15, 15, 15, 15, 15, 15.87623, 2)
s11  = "numbers: {0:b},{0:o},{0:d},{0:x},{0:X}, {0:%}".format(15)
s12  = "numbers: {num:b},{num:o},{num:d},{num:x},{num:X}, {num:%}".format(num=15)

6. Modular Knowledge Collection

# print(vars())

'''
__name__
__loader__
__doc__
__package__
__spec__
__file__
__builtins__
__cached__
'''
print(__name__) #If it's the main file, _name_=='_main_', and if it's another file, it's the module name.
import str_format as sf
print(sf.__name__)

print(__doc__)  #Multi-line comments for your py file
print(sf.__package__ ) #Import the folder where the py file is located and use. to divide the folder. The current file is None.
print(__file__)  #The absolute path to your own files

print(sf.__cached__)  #Cache: Cache, the current file does not exist, only import other files.

# Only when it executes itself, _name_=='_main_', otherwise it is equal to its module name. In the future, the main program must first add a condition, so that when someone imports it, it will not execute automatically.
if __name__=='__main__':
    a = 1
#Place a folder path in sys.path without fear of parent folder renaming
import os,sys

if __name__=='__main__':  #Be sure to write this when the main file is executed
    dir1=os.path.dirname(__file__)  #_ File_ Be sure to remember that it's the absolute path of the file itself
    dir2 = os.path.dirname(dir1)  #The upper directory of a path
    s1 = 'day7_time_sys'
    dir3 =os.path.join(dir2,s1)  #Stitching Paths Together
    print(dir3)
    sys.path.append(dir3)  #Append a path to the module search path list of the python interpreter

import jsonmodule
  • Installation of third-party modules

Install using pip:
Step 1: Download pip first.
Step 2: Add the bin directory of pip to the environment variable.
Step 3: Run pip install modulename in cmd.

Source installation:
Step 1: Download the source code first.
Step 2: Decompress the source code.
Step 3: Enter the source directory in cmd.
Step 4: Execute python setup.py install in cmd.

7. Initial knowledge of requests module

import requests,json

# Requests module, sending http requests (using python to simulate browser requests)

response = requests.get('https://www.cnblogs.com/wupeiqi/articles/5484747.html')
# Send a request to the specified address and return a requests.models.Response object
response.encoding = 'utf-8'  #Set the encoding of the request object to utf8
result =response.text  #text is the content of the request object and is a str type
# result = json.loads(result)  #Report wrong
print(type(response),result,type(result),sep='\n')

Posted by Kerry Kobashi on Fri, 11 Oct 2019 12:24:23 -0700