Python 3 | Convert txt files to Excel | Encode each data element with Pinyin initials + a number | Read the contents of multiple folders | Uses xlwt + xpinyin + os

Keywords: Python Excel encoding

Python 3 | Convert txt files to Excel | Encode each data element with Pinyin initials + a number | Read the contents of multiple folders | Uses xlwt + xpinyin + os

I'm an incoming graduate student (not yet officially enrolled), and my dear senior labmate keeps handing me chores such as tidying up forms, which is how this blog post came about.
I copied the data out of the Word documents into txt files beforehand. There are multiple folders, and each folder contains multiple txt files.
1. Because I could not import the .doc files directly, I use txt files here; converting doc to docx is covered in the next blog post;
2. What should I do if there are many chores for postgraduates? Write code!
3. Reading txt documents with Python
4. Writing Excel files with Python
5. Reading multiple documents with Python
My data format is as follows:

4.1 xxxxx
key1: value
key2: . . 
key3: . 
key4: . 
key5: . 
key6: . 
key7: . 
key8: . 
key9: . 
key10: . 
-----------------------------------------------------
key1: value
key2: . . 
key3: . 
key4: . 
key5: . 
key6: . 
key7: . 
key8: . 
key9: . 
key10: . 
-----------------------------------------------------
......

The Python 3 code is as follows:

#coding:utf-8
import os
import xlwt
import xlrd
from xpinyin import Pinyin
# Convert every txt file found under <original_data_path>/<folder>/ into an
# .xls workbook under <final_data_path>/<folder>/.
#
# Each "key: value" line of a txt file contributes one cell. A record of ten
# values fills one spreadsheet row: columns 0, 1 and 3..10 receive the values
# in order, while column 2 receives a generated code made of the Pinyin
# initials of the record's first value plus a two-digit occurrence number.

# Paths
original_data_path = 'xxxxxxxxx\\original\\'
final_data_path = 'xxxxxxxxx\\final\\'
# Entries of the input root; only sub-folders are processed below
dirs = os.listdir(original_data_path)
# Occurrence counter for first-column values. It is shared by ALL files and
# folders, so the numeric suffix of a code keeps growing across the whole run.
myDict = dict()

# Number of columns per spreadsheet row (ten values + one generated code)
COLUMN_COUNT = 11
# Column reserved for the generated Pinyin code; values never land here
CODE_COLUMN = 2

# Built once: the converter does not depend on the file being processed
p = Pinyin()

for file_dir in dirs:
    tmp_dir = os.path.join(original_data_path, file_dir)
    if not os.path.isdir(tmp_dir):
        continue
    print(tmp_dir)

    result_dir = os.path.join(final_data_path, file_dir)
    print(result_dir)
    # makedirs also creates final_data_path itself when it is missing
    os.makedirs(result_dir, exist_ok=True)

    for txt_name in os.listdir(tmp_dir):
        tmp_txt_dir = os.path.join(tmp_dir, txt_name)
        # Nested folders cannot be opened as text files; skip them
        if not os.path.isfile(tmp_txt_dir):
            continue
        print(tmp_txt_dir)

        # splitext keeps inner dots, so "a.1.txt" and "a.2.txt" no longer
        # collide on the same output name
        excel_name = os.path.splitext(txt_name)[0] + '.xls'
        excel_dir = os.path.join(result_dir, excel_name)
        print(excel_dir)

        # One workbook with a single sheet per txt file
        workbook = xlwt.Workbook(encoding='utf-8', style_compression=0)
        sheet = workbook.add_sheet('data')

        # Header row: the column numbers '1'..'11' as text
        for col in range(COLUMN_COUNT):
            sheet.write(0, col, str(col + 1))

        # Running cell index; starting at COLUMN_COUNT puts the first value
        # at row 1, column 0 (row 0 is the header)
        count = COLUMN_COUNT

        # NOTE(review): the file is decoded with the platform default
        # encoding, as in the original script - confirm it matches the data.
        with open(tmp_txt_dir, 'r') as fopen:
            for line in fopen:
                line = line.strip()
                if not line:
                    continue
                parts = line.split(': ', 1)
                # Titles and separator lines contain no "key: value" pair.
                # NOTE(review): a key with an empty value ("key:") is skipped
                # here too, which would shift the following columns.
                if len(parts) == 1:
                    continue
                value = parts[1]

                # Jump over the column reserved for the generated code
                if count % COLUMN_COUNT == CODE_COLUMN:
                    count += 1
                col = count % COLUMN_COUNT
                row = count // COLUMN_COUNT
                sheet.write(row, col, value)

                if col == 0:
                    # First value of a record: derive its code
                    num = myDict.get(value, 0) + 1
                    if num > 1:
                        print('value in myDict, value is %s' % value)
                    myDict[value] = num
                    code = p.get_initials(value, u'') + str(num).zfill(2)
                    sheet.write(row, CODE_COLUMN, code)
                count += 1

        workbook.save(excel_dir)

Posted by tisource on Tue, 22 Oct 2019 11:08:51 -0700