Python 3 | Converting TXT files to Excel | Encoding each data element as Pinyin initials + a number | Reading the contents of multiple folders | Using xlwt + xpinyin + os
I'm a brand-new ("year zero") graduate student, and my lovely senior labmate keeps handing me chores. Most of them involve tidying up forms and tables, which is how this blog post came about.
I copied the data out of the Word documents and into TXT files in advance. There are multiple folders, and each folder contains multiple TXT files.
1. Reading the .doc files directly did not work, so TXT files are used instead; converting .doc to .docx is covered in the next blog post.
2. What should I do if there are many chores for postgraduates? Write code!
3. Reading TXT documents with Python
4. Writing Excel files with Python
5. Reading multiple documents with Python
My data format is as follows:
4.1 xxxxx key1: value key2: . . key3: . key4: . key5: . key6: . key7: . key8: . key9: . key10: . ----------------------------------------------------- key1: value key2: . . key3: . key4: . key5: . key6: . key7: . key8: . key9: . key10: . ----------------------------------------------------- ......
The Python 3 code is as follows:
#coding:utf-8
"""Convert "key: value" TXT records into .xls workbooks, one per TXT file.

Every sub-folder of ``original_data_path`` is mirrored under
``final_data_path``. Each file found in a sub-folder is parsed line by line
and written to a sheet named ``data`` in a workbook that carries the same
base name with an ``.xls`` extension.

Sheet layout: row 0 holds the header labels '1'..'11'. Each following row
holds one record; its values fill columns 0, 1, 3, 4, ..., 10 in the order
the "key: value" lines appear, and column 2 holds a generated code made of
the Pinyin initials of the column-0 value plus a two-digit occurrence number.
"""
import os
import xlwt
import xlrd
from xpinyin import Pinyin

# Root folders: input TXT tree and output XLS tree.
original_data_path = 'xxxxxxxxx\\original\\'
final_data_path = 'xxxxxxxxx\\final\\'

# Number of sheet columns per record (ten values plus the generated code).
COLUMN_COUNT = 11
# Column reserved for the generated Pinyin code; values never land here.
CODE_COLUMN = 2

# Occurrence counter for column-0 values. It is shared by every TXT file, so
# the numeric suffix keeps increasing across files and folders.
myDict = dict()

# One converter instance is enough for the whole run.
p = Pinyin()


def _convert_txt(txt_path, excel_path):
    """Parse one TXT file and save its records to ``excel_path``.

    Lines that do not contain the separator ``': '`` (titles, separator
    rules, blank lines, continuation lines) are ignored.
    """
    workbook = xlwt.Workbook(encoding='utf-8', style_compression=0)
    sheet = workbook.add_sheet('data')

    # Table header: '1' .. '11'.
    for col in range(COLUMN_COUNT):
        sheet.write(0, col, str(col + 1))

    # Running cell index; starting at COLUMN_COUNT means row 1, column 0.
    count = COLUMN_COUNT

    # NOTE(review): no encoding is passed to open(), so the platform default
    # is used -- confirm it matches how the TXT files were saved.
    with open(txt_path, 'r') as txt_file:
        for line in txt_file:
            _key, sep, value = line.strip().partition(': ')
            if not sep:
                continue

            # Skip over the column reserved for the generated code.
            if count % COLUMN_COUNT == CODE_COLUMN:
                count += 1
            row, col = divmod(count, COLUMN_COUNT)
            sheet.write(row, col, value)

            if col == 0:
                # First value of a record: derive its code from the Pinyin
                # initials plus how many times this value has been seen.
                if value in myDict:
                    print('value in myDict, value is %s' % value)
                num = myDict.get(value, 0) + 1
                myDict[value] = num
                code = p.get_initials(value, u'') + str(num).zfill(2)
                sheet.write(row, CODE_COLUMN, code)

            count += 1

    workbook.save(excel_path)


# Read folder
dirs = os.listdir(original_data_path)
for file_dir in dirs:
    tmp_dir = os.path.join(original_data_path, file_dir)
    if not os.path.isdir(tmp_dir):
        continue
    print(tmp_dir)

    result_dir = os.path.join(final_data_path, file_dir)
    print(result_dir)
    # Also creates final_data_path itself when it does not exist yet.
    os.makedirs(result_dir, exist_ok=True)

    for txt_name in os.listdir(tmp_dir):
        tmp_txt_dir = os.path.join(tmp_dir, txt_name)
        # Nested folders cannot be opened as text; leave them alone.
        if not os.path.isfile(tmp_txt_dir):
            continue
        print(tmp_txt_dir)

        # splitext drops only the final extension, so a name such as
        # "4.1 title.txt" becomes "4.1 title.xls" rather than "4.xls".
        excel_name = os.path.splitext(txt_name)[0] + '.xls'
        excel_dir = os.path.join(result_dir, excel_name)
        print(excel_dir)

        _convert_txt(tmp_txt_dir, excel_dir)