A small requirement:
When applying for a software copyright, you need to submit 60 pages of source code with 50 lines per page. However, the project is stored in a multi-level directory tree, and copying the files one by one is tedious. So we collect all the files under the directory with Python's os module, filter out the lines that are not real source code (blank lines and comments) with the re module, and then write the result into a Word document with the docx module. The modules involved are os, docx, and re.
There are two major steps:
1. First, save the paths of all .java files under a folder (including all of its subfolders) to a list
2. Read each path in the list in turn, and save the contents of each .java file to a Word document
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
__title__ = ''
__author__ = 'mike_jun'
__mtime__ = '2019-7-1'

Objective:
1. First, save the paths of the .java files to a list.
2. Read the paths in the list in turn, and save the .java file contents to Word.
"""
import os
from docx import Document
from docx.oxml.ns import qn
from docx.shared import Pt
import re
from docx.shared import Length

fileList = []  # Save file paths using a global list


def getAllFile(path, fileList):
    """Recursively append the path of every regular file under *path* to *fileList*.

    Files of a directory are appended before its sub-directories are visited.
    Entries are processed in sorted order so the result is deterministic.

    :param path: directory to scan.
    :param fileList: list that receives the file paths (mutated in place).
    :raises OSError: if *path* (or a sub-directory) cannot be listed.
    """
    dirList = []  # Sub-directories found at this level
    for name in sorted(os.listdir(path)):
        fullPath = os.path.join(path, name)
        if os.path.isdir(fullPath):
            dirList.append(fullPath)
        elif os.path.isfile(fullPath):
            fileList.append(fullPath)
    # Recursion ends automatically when a folder has no sub-folders
    for subDir in dirList:
        getAllFile(subDir, fileList)


def getJavaFile(fileList):
    """Keep only the paths ending in '.java' in *fileList* (in place) and print the count.

    The list is rebuilt with slice assignment instead of calling ``remove``
    during iteration, which would skip the element following each removal.

    :param fileList: list of file paths (mutated in place).
    """
    fileList[:] = [f for f in fileList if f.endswith('.java')]
    print('The number of documents is: ', len(fileList))


def _isCodeLine(line):
    """Return True if *line* is neither blank nor a comment-only line.

    A line is treated as a comment when, after stripping whitespace, it starts
    with '/*', '*' or '//'. Checking the prefix (rather than searching the
    whole line) keeps real code such as ``a * b`` or ``"http://host"``.
    """
    stripped = line.strip()
    if not stripped:
        return False
    return not stripped.startswith(('/*', '*', '//'))


def _collectCodeLines(files, maxLines):
    """Read *files* in order and return at most *maxLines* code lines.

    Every returned line ends with exactly one '\\n', so the last line of a
    file without a trailing newline is not merged with the next file.
    """
    lines = []
    for i, f in enumerate(files):
        if len(lines) >= maxLines:
            break
        print('starting deal %d' % i)
        # Read as UTF-8 so that non-ASCII (e.g. Chinese) text is decoded correctly
        with open(f, encoding='UTF-8') as file:
            for line in file:
                if not _isCodeLine(line):
                    continue
                lines.append(line.rstrip('\r\n') + '\n')
                if len(lines) >= maxLines:
                    break
    return lines


def saveDocFile(files=None, save_file=r'E:\text.doc', max_lines=3050):
    """Write the filtered source lines of *files* into a Word document.

    Line-spacing rules offered by WD_LINE_SPACING:
    SINGLE => single spacing (default), ONE_POINT_FIVE => 1.5 lines,
    DOUBLE => double spacing, AT_LEAST => minimum value,
    EXACTLY => fixed value, MULTIPLE => multiple line spacing.

    :param files: paths to read; defaults to the module-level ``fileList``.
    :param save_file: output path. NOTE: python-docx writes the .docx format
        regardless of the extension used here.
    :param max_lines: maximum number of code lines written, so the printout
        stays within the page limit.
    :return: number of code lines written.
    """
    from docx.enum.text import WD_LINE_SPACING

    if files is None:
        files = fileList

    doc = Document()
    normal = doc.styles['Normal']
    normal.font.name = 'Times New Roman'  # Font of the body text
    normal.font.size = Pt(8)  # Font size: 8 pt
    # Fixed line height of 12.9 pt gives 50 lines of code per page. The rule is
    # set on the style's paragraph format; assigning it to the Paragraph object
    # itself only creates an unused attribute.
    paragraph_format = normal.paragraph_format
    paragraph_format.line_spacing = Pt(12.9)
    paragraph_format.line_spacing_rule = WD_LINE_SPACING.EXACTLY

    p = doc.add_paragraph('')
    codeLines = _collectCodeLines(files, max_lines)
    for line in codeLines:
        p.add_run(line)

    doc.save(save_file)
    return len(codeLines)


if __name__ == '__main__':
    # Recursively collect every file, then keep only the .java files
    getAllFile(r'E:\src\main\java\com\gdut', fileList)
    print('The number of documents is: ', len(fileList))
    getJavaFile(fileList)
    if fileList:
        print(os.path.isfile(fileList[0]))  # Check that the first entry is a file
    saveDocFile()
    print('all done')
There are also limitations: the header, page numbers, and title page are not generated by the script. These can be added manually afterwards.