Using Python to automatically generate the source code document for a software copyright application

Keywords: Java Python encoding less

A small requirement:

When applying for a software copyright, you must submit 60 pages of source code at 50 lines per page. The project's files, however, are spread across a multi-level directory tree, and copying them one by one is tedious. Instead, we use Python's os module to collect every file under the directory, filter out blank lines and comment lines (the script imports re for regular-expression matching), and then write the remaining lines to a Word document with the docx module. The modules involved are os, docx and re.

There are two major steps:

1. First, save the paths of all .java files in every subfolder of a given folder to a list
2. Read each path in the list in turn, and save the contents of each .java file to a Word document

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
__title__ = ''
__author__ = 'mike_jun'
__mtime__ = '2019-7-1'
#Objective: 1. First, save the paths of all .java files to a list
        2. Read each path in the list in turn, and save the .java file contents to a Word document
"""
import os
from docx import Document
from docx.oxml.ns import qn
from docx.shared import Pt
import re
from docx.shared import Length


fileList = []  # Module-level accumulator that the script fills with file paths


def getAllFile(path, fileList):
    """Append the path of every regular file found below ``path`` to ``fileList``.

    The tree is visited depth-first: the files of a directory are recorded
    before any of its sub-directories are entered, and sub-directories are
    entered in the order ``os.listdir`` reported them. Paths are built by
    joining with a literal '/'. Entries that are neither a directory nor a
    regular file are ignored.

    Args:
        path: Directory to start from.
        fileList: List that is extended in place with the file paths found.

    Returns:
        None. The result is delivered through ``fileList``.
    """
    to_visit = [path]  # explicit stack instead of recursive calls
    while to_visit:
        current = to_visit.pop()
        sub_dirs = []
        for name in os.listdir(current):
            candidate = current + '/' + name
            if os.path.isdir(candidate):
                sub_dirs.append(candidate)
            if os.path.isfile(candidate):
                fileList.append(candidate)
        # Push in reverse so the first sub-directory listed is popped first,
        # which keeps the same visiting order as a recursive pre-order walk.
        to_visit.extend(reversed(sub_dirs))

# Walk the hard-coded project root and collect every file path into fileList.
getAllFile( r'E:\src\main\java\com\gdut', fileList)
# Report how many files (of any type) were found, before any filtering.
print('The number of documents is: ',len(fileList))

def getJavaFile(fileList):
    """Filter ``fileList`` in place so that only paths ending in '.java' remain.

    The list is rebuilt with a slice assignment rather than by calling
    ``remove`` inside a loop over the same list: removing while iterating
    shifts the remaining items and causes the element after each removed
    one to be skipped, which left non-.java paths in the result.

    Args:
        fileList: List of path strings; modified in place.

    Returns:
        None. Prints the number of paths that remain.
    """
    fileList[:] = [path for path in fileList if path.endswith('.java')]
    print('The number of documents is: ', len(fileList))
# Filter the collected paths in place so that only .java files remain.
getJavaFile(fileList)
# Sanity check that the first remaining entry is a regular file. Guarded so
# that an empty result does not abort the script with an IndexError.
if fileList:
    print(os.path.isfile(fileList[0]))

def _is_code_line(line):
    """Return True if ``line`` should be copied into the document.

    Blank and whitespace-only lines are rejected, as are lines whose first
    non-blank characters open or continue a Java comment ('/*', '*', '//').
    Only the start of the stripped line is inspected, so real code such as
    ``a * b`` or a string containing ``http://`` is kept; a plain substring
    test would discard those lines.
    """
    stripped = line.strip()
    if not stripped:
        return False
    return not stripped.startswith(('/*', '*', '//'))


def saveDocFile(save_file=r'E:\text.doc', max_lines=3050):
    """Write the code lines of every file in the global ``fileList`` to a Word file.

    Each file is read as UTF-8. Blank lines and comment lines are skipped
    (see ``_is_code_line``); every remaining line is appended as a run to a
    single paragraph. Writing stops as soon as ``max_lines`` lines have been
    added.

    Args:
        save_file: Output path handed to ``Document.save``.
            NOTE(review): python-docx writes the .docx format; confirm that
            a '.doc' extension is really wanted for the default path.
        max_lines: Positive cap on the number of code lines written. The
            default keeps the author's figure, described in the original
            comment as ensuring the printout does not exceed 60 pages.

    Returns:
        None. 'all done' is printed only when the input is exhausted before
        the cap is reached.
    """
    from docx.enum.text import WD_LINE_SPACING

    doc = Document()
    p = doc.add_paragraph('')  # the single paragraph that receives every code line

    # Body text uses the 'Normal' style: set its font and line spacing there.
    normal_style = doc.styles['Normal']
    normal_style.font.name = 'Times New Roman'
    normal_style.font.size = Pt(8)  # 8 pt font

    # Line-spacing rules offered by WD_LINE_SPACING: SINGLE (default),
    # ONE_POINT_FIVE, DOUBLE, AT_LEAST, EXACTLY, MULTIPLE.
    # The rule lives on ParagraphFormat, not on the Paragraph object, so it is
    # set on the style's paragraph_format together with the spacing value.
    paragraph_format = normal_style.paragraph_format
    paragraph_format.line_spacing = Pt(12.9)  # fixed 12.9 pt; per the author, 50 lines per page
    paragraph_format.line_spacing_rule = WD_LINE_SPACING.EXACTLY

    codeNum = 0  # number of code lines written so far
    for i, f in enumerate(fileList):
        print('starting deal %d' % i)
        # Read as UTF-8 so that non-ASCII (e.g. Chinese) text is decoded correctly.
        with open(f, encoding='UTF-8') as file:
            for line in file:
                if not _is_code_line(line):
                    continue
                p.add_run(line)
                codeNum += 1
                if codeNum >= max_lines:  # cap reached: save what we have and stop
                    doc.save(save_file)
                    return
    doc.save(save_file)  # fewer lines than the cap: save everything
    print('all done')

# Build the Word document from the filtered file list.
saveDocFile()
# Final completion message. saveDocFile() also prints 'all done' itself when
# it finishes without reaching its line cap, so the message can appear twice.
print('all done')

 

There are still some shortcomings: the header, page numbers and title page are not generated and have to be added by hand, ha ha.

 

Posted by Angry Coder on Sun, 19 Apr 2020 08:13:25 -0700