Catalog
Basic operation
from docx import Document
from docx.shared import Inches

# Create an empty document.
document = Document()

# Add a heading. `level` selects the heading style: 0 is the document title,
# 1 (also the default when omitted) is Heading 1; valid range is 0 <= level <= 9.
document.add_heading('Document Title', 0)

# Add a paragraph; add_paragraph() accepts optional text and style arguments.
p = document.add_paragraph('A plain paragraph having some ')

# Append runs to the paragraph. add_run() accepts optional text and style
# arguments, and the returned run exposes `bold` and `italic` attributes.
p.add_run('bold').bold = True
p.add_run(' and some ')
p.add_run('italic.').italic = True

document.add_heading('Heading, level 1', level=1)
document.add_paragraph('Intense quote', style='Intense Quote')

document.add_paragraph(
    'first item in unordered list', style='List Bullet'
)
document.add_paragraph(
    'first item in ordered list', style='List Number'
)

# Add a picture, scaled to a width of 1.25 inches.
document.add_picture('monty-truth.png', width=Inches(1.25))

# Add a table: one header row first, then one appended row per record.
records = (
    (3, '101', 'Spam'),
    (7, '422', 'Eggs'),
    (4, '631', 'Spam, spam, eggs, and spam'),
)

table = document.add_table(rows=1, cols=3)
hdr_cells = table.rows[0].cells
hdr_cells[0].text = 'Qty'
hdr_cells[1].text = 'Id'
hdr_cells[2].text = 'Desc'
# `item_id` rather than `id`, so the builtin id() is not shadowed.
for qty, item_id, desc in records:
    row_cells = table.add_row().cells
    row_cells[0].text = str(qty)  # cell text must be a string
    row_cells[1].text = item_id
    row_cells[2].text = desc

document.add_page_break()
Object relationship
After `document.add_paragraph()` is called with text, that text is stored in the paragraph's first run.
Add styles
Set the Chinese (East Asian) font to Microsoft YaHei and the Western font to Times New Roman.
import docx
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.oxml.ns import qn
from docx.shared import Cm, Pt

# Only the `docx` package itself is imported above, so Document has to be
# reached through it; a bare `Document()` would raise NameError here.
document = docx.Document()

# Fetch the document's 'Normal' style; the font settings below are made on it.
style = document.styles['Normal']

# Western font.
style.font.name = 'Times New Roman'

# Chinese font: written to the `w:eastAsia` attribute of the style's rFonts
# element, separately from the Western font name set above.
style.element.rPr.rFonts.set(qn('w:eastAsia'), 'Microsoft YaHei')
text-indent
# Paragraph-level formatting lives on the style's `paragraph_format` object.
paragraph_format = style.paragraph_format

# First-line indent of 0.74 cm (the article treats this as two characters wide).
paragraph_format.first_line_indent = Cm(0.74)
Style title separately
# Create an empty level-0 heading (the document title) and centre it.
title_ = document.add_heading(level=0)
title_.alignment = WD_ALIGN_PARAGRAPH.CENTER

# Put the title text in its own run so the font settings below apply to that
# run only. `title` must already be defined by the surrounding code.
title_run = title_.add_run(title)

# 14 pt, Times New Roman for Western text.
title_font = title_run.font
title_font.size = Pt(14)
title_font.name = 'Times New Roman'

# Chinese text uses the font named in the run's `w:eastAsia` attribute.
title_run.element.rPr.rFonts.set(qn('w:eastAsia'), 'Microsoft YaHei')
Set hyperlink
def add_hyperlink(paragraph, url, text, color=None, underline=True):
    """Append an external hyperlink to the end of a paragraph.

    :param paragraph: The paragraph we are adding the hyperlink to.
    :param url: A string containing the required url.
    :param text: The text displayed for the url.
    :param color: Value written to the run's ``w:color`` element (the demo
        below passes the hex string ``'FF8822'``). ``None`` adds no color
        element.
    :param underline: When falsy, a ``w:u`` element with value ``none`` is
        added to the run. When truthy, no underline element is written.
    :return: The ``w:hyperlink`` element that was appended to the paragraph.
    """
    # Register the URL as an external relationship on the part that owns the
    # paragraph; the returned relationship id is what the hyperlink refers to.
    part = paragraph.part
    r_id = part.relate_to(
        url, docx.opc.constants.RELATIONSHIP_TYPE.HYPERLINK, is_external=True
    )

    # Create the w:hyperlink tag and point it at the relationship.
    hyperlink = docx.oxml.shared.OxmlElement('w:hyperlink')
    hyperlink.set(docx.oxml.shared.qn('r:id'), r_id)

    # Create the run (w:r) and its run-properties element (w:rPr).
    new_run = docx.oxml.shared.OxmlElement('w:r')
    rPr = docx.oxml.shared.OxmlElement('w:rPr')

    # Add color if it is given.
    if color is not None:
        c = docx.oxml.shared.OxmlElement('w:color')
        c.set(docx.oxml.shared.qn('w:val'), color)
        rPr.append(c)

    # Remove underlining if it is requested.
    if not underline:
        u = docx.oxml.shared.OxmlElement('w:u')
        u.set(docx.oxml.shared.qn('w:val'), 'none')
        rPr.append(u)

    # Assemble: properties go into the run, the run into the hyperlink, and
    # the hyperlink is appended directly to the paragraph's XML element.
    new_run.append(rPr)
    new_run.text = text
    hyperlink.append(new_run)

    paragraph._p.append(hyperlink)

    return hyperlink


document = docx.Document()
p = document.add_paragraph()

# Hyperlink with no explicit run formatting: no color element is written and
# no underline element is written.
hyperlink = add_hyperlink(p, 'http://www.google.com', 'Google', None, True)

# Hyperlink with a custom color and underlining switched off.
hyperlink = add_hyperlink(p, 'http://www.google.com', 'Google', 'FF8822', False)

document.save('demo.docx')
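The function above turns the entire text it is given into a link. In everyday use, however, a hyperlink usually covers only a keyword, or arrives as an HTML `<a>` tag embedded in longer text. Because a paragraph is made up of runs, this can be handled by adding the surrounding text as ordinary runs and inserting each link between them as a hyperlink.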
For example, given the following text, replace the `<a>` tag with a hyperlink:
"""I am trying to add an hyperlink in a MS Word document using docx module for <a href="python.org">Python</a>. Just do it."""
def is_text_link(text):
    """Return True if *text* contains any of the common URL markers.

    Every marker is tested. Returning after the first comparison would only
    ever check for 'http' and would miss values such as 'python.org'.

    :param text: The string to inspect.
    :return: True when at least one marker occurs in *text*, otherwise False.
    """
    markers = ('http', '://', 'www.', '.com', '.org', '.cn', '.xyz', '.htm')
    return any(marker in text for marker in markers)


def add_text_link(document, text):
    """Add *text* as a new paragraph, turning ``<a href="...">`` tags into links.

    Text outside the tags is added as ordinary runs; each
    ``<a href="URL">LABEL</a>`` becomes a hyperlink showing LABEL and pointing
    at URL, inserted via ``add_hyperlink``.

    Anchors are recognised by their markup rather than by guessing which
    fragments look like URLs, so plain text that happens to contain '.com'
    (or that repeats a link label) is kept instead of being dropped.

    :param document: The document that receives the new paragraph.
    :param text: Text that may contain ``<a href="...">...</a>`` tags.
    :return: The paragraph that was added.
    """
    # Local import keeps this snippet self-contained: no `import re` is
    # visible alongside it.
    import re

    paragraph = document.add_paragraph()

    # With two capture groups, re.split() returns
    #   [plain, url, label, plain, url, label, ..., plain]
    # so the list always has 3*k + 1 items. DOTALL lets a label span lines.
    pieces = re.split(r'<a href="([^"]*)">(.*?)</a>', text, flags=re.DOTALL)

    for start in range(0, len(pieces), 3):
        plain = pieces[start]
        if plain:
            paragraph.add_run(plain)
        # Every plain segment except the last is followed by a (url, label) pair.
        if start + 2 < len(pieces):
            url, label = pieces[start + 1], pieces[start + 2]
            add_hyperlink(paragraph, url, label, None, True)

    return paragraph