Python: Converting txt files to csv

Keywords: Python encoding Excel

Recently, our project has needed a lot of txt files converted to csv format. In the past we did this by hand: open Excel, import the txt file, and save the result as a csv file. Since this has become a weekly task, we decided to automate it with a Python script. The idea is:

  1. Read all txt files in the folder and save them in the list
  2. For each txt file, automatically produce csv files with the same file name
  3. Each txt file is split into columns on its delimiter, a semicolon ";", and saved as a csv file. Before conversion, the file's encoding is normalized to 'utf-8', because encoding errors kept appearing during implementation.
  4. Create a new "txt" subfolder and move all of the original txt files into it

The complete code is as follows:

import csv
import os
import shutil
from chardet.universaldetector import UniversalDetector

def get_encode_info(file):
    """Detect the character encoding of a file with chardet.

    The file is opened in binary mode and fed to a ``UniversalDetector`` one
    line at a time; feeding stops as soon as the detector reports it is done.

    Args:
        file: Path of the file to inspect.

    Returns:
        The ``'encoding'`` entry of the detector's result.
        NOTE(review): chardet presumably reports ``None`` here when it cannot
        decide (e.g. for an empty file) -- confirm against the chardet docs.
    """
    detector = UniversalDetector()
    with open(file, 'rb') as f:
        # Iterate the file object lazily instead of calling f.readlines(), so
        # a large file is not loaded in full when the detector finishes early.
        for line in f:
            detector.feed(line)
            if detector.done:
                break
    detector.close()
    return detector.result['encoding']

def read_file(file):
    """Return the complete content of *file* as raw bytes."""
    with open(file, mode='rb') as stream:
        data = stream.read()
    return data

def write_file(content, file):
    """Write the bytes *content* to *file*, replacing whatever it held."""
    out = open(file, 'wb')
    try:
        out.write(content)
    finally:
        # Always release the handle, even if the write fails.
        out.close()

def convert_encode2utf8(file, original_encode, des_encode):
    """Re-encode *file* in place from one character encoding to another.

    The whole file is read as bytes, decoded with *original_encode* (bytes
    that cannot be decoded are silently dropped), encoded with *des_encode*
    and written back to the same path.

    Args:
        file: Path of the file to rewrite.
        original_encode: Name of the file's current encoding. May be ``None``
            or empty when detection failed; the file is then decoded as
            *des_encode*.
        des_encode: Name of the encoding the file should end up in.
    """
    # bytes.decode(None, ...) raises TypeError, so an unknown source encoding
    # falls back to the destination one. With errors='ignore' this still
    # leaves the file valid in des_encode.
    source_encode = original_encode or des_encode

    with open(file, 'rb') as src:
        raw = src.read()

    # Encode before reopening for writing, so an encoding error cannot leave
    # the file truncated.
    encoded = raw.decode(source_encode, 'ignore').encode(des_encode)

    with open(file, 'wb') as dst:
        dst.write(encoded)

## Move *.txt to a folder
def move2txtfolder(path, txt_file_list):
    """Move the given files into a ``txt`` subfolder of *path*.

    The subfolder is created if it does not exist yet. Each file keeps its
    base name.

    Args:
        path: Folder in which the ``txt`` subfolder lives (or is created).
        txt_file_list: Paths of the files to move.
    """
    # os.path.join picks the platform's separator; a hard-coded '\\' only
    # works on Windows and elsewhere creates a folder literally named
    # '<path>\txt'.
    txt_folder_path = os.path.join(path, 'txt')
    os.makedirs(txt_folder_path, exist_ok=True)

    for file in txt_file_list:
        des_path = os.path.join(txt_folder_path, os.path.basename(file))
        shutil.move(file, des_path)

## Find all *.txt files under the path (including subfolders)
def findtxt(path, txt_file_list):
    """Recursively collect the paths of all ``.txt`` files under *path*.

    Args:
        path: Folder to search; its subfolders are searched as well.
        txt_file_list: List that is extended in place with the path of every
            regular file whose name ends in ``.txt``.

    Returns:
        None. Results are delivered through *txt_file_list*.
    """
    for filename in os.listdir(path):
        de_path = os.path.join(path, filename)
        if os.path.isfile(de_path):
            if de_path.endswith(".txt"):  # Only txt files are of interest.
                txt_file_list.append(de_path)
        elif os.path.isdir(de_path):
            # Descend only into real directories. Entries that are neither a
            # file nor a directory (e.g. a dangling symlink) would make
            # os.listdir raise, so they are skipped.
            findtxt(de_path, txt_file_list)

def txt2csv(txt_file):
    """Convert a semicolon-delimited text file into a CSV file next to it.

    The text file is first rewritten in place as UTF-8 when its detected
    encoding is something else. Each of its lines is then split on ``;`` and
    written as one row of ``<same name>.csv`` using the csv module's
    ``excel`` dialect.

    Args:
        txt_file: Path of the text file to convert.
    """
    # Normalise the input to UTF-8 first so it can be read with one known
    # encoding below.
    encode_info = get_encode_info(txt_file)
    # A None result (detection failed) cannot be used as a codec name, so the
    # file is left untouched. Codec names are case-insensitive, so compare
    # them that way.
    if encode_info is not None and encode_info.lower() != 'utf-8':
        convert_encode2utf8(txt_file, encode_info, 'utf-8')

    csv_file = os.path.splitext(txt_file)[0] + '.csv'
    with open(txt_file, 'r', encoding='utf-8') as txtfile, \
            open(csv_file, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile, dialect='excel')
        # Stream line by line instead of loading the whole file with
        # readlines().
        writer.writerows(line.strip('\n').split(';') for line in txtfile)

if __name__ == '__main__':
    folder_path = r'C:\Details'

    # Alternative for a folder that contains subfolders: collect the txt
    # files recursively instead of using the flat listing below.
    #     txt_file_list = []
    #     findtxt(folder_path, txt_file_list)

    # Flat folder: keep every directory entry whose full path ends in '.txt'.
    txt_file_list = []
    for entry in os.listdir(folder_path):
        candidate = os.path.join(folder_path, entry)
        if candidate.endswith('.txt'):
            txt_file_list.append(candidate)

    # Convert everything first, then move the original txt files away.
    for txt_file in txt_file_list:
        txt2csv(txt_file)

    move2txtfolder(folder_path, txt_file_list)

 

Posted by gmann001 on Sun, 13 Oct 2019 12:40:55 -0700