java Chinese Reading

Keywords: Programming encoding Java

java Chinese Reading

Hexadecimal Byte Values of Chinese Characters Under Different Encodings

The same Chinese character is stored as different byte values (shown here in hexadecimal) depending on the encoding used, for example the character "中" (Zhong):

package JavaIOTest;

import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.Arrays;

/**
 * Demo: prints the bytes (in hexadecimal) that a Chinese character is
 * encoded to under several different character encodings.
 */
public class ShowChineseEncodeValue {
    /**
     * Entry point. Encodes the sample character under every charset listed in
     * {@link #showCode(String)} and prints the resulting bytes.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        // U+4E2D is the Chinese character "Zhong". It is written as a Unicode
        // escape so the demo does not depend on the encoding of this source file.
        String chinese = "\u4e2d";
        showCode(chinese);
    }

    /**
     * Prints the encoded bytes of {@code str} for each charset in a fixed list.
     *
     * @param str the text to encode
     */
    private static void showCode(String str) {
        String[] encodes = new String[]{"BIG5", "GBK", "GB2312", "UTF-8", "UTF-16", "UTF-32"};
        for (String encode : encodes) {
            showCode(str, encode);
        }
    }

    /**
     * Prints the bytes of {@code str} encoded with the charset named {@code encode},
     * one two-digit hexadecimal value per byte, separated by tabs.
     * If the charset is not supported by the running JVM, an explanatory line is
     * printed instead.
     *
     * @param str    the text to encode
     * @param encode the charset name, e.g. {@code "UTF-8"}
     */
    private static void showCode(String str, String encode) {
        try {
            // getBytes() would use the platform default charset;
            // getBytes(name) encodes with the explicitly named charset.
            // Encode first so that an unsupported charset does not leave a
            // header line with no values after it.
            byte[] bs = str.getBytes(encode);
            System.out.printf("Characters:\"%s\"In coding mode%s The lower hexadecimal value is\n", str, encode);
            for (byte b : bs) {
                // Mask to 0..255 so negative bytes are not sign-extended, and
                // zero-pad so every byte is shown as exactly two hex digits.
                System.out.print(String.format("%02x", b & 0xff) + "\t");
            }
            System.out.println();
        } catch (UnsupportedEncodingException e) {
            System.out.printf("UnsupportedEncodingException: %s Characters cannot be parsed by encoding%s\n", encode, str);
        }
    }
}

Reading Chinese with a Byte Stream

1. First, we need to know which encoding was used when the text was saved.
2. After reading the raw bytes with a byte stream, decode them with that same encoding to obtain the correct characters.
Prepare a file named textFile.txt saved as UTF-8, then read it with a byte stream as shown below.

package JavaIOTest;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;

/**
 * Demo: reads a UTF-8 encoded text file with a byte stream and decodes the
 * bytes explicitly as UTF-8 before printing them.
 */
public class ReadChineseByFileInputeStream {
    /** File read when no path is given on the command line. */
    private static final String DEFAULT_PATH = "d:/xyz/z/zzz/zzzz/textFile.txt";

    /**
     * Reads the file and prints its content decoded as UTF-8. Any I/O failure
     * is reported by printing the stack trace.
     *
     * @param args optional; {@code args[0]} is the path of the file to read,
     *             otherwise the built-in default path is used
     */
    public static void main(String[] args) {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_PATH;
        File textFile = new File(path);
        try (FileInputStream fileInputStream = new FileInputStream(textFile)) {
            long length = textFile.length();
            if (length > Integer.MAX_VALUE) {
                // A byte[] cannot hold it; a plain (int) cast would silently wrap.
                throw new IOException("File too large to read into memory: " + length + " bytes");
            }
            byte[] bytes = new byte[(int) length];
            // A single read() call may return fewer bytes than requested, so
            // keep reading until the buffer is full or the stream ends.
            int total = 0;
            while (total < bytes.length) {
                int count = fileInputStream.read(bytes, total, bytes.length - total);
                if (count < 0) {
                    break;
                }
                total += count;
            }
            // Decode only the bytes that were actually read.
            System.out.println(new String(bytes, 0, total, "UTF-8"));
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

Read text using FileReader

FileReader returns characters, so it must already have decoded the file's bytes using some encoding.
The encoding FileReader uses is the value returned by Charset.defaultCharset(). Before Java 11, FileReader offers no way to specify the encoding (Java 11 added constructors that accept a Charset). To use a different encoding on older versions, use InputStreamReader instead, like this:

new InputStreamReader(new FileInputStream(f),Charset.forName("UTF-8")); 
package JavaIOTest;

import java.io.*;
import java.nio.charset.Charset;

/**
 * Demo: reads the same text file twice, once with {@link FileReader} (which
 * decodes using the platform default charset) and once with an
 * {@link InputStreamReader} that is explicitly told to decode as UTF-8, and
 * prints both results for comparison.
 */
public class ReadChineseByFileReader {
    /** File read when no path is given on the command line. */
    private static final String DEFAULT_PATH = "d:/xyz/z/zzz/zzzz/textFile.txt";

    /**
     * Runs both reads and prints what each one decoded. I/O failures during
     * either read are reported by printing the stack trace.
     *
     * @param args optional; {@code args[0]} is the path of the file to read,
     *             otherwise the built-in default path is used
     */
    public static void main(String[] args) throws UnsupportedEncodingException, FileNotFoundException {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_PATH;
        File textFile = new File(path);
        System.out.println("The default encoding is"+ Charset.defaultCharset());
        try (FileReader fileReader = new FileReader(textFile)) {
            String text = readAll(fileReader);
            System.out.printf("FileReader Use default encoding%s,The recognized characters are%n",Charset.defaultCharset());
            System.out.println(text);
        } catch (IOException e) {
            e.printStackTrace();
        }
        try (InputStreamReader inputStreamReader = new InputStreamReader(new FileInputStream(textFile), Charset.forName("utf-8"))) {
            String text = readAll(inputStreamReader);
            System.out.printf("InputStreamReader Specified encoding method UTF-8 The recognized characters are\n");
            System.out.println(text);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Reads every remaining character from {@code reader}.
     *
     * <p>The file's byte length is deliberately not used to size a buffer: with
     * a multi-byte encoding the number of characters is smaller than the number
     * of bytes, and a fixed-size array would leave NUL characters at the end of
     * the result. Only the characters actually returned by each read are kept.
     *
     * @param reader the source to drain; not closed by this method
     * @return all characters read, possibly empty
     * @throws IOException if reading fails
     */
    private static String readAll(Reader reader) throws IOException {
        StringBuilder text = new StringBuilder();
        char[] buffer = new char[1024];
        int count;
        while ((count = reader.read(buffer)) != -1) {
            text.append(buffer, 0, count);
        }
        return text.toString();
    }
}

Posted by timbo6585 on Tue, 30 Jul 2019 13:19:17 -0700