#头条创作挑战赛#
一、如何对一个大文件进行切割
import lombok.extern.slf4j.Slf4j;import org.springframework.stereotype.Component;import org.springframework.util.StringUtils;import java.io.*;import java.util.ArrayList;import java.util.List;/** * 处理csv文件类,进行将大文件进行分割 */@Component@Slf4jpublic class SplitFileUtil { public final static int line_num_split_file = 20000; public static final String _encoding = "GB2312"; /** * @param sourceFileUrl 源路径 * @param targetPath 目标路径 * @return * @throws IOException */ public static List<String> parateFileByLine(String sourceFileUrl, String targetPath) throws IOException { List<String> filenames = new ArrayList<>(); int file_count; //获取文件总行数 long lineNum = getFileLineNum(sourceFileUrl); //不做分文件处理 Constants.line_num_per_file;自定义的常量 if (lineNum <= line_num_split_file) { filenames.add(sourceFileUrl); } el { //分割文件 if (lineNum % line_num_split_file == 0) { file_count = (int) (lineNum / line_num_split_file); } el { file_count = (int) (lineNum / line_num_split_file + 1); } Long[] countArray; if (file_count > Constants.max_file_count) { file_count = Constants.max_file_count; //将每个文件的起始索引记录下来 countArray = new Long[file_count]; long file_start_index; int line_num_split_file; if (lineNum % file_count == 0) { line_num_split_file = (int) (lineNum / file_count); } el { line_num_split_file = (int) (lineNum / file_count + 1); } for (int i = 0; i < file_count; i++) { file_start_index = (i + 1) * line_num_split_file; countArray[i] = file_start_index; } } el { //将每个文件的起始索引记录下来 countArray = new Long[file_count]; long file_start_index = 0; for (int i = 0; i < file_count; i++) { file_start_index = (i + 1) * line_num_split_file; countArray[i] = file_start_index; } } /*处理文件*/ InputStreamReader fr = null; BufferedReader br = null; try { fr = new InputStreamReader(new FileInputStream(sourceFileUrl), _encoding); br = new BufferedReader(fr); int count = 0; int _index = 0; String rec;// 一行 List<String> result = new ArrayList<>(); //读取掉第一行数据,第一行为标题 br.readLine(); // 读取一行 while ((rec = br.readLine()) != null) { 
count++; result.add(rec); if (count == countArray[_index]) { //一次性写入文件 String targetFile = targetPath + "data_part_" + _index + ".csv"; writeCsv(targetFile, result); result.clear(); _index++; filenames.add(targetFile); } } if (result.size() > 0) { String targetFile = targetPath + "data_part_" + (file_count - 1) + ".csv"; writeCsv(targetFile, result); result.clear(); filenames.add(targetFile); } } catch (Exception e) { e.printStackTrace(); } finally { if (!StringUtils.isEmpty(br)) { br.clo(); } if (!StringUtils.isEmpty(fr)) { fr.clo(); } } } return filenames; } /** * @param csvFile 输出的文件 * @param contentList 输出的内容 * @throws IOException */ public static void writeCsv(String csvFile, List<String> contentList) throws IOException { BufferedWriter bw = null; try { bw = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(csvFile), _encoding), 1024); for (int i = 0; i < contentList.size(); i++) { String str = contentList.get(i); bw.write(str); bw.newLine(); } bw.flush(); } catch (UnsupportedEncodingException e) { e.printStackTrace(); } catch (FileNotFoundException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } finally { if (!StringUtils.isEmpty(bw)) { bw.clo(); } } } /** * @param filename 获取文件的行数 * @return * @throws IOException */ public static long getFileLineNum(String filename) throws IOException { long time1 = System.currentTimeMillis(); //取出文件总行数 long count = 0; InputStreamReader fr = null; BufferedReader br = null; try { fr = new InputStreamReader(new FileInputStream(filename), _encoding); br = new BufferedReader(fr); // 读取一行 while (br.readLine() != null) { count++; } } catch (Exception e) { e.printStackTrace(); } finally { if (!StringUtils.isEmpty(br)) { br.clo(); } if (!StringUtils.isEmpty(fr)) { fr.clo(); } } long time2 = System.currentTimeMillis(); log.info("getFileLineNum=={} time is:{}", count, (time2 - time1)); return count; }}
实现分割后写入文件中
实现读取文件的行数
本文发布于:2023-02-28 21:10:00,感谢您对本站的认可!
本文链接:https://www.wtabcd.cn/zhishi/a/1677731915101204.html
版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系,我们将在24小时内删除。
本文word下载地址:文件切割(PDF文件分割).doc
本文 PDF 下载地址:文件切割(PDF文件分割).pdf
留言与评论(共有 0 条评论) |