1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56
| public static Map<String, Long> statisticsParquet(String tablePath) throws IOException { Map<String, Long> map = new HashMap<>(3); Long rows = 0L; Long columns = 0L; Long fileSize = 0L;
FileSystem fs = null; ParquetFileReader parquetFileReader = null; try { fs = FileSystem.get(HdfsUtils.getConfiguration()); Path parquetFile; boolean isFirst = true;
RemoteIterator<LocatedFileStatus> listFiles = fs.listFiles(new Path(tablePath), true); while (listFiles.hasNext()) { LocatedFileStatus fileStatus = listFiles.next(); if (fileStatus.isFile() && fileStatus.getPath().toString().toLowerCase().endsWith(".parquet")) {
parquetFile = fileStatus.getPath(); fileSize += fileStatus.getLen();
parquetFileReader = new ParquetFileReader(HdfsUtils.getConfiguration(), parquetFile, ParquetMetadataConverter.NO_FILTER); rows += parquetFileReader.getRecordCount();
if (isFirst) { columns = (long) parquetFileReader.getFileMetaData().getSchema().getFieldCount(); isFirst = false; }
parquetFileReader.close(); } }
} finally { if (fs != null) { try { fs.close(); } catch (IOException e) { logger.error("FileSystem close ", e); } } if (parquetFileReader != null) { try { parquetFileReader.close(); } catch (IOException e) { logger.error(e.getLocalizedMessage(), e); } } }
map.put("rows", rows); map.put("columns", columns); map.put("fileSize", fileSize); return map; }
|