Parquet、text 转 JSONL 的 C++ 方法(遍历文件目录,并生成对应的 JSONL 文件)。使用前在根目录创建 input 文件夹,放入要转换的数据;转换得到的 JSONL 会生成在 output 文件夹中。
# Build steps for cpp_tojsonl. The .bat bootstrap script below means these
# commands are meant for a Windows shell.

# Fetch the project together with its git submodules
# (presumably thirdparty/vcpkg is one of them -- confirm in .gitmodules).
git clone --recurse-submodules https://github.com/kkghrsbsb/cpp_tojsonl.git
cd cpp_tojsonl
# Bootstrap the bundled vcpkg checkout from inside its own directory.
cd thirdparty/vcpkg
.\bootstrap-vcpkg.bat
# Return to the repository root.
cd ../..
# Configure and build out-of-source, inside a dedicated build/ directory.
mkdir build
cd build
# Configure with the vcpkg toolchain file. CMAKE_BUILD_TYPE selects Release
# for single-config generators (e.g. Ninja, Makefiles).
cmake .. -DCMAKE_TOOLCHAIN_FILE=../thirdparty/vcpkg/scripts/buildsystems/vcpkg.cmake -DCMAKE_BUILD_TYPE=Release
# --config selects Release for multi-config generators (e.g. Visual Studio).
cmake --build . --config Release