From a3e2615638b2a1cd7498edc844fc0ef7f1bb5eaf Mon Sep 17 00:00:00 2001 From: Lala5th Date: Sat, 1 Aug 2020 17:32:51 +0200 Subject: [PATCH 01/10] Updated cmake. Added cnpy-config.cmake.in so it would be easier to add using find_package to other cmake projects. --- CMakeLists.txt | 4 ++++ cmake/cnpy-config.cmake.in | 2 ++ 2 files changed, 6 insertions(+) create mode 100644 cmake/cnpy-config.cmake.in diff --git a/CMakeLists.txt b/CMakeLists.txt index 9eb550f..becf6bf 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -23,6 +23,10 @@ if(ENABLE_STATIC) install(TARGETS "cnpy-static" ARCHIVE DESTINATION lib) endif(ENABLE_STATIC) +configure_file("${PROJECT_SOURCE_DIR}/cmake/cnpy-config.cmake.in" "${CMAKE_CURRENT_BINARY_DIR}/cnpy-config.cmake" @ONLY IMMEDIATE) +install(FILES "${PROJECT_BINARY_DIR}/cnpy-config.cmake" DESTINATION "lib/cmake/cnpy-${PROJECT_VERSION}") + + install(FILES "cnpy.h" DESTINATION include) install(FILES "mat2npz" "npy2mat" "npz2mat" DESTINATION bin PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE) diff --git a/cmake/cnpy-config.cmake.in b/cmake/cnpy-config.cmake.in new file mode 100644 index 0000000..95f508e --- /dev/null +++ b/cmake/cnpy-config.cmake.in @@ -0,0 +1,2 @@ +set(CNPY_INCLUDE_DIRS @CMAKE_INSTALL_PREFIX@/include) +set(CNPY_LIBRARIES -L@CMAKE_INSTALL_PREFIX@/lib cnpy) From 428f9176fee99c89cbdb3c2496be1b966ca6d8f5 Mon Sep 17 00:00:00 2001 From: Lala5th Date: Wed, 5 Aug 2020 10:29:33 +0200 Subject: [PATCH 02/10] Added way to save tuples[Only for npz_save currently] --- .gitignore | 1 + CMakeLists.txt | 2 +- cnpy.cpp | 21 +++--- cnpy.h | 188 +++++++++++++++++++++++++++++++++++++++++++++++-- example1.cpp | 24 ++++--- out.npy | Bin 0 -> 144 bytes 6 files changed, 210 insertions(+), 26 deletions(-) create mode 100644 .gitignore create mode 100644 out.npy diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..a173085 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +.ccls-cache diff --git a/CMakeLists.txt b/CMakeLists.txt index becf6bf..c6a2b74 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,7 +5,7 @@ endif(COMMAND cmake_policy) project(CNPY) -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17") option(ENABLE_STATIC "Build static (.a) library" ON) diff --git a/cnpy.cpp b/cnpy.cpp index 2d28578..b785ce3 100644 --- a/cnpy.cpp +++ b/cnpy.cpp @@ -87,7 +87,7 @@ void cnpy::parse_npy_header(unsigned char* buffer,size_t& word_size, std::vector } //endian, word size, data type - //byte order code | stands for not applicable. + //byte order code | stands for not applicable. //not sure when this applies except for byte array loc1 = header.find("descr")+9; bool littleEndian = (header[loc1] == '<' || header[loc1] == '|' ? true : false); @@ -101,9 +101,9 @@ void cnpy::parse_npy_header(unsigned char* buffer,size_t& word_size, std::vector word_size = atoi(str_ws.substr(0,loc2).c_str()); } -void cnpy::parse_npy_header(FILE* fp, size_t& word_size, std::vector& shape, bool& fortran_order) { +void cnpy::parse_npy_header(FILE* fp, size_t& word_size, std::vector& shape, bool& fortran_order) { char buffer[256]; - size_t res = fread(buffer,sizeof(char),11,fp); + size_t res = fread(buffer,sizeof(char),11,fp); if(res != 11) throw std::runtime_error("parse_npy_header: failed fread"); std::string header = fgets(buffer,256,fp); @@ -135,7 +135,7 @@ void cnpy::parse_npy_header(FILE* fp, size_t& word_size, std::vector& sh } //endian, word size, data type - //byte order code | stands for not applicable. + //byte order code | stands for not applicable. //not sure when this applies except for byte array loc1 = header.find("descr"); if (loc1 == std::string::npos) @@ -234,7 +234,7 @@ cnpy::npz_t cnpy::npz_load(std::string fname) { throw std::runtime_error("npz_load: Error! Unable to open file "+fname+"!"); } - cnpy::npz_t arrays; + cnpy::npz_t arrays; while(1) { std::vector local_header(30); @@ -252,7 +252,7 @@ cnpy::npz_t cnpy::npz_load(std::string fname) { if(vname_res != name_len) throw std::runtime_error("npz_load: failed fread"); - //erase the lagging .npy + //erase the lagging .npy varname.erase(varname.end()-4,varname.end()); //read in the extra field @@ -273,7 +273,7 @@ cnpy::npz_t cnpy::npz_load(std::string fname) { } fclose(fp); - return arrays; + return arrays; } cnpy::NpyArray cnpy::npz_load(std::string fname, std::string varname) { @@ -293,7 +293,7 @@ cnpy::NpyArray cnpy::npz_load(std::string fname, std::string varname) { //read in the variable name uint16_t name_len = *(uint16_t*) &local_header[26]; std::string vname(name_len,' '); - size_t vname_res = fread(&vname[0],sizeof(char),name_len,fp); + size_t vname_res = fread(&vname[0],sizeof(char),name_len,fp); if(vname_res != name_len) throw std::runtime_error("npz_load: failed fread"); vname.erase(vname.end()-4,vname.end()); //erase the lagging .npy @@ -301,7 +301,7 @@ cnpy::NpyArray cnpy::npz_load(std::string fname, std::string varname) { //read in the extra field uint16_t extra_field_len = *(uint16_t*) &local_header[28]; fseek(fp,extra_field_len,SEEK_CUR); //skip past the extra field - + uint16_t compr_method = *reinterpret_cast(&local_header[0]+8); uint32_t compr_bytes = *reinterpret_cast(&local_header[0]+18); uint32_t uncompr_bytes = *reinterpret_cast(&local_header[0]+22); @@ -335,6 +335,3 @@ cnpy::NpyArray cnpy::npy_load(std::string fname) { fclose(fp); return arr; } - - - diff --git a/cnpy.h b/cnpy.h index 0d3bb4c..e961b0c 100644 --- a/cnpy.h +++ b/cnpy.h @@ -59,12 +59,13 @@ namespace cnpy { bool fortran_order; size_t num_vals; }; - - using npz_t = std::map; + + using npz_t = std::map; char BigEndianTest(); char map_type(const std::type_info& t); template std::vector create_npy_header(const std::vector& shape); + template std::vector create_npy_tuple_header(const std::vector& shape); void parse_npy_header(FILE* fp,size_t& word_size, std::vector& shape, bool& fortran_order); void parse_npy_header(unsigned char* buffer,size_t& word_size, std::vector& shape, bool& fortran_order); void parse_zip_footer(FILE* fp, uint16_t& nrecs, size_t& global_header_size, size_t& global_header_offset); @@ -75,7 +76,7 @@ namespace cnpy { template std::vector& operator+=(std::vector& lhs, const T rhs) { //write in little endian for(size_t byte = 0; byte < sizeof(T); byte++) { - char val = *((char*)&rhs+byte); + char val = *((char*)&rhs+byte); lhs.push_back(val); } return lhs; @@ -84,6 +85,31 @@ namespace cnpy { template<> std::vector& operator+=(std::vector& lhs, const std::string rhs); template<> std::vector& operator+=(std::vector& lhs, const char* rhs); + template constexpr void iterative_write_data(std::tuple data, const std::vector& shape,FILE* fp){ + + fwrite(&std::get(data),sizeof(typename std::tuple_element>::type),1,fp); + if constexpr(N != sizeof...(COLS) - 1){ + iterative_write_data(data, shape, fp); + } + } + + template constexpr size_t size_of(){ + + size_t size = 0; + size += sizeof(typename std::tuple_element>::type); + if constexpr(N != sizeof...(COLS) - 1){ + size += size_of(); + } + return size; + } + + template constexpr void generate_crc(uint32_t& crc, std::tuple data){ + + crc = crc32(crc,(uint8_t*)&std::get(data),sizeof(typename std::tuple_element>::type)); + if constexpr(N != sizeof...(COLS) - 1){ + generate_crc(crc,data); + } + } template void npy_save(std::string fname, const T* data, const std::vector shape, std::string mode = "w") { FILE* fp = NULL; @@ -220,6 +246,102 @@ namespace cnpy { fclose(fp); } + template void npz_save(std::string zipname, std::string fname, const std::vector> data, std::string mode = "w") + { + //first, append a .npy to the fname + fname += ".npy"; + + //now, on with the show + FILE* fp = NULL; + uint16_t nrecs = 0; + size_t global_header_offset = 0; + std::vector global_header; + + if(mode == "a") fp = fopen(zipname.c_str(),"r+b"); + + if(fp) { + //zip file exists. we need to add a new npy file to it. + //first read the footer. this gives us the offset and size of the global header + //then read and store the global header. + //below, we will write the the new data at the start of the global header then append the global header and footer below it + size_t global_header_size; + parse_zip_footer(fp,nrecs,global_header_size,global_header_offset); + fseek(fp,global_header_offset,SEEK_SET); + global_header.resize(global_header_size); + size_t res = fread(&global_header[0],sizeof(char),global_header_size,fp); + if(res != global_header_size){ + throw std::runtime_error("npz_save: header read error while adding to existing zip"); + } + fseek(fp,global_header_offset,SEEK_SET); + } + else { + fp = fopen(zipname.c_str(),"wb"); + } + size_t v_size = data.size(); + std::vector shape = {v_size}; + std::vector npy_header = create_npy_tuple_header({v_size}); + + size_t nels = std::accumulate(shape.begin(),shape.end(),1,std::multiplies()); + size_t nbytes = nels*size_of<0,COLS...>() + npy_header.size(); + + //get the CRC of the data to be added + uint32_t crc = crc32(0L,(uint8_t*)&npy_header[0],npy_header.size()); + for(auto d : data) + generate_crc<0,COLS...>(crc,d); + //crc = crc32(crc,(uint8_t*)data.data(),nels*size_of<0,COLS...>()); + + //build the local header + std::vector local_header; + local_header += "PK"; //first part of sig + local_header += (uint16_t) 0x0403; //second part of sig + local_header += (uint16_t) 20; //min version to extract + local_header += (uint16_t) 0; //general purpose bit flag + local_header += (uint16_t) 0; //compression method + local_header += (uint16_t) 0; //file last mod time + local_header += (uint16_t) 0; //file last mod date + local_header += (uint32_t) crc; //crc + local_header += (uint32_t) nbytes; //compressed size + local_header += (uint32_t) nbytes; //uncompressed size + local_header += (uint16_t) fname.size(); //fname length + local_header += (uint16_t) 0; //extra field length + local_header += fname; + + //build global header + global_header += "PK"; //first part of sig + global_header += (uint16_t) 0x0201; //second part of sig + global_header += (uint16_t) 20; //version made by + global_header.insert(global_header.end(),local_header.begin()+4,local_header.begin()+30); + global_header += (uint16_t) 0; //file comment length + global_header += (uint16_t) 0; //disk number where file starts + global_header += (uint16_t) 0; //internal file attributes + global_header += (uint32_t) 0; //external file attributes + global_header += (uint32_t) global_header_offset; //relative offset of local file header, since it begins where the global header used to begin + global_header += fname; + + //build footer + std::vector footer; + footer += "PK"; //first part of sig + footer += (uint16_t) 0x0605; //second part of sig + footer += (uint16_t) 0; //number of this disk + footer += (uint16_t) 0; //disk where footer starts + footer += (uint16_t) (nrecs+1); //number of records on this disk + footer += (uint16_t) (nrecs+1); //total number of records + footer += (uint32_t) global_header.size(); //nbytes of global headers + footer += (uint32_t) (global_header_offset + nbytes + local_header.size()); //offset of start of global headers, since global header now starts after newly written array + footer += (uint16_t) 0; //zip file comment length + + //write everything + fwrite(&local_header[0],sizeof(char),local_header.size(),fp); + fwrite(&npy_header[0],sizeof(char),npy_header.size(),fp); + for(auto d : data){ + iterative_write_data<0>(d,shape,fp); + } + //fwrite(data.data(),size_of<0,COLS...>(),nels,fp); + fwrite(&global_header[0],sizeof(char),global_header.size(),fp); + fwrite(&footer[0],sizeof(char),footer.size(),fp); + fclose(fp); + } + template void npy_save(std::string fname, const std::vector data, std::string mode = "w") { std::vector shape; shape.push_back(data.size()); @@ -232,7 +354,65 @@ namespace cnpy { npz_save(zipname, fname, &data[0], shape, mode); } - template std::vector create_npy_header(const std::vector& shape) { + template + constexpr void iterate_dtype(std::vector& descr){ + + char endianTest = BigEndianTest(); + + descr += "('"; + descr += std::to_string(N); + descr += "', '"; + descr += endianTest; + descr += map_type(typeid(typename std::tuple_element>::type)); + descr += std::to_string(sizeof(typename std::tuple_element>::type)); + descr += "', (1,)"; + if constexpr(N != sizeof...(COLS)-1){ + descr += "),"; + iterate_dtype(descr); + }else{ + descr += ")"; + } + + } + + template void construct_tuple_dtype(std::vector& descr){ + + descr += "["; + iterate_dtype<0,COLS...>(descr); + descr += "]"; + + } + + template std::vector create_npy_tuple_header(const std::vector& shape) { + + std::vector dict; + dict += "{'descr': "; + construct_tuple_dtype(dict); + dict += ", 'fortran_order': False, 'shape': ("; + dict += std::to_string(shape[0]); + for(size_t i = 1;i < shape.size();i++) { + dict += ", "; + dict += std::to_string(shape[i]); + } + if(shape.size() == 1) dict += ","; + dict += "), }"; + //pad with spaces so that preamble+dict is modulo 16 bytes. preamble is 10 bytes. dict needs to end with \n + int remainder = 16 - (10 + dict.size()) % 16; + dict.insert(dict.end(),remainder,' '); + dict.back() = '\n'; + + std::vector header; + header += (char) 0x93; + header += "NUMPY"; + header += (char) 0x01; //major version of numpy format + header += (char) 0x00; //minor version of numpy format + header += (uint16_t) dict.size(); + header.insert(header.end(),dict.begin(),dict.end()); + + return header; + } + + template std::vector create_npy_header(const std::vector& shape) { std::vector dict; dict += "{'descr': '"; diff --git a/example1.cpp b/example1.cpp index 70ac5aa..65db46f 100644 --- a/example1.cpp +++ b/example1.cpp @@ -17,13 +17,18 @@ int main() std::vector> data(Nx*Ny*Nz); for(int i = 0;i < Nx*Ny*Nz;i++) data[i] = std::complex(rand(),rand()); + std::vector> tuple_array; + tuple_array.push_back(std::make_tuple('a',5,'c')); + tuple_array.push_back(std::make_tuple('b',4,'z')); + tuple_array.push_back(std::make_tuple('c',6,'g')); + //save it to file cnpy::npy_save("arr1.npy",&data[0],{Nz,Ny,Nx},"w"); //load it into a new array cnpy::NpyArray arr = cnpy::npy_load("arr1.npy"); std::complex* loaded_data = arr.data>(); - + //make sure the loaded data matches the saved data assert(arr.word_size == sizeof(std::complex)); assert(arr.shape.size() == 3 && arr.shape[0] == Nz && arr.shape[1] == Ny && arr.shape[2] == Nx); @@ -31,7 +36,7 @@ int main() //append the same data to file //npy array on file now has shape (Nz+Nz,Ny,Nx) - cnpy::npy_save("arr1.npy",&data[0],{Nz,Ny,Nx},"a"); +// cnpy::npy_save("arr1.npy",&data[0],{Nz,Ny,Nx},"a"); //now write to an npz file //non-array variables are treated as 1D arrays with 1 element @@ -40,16 +45,17 @@ int main() cnpy::npz_save("out.npz","myVar1",&myVar1,{1},"w"); //"w" overwrites any existing file cnpy::npz_save("out.npz","myVar2",&myVar2,{1},"a"); //"a" appends to the file we created above cnpy::npz_save("out.npz","arr1",&data[0],{Nz,Ny,Nx},"a"); //"a" appends to the file we created above + cnpy::npz_save("out.npz","tuplearr",tuple_array,"a"); //"a" appends to the file we created above //load a single var from the npz file - cnpy::NpyArray arr2 = cnpy::npz_load("out.npz","arr1"); +// cnpy::NpyArray arr2 = cnpy::npz_load("out.npz","arr1"); //load the entire npz file - cnpy::npz_t my_npz = cnpy::npz_load("out.npz"); - +// cnpy::npz_t my_npz = cnpy::npz_load("out.npz"); + //check that the loaded myVar1 matches myVar1 - cnpy::NpyArray arr_mv1 = my_npz["myVar1"]; - double* mv1 = arr_mv1.data(); - assert(arr_mv1.shape.size() == 1 && arr_mv1.shape[0] == 1); - assert(mv1[0] == myVar1); +// cnpy::NpyArray arr_mv1 = my_npz["myVar1"]; +// double* mv1 = arr_mv1.data(); +// assert(arr_mv1.shape.size() == 1 && arr_mv1.shape[0] == 1); +// assert(mv1[0] == myVar1); } diff --git a/out.npy b/out.npy new file mode 100644 index 0000000000000000000000000000000000000000..83b9d3cdd62a814b3cf7b0d3531d6fa75019b7aa GIT binary patch literal 144 zcmbR27wQ`j$;eQ~P_3SlTAW;@Zlw^dp;E*Xs~An(+my(31lAT literal 0 HcmV?d00001 From a9be5f6ab33531f1dccc8e4b9d3c1dabc3337ffc Mon Sep 17 00:00:00 2001 From: Lala5th Date: Fri, 7 Aug 2020 06:28:34 +0200 Subject: [PATCH 03/10] Implemented npy write and append. I have implemented all write functionality (Maybe apart from tedious checking of dtype). Read functionality might be possible, but I have not looked at that part of the code so I am unsure about the difficulty of the undertaking. --- cnpy.cpp | 60 ++++++++++++++++++++++ cnpy.h | 141 ++++++++++++++++++++++++++++++++++++++------------- example1.cpp | 17 ++++--- 3 files changed, 175 insertions(+), 43 deletions(-) diff --git a/cnpy.cpp b/cnpy.cpp index b785ce3..231ea99 100644 --- a/cnpy.cpp +++ b/cnpy.cpp @@ -152,6 +152,66 @@ void cnpy::parse_npy_header(FILE* fp, size_t& word_size, std::vector& sh word_size = atoi(str_ws.substr(0,loc2).c_str()); } +void cnpy::parse_npy_header(FILE* fp, std::vector dtype_descr, std::vector& shape, bool& fortran_order) { + char buffer[256]; + size_t res = fread(buffer,sizeof(char),11,fp); + if(res != 11) + throw std::runtime_error("parse_npy_header: failed fread"); + std::string header = fgets(buffer,256,fp); + assert(header[header.size()-1] == '\n'); + + size_t loc1, loc2; + + //fortran order + loc1 = header.find("fortran_order"); + if (loc1 == std::string::npos) + throw std::runtime_error("parse_npy_header: failed to find header keyword: 'fortran_order'"); + loc1 += 16; + fortran_order = (header.substr(loc1,4) == "True" ? true : false); + + //shape + size_t loc = header.find("]"); + if(loc == std::string::npos) + throw std::runtime_error("parse_npy_header: failed to find header keyword: ']' signalling end of dtype descriptor"); + loc1 = header.find("(",loc); + loc2 = header.find(")",loc); + if (loc1 == std::string::npos || loc2 == std::string::npos) + throw std::runtime_error("parse_npy_header: failed to find header keyword: '(' or ')'"); + + std::regex num_regex("[0-9][0-9]*"); + std::smatch sm; + shape.clear(); + + std::string str_shape = header.substr(loc1+1,loc2-loc1-1); + while(std::regex_search(str_shape, sm, num_regex)) { + shape.push_back(std::stoi(sm[0].str())); + str_shape = sm.suffix().str(); + } + + // Only enforces matching dtypes + loc1 = header.find("["); + loc2 = loc; + if (loc1 == std::string::npos || loc2 == std::string::npos) + throw std::runtime_error("parse_npy_header: failed to find header keyword: '[' or ']'"); // Find bounds of dtype + + std::string descr = header.substr(loc1,loc2-loc1+1); + int offset_in = 0; + for(int i = 0;i != descr.size();i++){ // Check if found and provided dtype match + if(dtype_descr[i+offset_in] == ' '){ // Possible Out Of Bounds, but only if invalid dtype, or if this loop is buggy/the substr gen is buggy + offset_in++; + i--; + continue; + } + if(descr[i] == ' '){ + offset_in--; + continue; + } + if(descr[i] != dtype_descr[i+offset_in]) + throw std::runtime_error("Wrong dtyp of .npy file"); // Can only avoid if corrupt file, but then throws error above + // NO error if compatible datatypes (i.e. char and int is footer(22); diff --git a/cnpy.h b/cnpy.h index e961b0c..533ce24 100644 --- a/cnpy.h +++ b/cnpy.h @@ -68,6 +68,7 @@ namespace cnpy { template std::vector create_npy_tuple_header(const std::vector& shape); void parse_npy_header(FILE* fp,size_t& word_size, std::vector& shape, bool& fortran_order); void parse_npy_header(unsigned char* buffer,size_t& word_size, std::vector& shape, bool& fortran_order); + void parse_npy_header(FILE* fp,std::vector dtype_descr, std::vector& shape, bool& fortran_order); void parse_zip_footer(FILE* fp, uint16_t& nrecs, size_t& global_header_size, size_t& global_header_offset); npz_t npz_load(std::string fname); NpyArray npz_load(std::string fname, std::string varname); @@ -85,6 +86,35 @@ namespace cnpy { template<> std::vector& operator+=(std::vector& lhs, const std::string rhs); template<> std::vector& operator+=(std::vector& lhs, const char* rhs); + + template constexpr void iterate_dtype(std::vector& descr){ + + char endianTest = BigEndianTest(); + + descr += "('"; + descr += std::to_string(N); + descr += "', '"; + descr += endianTest; + descr += map_type(typeid(typename std::tuple_element>::type)); + descr += std::to_string(sizeof(typename std::tuple_element>::type)); + descr += "', (1,)"; + if constexpr(N != sizeof...(COLS)-1){ + descr += "),"; + iterate_dtype(descr); + }else{ + descr += ")"; + } + + } + + template void construct_tuple_dtype(std::vector& descr){ + + descr += "["; + iterate_dtype<0,COLS...>(descr); + descr += "]"; + + } + template constexpr void iterative_write_data(std::tuple data, const std::vector& shape,FILE* fp){ fwrite(&std::get(data),sizeof(typename std::tuple_element>::type),1,fp); @@ -156,6 +186,58 @@ namespace cnpy { fclose(fp); } + template void npy_save(std::string fname, const std::tuple* data, const std::vector shape, std::string mode = "w") { + FILE* fp = NULL; + std::vector true_data_shape; //if appending, the shape of existing + new data + + if(mode == "a") fp = fopen(fname.c_str(),"r+b"); + + if(fp) { + //file exists. we need to append to it. read the header, modify the array size + size_t word_size; + bool fortran_order; + std::vector dtype; + construct_tuple_dtype(dtype); + parse_npy_header(fp,dtype,true_data_shape,fortran_order); + assert(!fortran_order); + + if(true_data_shape.size() != shape.size()) { + std::cout<<"libnpy error: npy_save attempting to append misdimensioned data to "< header = create_npy_tuple_header(true_data_shape); + size_t nels = std::accumulate(shape.begin(),shape.end(),1,std::multiplies()); + + fseek(fp,0,SEEK_SET); + fwrite(&header[0],sizeof(char),header.size(),fp); + fseek(fp,0,SEEK_END); + size_t len = 1; + if(shape.empty()){ + len = 0; + } + for(auto d : shape){ + len *= d; + } + for(int i;i < len;i++){ + iterative_write_data<0>(data[i],shape,fp); + } + fclose(fp); + } + template void npz_save(std::string zipname, std::string fname, const T* data, const std::vector& shape, std::string mode = "w") { //first, append a .npy to the fname @@ -246,7 +328,7 @@ namespace cnpy { fclose(fp); } - template void npz_save(std::string zipname, std::string fname, const std::vector> data, std::string mode = "w") + template void npz_save(std::string zipname, std::string fname, const std::tuple* data, const std::vector& shape, std::string mode = "w") { //first, append a .npy to the fname fname += ".npy"; @@ -277,17 +359,22 @@ namespace cnpy { else { fp = fopen(zipname.c_str(),"wb"); } - size_t v_size = data.size(); - std::vector shape = {v_size}; - std::vector npy_header = create_npy_tuple_header({v_size}); + std::vector npy_header = create_npy_tuple_header(shape); size_t nels = std::accumulate(shape.begin(),shape.end(),1,std::multiplies()); size_t nbytes = nels*size_of<0,COLS...>() + npy_header.size(); //get the CRC of the data to be added + size_t len = 1; + if(shape.empty()){ + len = 0; + } + for(auto d : shape){ + len *= d; + } uint32_t crc = crc32(0L,(uint8_t*)&npy_header[0],npy_header.size()); - for(auto d : data) - generate_crc<0,COLS...>(crc,d); + for(int i;i < len;i++) + generate_crc<0,COLS...>(crc,data[i]); //crc = crc32(crc,(uint8_t*)data.data(),nels*size_of<0,COLS...>()); //build the local header @@ -333,10 +420,9 @@ namespace cnpy { //write everything fwrite(&local_header[0],sizeof(char),local_header.size(),fp); fwrite(&npy_header[0],sizeof(char),npy_header.size(),fp); - for(auto d : data){ - iterative_write_data<0>(d,shape,fp); + for(int i;i < len;i++){ + iterative_write_data<0>(data[i],shape,fp); } - //fwrite(data.data(),size_of<0,COLS...>(),nels,fp); fwrite(&global_header[0],sizeof(char),global_header.size(),fp); fwrite(&footer[0],sizeof(char),footer.size(),fp); fclose(fp); @@ -348,39 +434,22 @@ namespace cnpy { npy_save(fname, &data[0], shape, mode); } - template void npz_save(std::string zipname, std::string fname, const std::vector data, std::string mode = "w") { + template void npy_save(std::string fname, const std::vector> data, std::string mode = "w") { std::vector shape; shape.push_back(data.size()); - npz_save(zipname, fname, &data[0], shape, mode); + npy_save(fname, &data[0], shape, mode); } - template - constexpr void iterate_dtype(std::vector& descr){ - - char endianTest = BigEndianTest(); - - descr += "('"; - descr += std::to_string(N); - descr += "', '"; - descr += endianTest; - descr += map_type(typeid(typename std::tuple_element>::type)); - descr += std::to_string(sizeof(typename std::tuple_element>::type)); - descr += "', (1,)"; - if constexpr(N != sizeof...(COLS)-1){ - descr += "),"; - iterate_dtype(descr); - }else{ - descr += ")"; - } - + template void npz_save(std::string zipname, std::string fname, const std::vector data, std::string mode = "w") { + std::vector shape; + shape.push_back(data.size()); + npz_save(zipname, fname, &data[0], shape, mode); } - template void construct_tuple_dtype(std::vector& descr){ - - descr += "["; - iterate_dtype<0,COLS...>(descr); - descr += "]"; - + template void npz_save(std::string zipname, std::string fname, const std::vector> data, std::string mode = "w") { + std::vector shape; + shape.push_back(data.size()); + npz_save(zipname, fname, &data[0], shape, mode); } template std::vector create_npy_tuple_header(const std::vector& shape) { diff --git a/example1.cpp b/example1.cpp index 65db46f..734dbe8 100644 --- a/example1.cpp +++ b/example1.cpp @@ -24,6 +24,9 @@ int main() //save it to file cnpy::npy_save("arr1.npy",&data[0],{Nz,Ny,Nx},"w"); + cnpy::npy_save("arr2.npy",tuple_array,"w"); + // try append + cnpy::npy_save("arr2.npy",new std::tuple('a',5,'g'),{1},"a"); //load it into a new array cnpy::NpyArray arr = cnpy::npy_load("arr1.npy"); @@ -36,7 +39,7 @@ int main() //append the same data to file //npy array on file now has shape (Nz+Nz,Ny,Nx) -// cnpy::npy_save("arr1.npy",&data[0],{Nz,Ny,Nx},"a"); + cnpy::npy_save("arr1.npy",&data[0],{Nz,Ny,Nx},"a"); //now write to an npz file //non-array variables are treated as 1D arrays with 1 element @@ -48,14 +51,14 @@ int main() cnpy::npz_save("out.npz","tuplearr",tuple_array,"a"); //"a" appends to the file we created above //load a single var from the npz file -// cnpy::NpyArray arr2 = cnpy::npz_load("out.npz","arr1"); + cnpy::NpyArray arr2 = cnpy::npz_load("out.npz","arr1"); //load the entire npz file -// cnpy::npz_t my_npz = cnpy::npz_load("out.npz"); + cnpy::npz_t my_npz = cnpy::npz_load("out.npz"); //check that the loaded myVar1 matches myVar1 -// cnpy::NpyArray arr_mv1 = my_npz["myVar1"]; -// double* mv1 = arr_mv1.data(); -// assert(arr_mv1.shape.size() == 1 && arr_mv1.shape[0] == 1); -// assert(mv1[0] == myVar1); + cnpy::NpyArray arr_mv1 = my_npz["myVar1"]; + double* mv1 = arr_mv1.data(); + assert(arr_mv1.shape.size() == 1 && arr_mv1.shape[0] == 1); + assert(mv1[0] == myVar1); } From 31392ad661365311f867f6be6b43ac48877cbe56 Mon Sep 17 00:00:00 2001 From: Lala5th Date: Fri, 7 Aug 2020 19:41:55 +0200 Subject: [PATCH 04/10] Updated Readme to include dependency on C++17 --- README.md | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 37c4a43..b97eedb 100644 --- a/README.md +++ b/README.md @@ -1,19 +1,19 @@ # Purpose: -NumPy offers the `save` method for easy saving of arrays into .npy and `savez` for zipping multiple .npy arrays together into a .npz file. +NumPy offers the `save` method for easy saving of arrays into .npy and `savez` for zipping multiple .npy arrays together into a .npz file. -`cnpy` lets you read and write to these formats in C++. +`cnpy` lets you read and write to these formats in C++. The motivation comes from scientific programming where large amounts of data are generated in C++ and analyzed in Python. -Writing to .npy has the advantage of using low-level C++ I/O (fread and fwrite) for speed and binary format for size. +Writing to .npy has the advantage of using low-level C++ I/O (fread and fwrite) for speed and binary format for size. The .npy file header takes care of specifying the size, shape, and data type of the array, so specifying the format of the data is unnecessary. Loading data written in numpy formats into C++ is equally simple, but requires you to type-cast the loaded data to the type of your choice. # Installation: -Default installation directory is /usr/local. +Default installation directory is /usr/local. To specify a different directory, add `-DCMAKE_INSTALL_PREFIX=/path/to/install/dir` to the cmake invocation in step 4. 1. get [cmake](www.cmake.org) @@ -28,7 +28,7 @@ To specify a different directory, add `-DCMAKE_INSTALL_PREFIX=/path/to/install/d To use, `#include"cnpy.h"` in your source code. Compile the source code mycode.cpp as ```bash -g++ -o mycode mycode.cpp -L/path/to/install/dir -lcnpy -lz --std=c++11 +g++ -o mycode mycode.cpp -L/path/to/install/dir -lcnpy -lz --std=c++17 ``` # Description: @@ -36,12 +36,12 @@ g++ -o mycode mycode.cpp -L/path/to/install/dir -lcnpy -lz --std=c++11 There are two functions for writing data: `npy_save` and `npz_save`. There are 3 functions for reading: -- `npy_load` will load a .npy file. -- `npz_load(fname)` will load a .npz and return a dictionary of NpyArray structues. +- `npy_load` will load a .npy file. +- `npz_load(fname)` will load a .npz and return a dictionary of NpyArray structues. - `npz_load(fname,varname)` will load and return the NpyArray for data varname from the specified .npz file. -The data structure for loaded data is below. -Data is accessed via the `data()`-method, which returns a pointer of the specified type (which must match the underlying datatype of the data). +The data structure for loaded data is below. +Data is accessed via the `data()`-method, which returns a pointer of the specified type (which must match the underlying datatype of the data). The array shape and word size are read from the npy header. ```c++ From f39beddf92be7d08d391403a5993ed0e370d4eec Mon Sep 17 00:00:00 2001 From: Lala5th Date: Wed, 16 Sep 2020 23:54:12 +0100 Subject: [PATCH 05/10] Fixed typo --- cnpy.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cnpy.h b/cnpy.h index 533ce24..df91793 100644 --- a/cnpy.h +++ b/cnpy.h @@ -232,7 +232,7 @@ namespace cnpy { for(auto d : shape){ len *= d; } - for(int i;i < len;i++){ + for(int i = 0;i < len;i++){ iterative_write_data<0>(data[i],shape,fp); } fclose(fp); @@ -373,7 +373,7 @@ namespace cnpy { len *= d; } uint32_t crc = crc32(0L,(uint8_t*)&npy_header[0],npy_header.size()); - for(int i;i < len;i++) + for(int i = 0;i < len;i++) generate_crc<0,COLS...>(crc,data[i]); //crc = crc32(crc,(uint8_t*)data.data(),nels*size_of<0,COLS...>()); @@ -420,7 +420,7 @@ namespace cnpy { //write everything fwrite(&local_header[0],sizeof(char),local_header.size(),fp); fwrite(&npy_header[0],sizeof(char),npy_header.size(),fp); - for(int i;i < len;i++){ + for(int i = 0;i < len;i++){ iterative_write_data<0>(data[i],shape,fp); } fwrite(&global_header[0],sizeof(char),global_header.size(),fp); From c818b3bc75ad576c74f6ebc669146dffc7dbe5c5 Mon Sep 17 00:00:00 2001 From: Lala5th Date: Mon, 28 Sep 2020 14:34:04 +0100 Subject: [PATCH 06/10] Fixed compiler warnings --- cnpy.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/cnpy.h b/cnpy.h index df91793..a43ca84 100644 --- a/cnpy.h +++ b/cnpy.h @@ -194,7 +194,6 @@ namespace cnpy { if(fp) { //file exists. we need to append to it. read the header, modify the array size - size_t word_size; bool fortran_order; std::vector dtype; construct_tuple_dtype(dtype); @@ -220,7 +219,6 @@ namespace cnpy { } std::vector header = create_npy_tuple_header(true_data_shape); - size_t nels = std::accumulate(shape.begin(),shape.end(),1,std::multiplies()); fseek(fp,0,SEEK_SET); fwrite(&header[0],sizeof(char),header.size(),fp); @@ -232,7 +230,7 @@ namespace cnpy { for(auto d : shape){ len *= d; } - for(int i = 0;i < len;i++){ + for(size_t i = 0;i < len;i++){ iterative_write_data<0>(data[i],shape,fp); } fclose(fp); @@ -373,7 +371,7 @@ namespace cnpy { len *= d; } uint32_t crc = crc32(0L,(uint8_t*)&npy_header[0],npy_header.size()); - for(int i = 0;i < len;i++) + for(size_t i = 0;i < len;i++) generate_crc<0,COLS...>(crc,data[i]); //crc = crc32(crc,(uint8_t*)data.data(),nels*size_of<0,COLS...>()); @@ -420,7 +418,7 @@ namespace cnpy { //write everything fwrite(&local_header[0],sizeof(char),local_header.size(),fp); fwrite(&npy_header[0],sizeof(char),npy_header.size(),fp); - for(int i = 0;i < len;i++){ + for(size_t i = 0;i < len;i++){ iterative_write_data<0>(data[i],shape,fp); } fwrite(&global_header[0],sizeof(char),global_header.size(),fp); From aa8658b1b244b8af5b3980af3084875e47fd76b0 Mon Sep 17 00:00:00 2001 From: Lala5th Date: Mon, 1 Feb 2021 22:23:15 +0000 Subject: [PATCH 07/10] Minor edit so unnecessary nesting doesn't happen --- cnpy.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cnpy.h b/cnpy.h index a43ca84..e454bed 100644 --- a/cnpy.h +++ b/cnpy.h @@ -97,7 +97,7 @@ namespace cnpy { descr += endianTest; descr += map_type(typeid(typename std::tuple_element>::type)); descr += std::to_string(sizeof(typename std::tuple_element>::type)); - descr += "', (1,)"; + descr += "'"; if constexpr(N != sizeof...(COLS)-1){ descr += "),"; iterate_dtype(descr); From ea5f39d00f8d3dcee8c517f641d4ec8f61d8abf8 Mon Sep 17 00:00:00 2001 From: Lajos Palanki Date: Mon, 16 Feb 2026 10:26:18 +0000 Subject: [PATCH 08/10] Marked unused variables --- cnpy.cpp | 16 ++++++++-------- example1.cpp | 4 ++-- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/cnpy.cpp b/cnpy.cpp index 231ea99..c4117bb 100644 --- a/cnpy.cpp +++ b/cnpy.cpp @@ -61,9 +61,9 @@ template<> std::vector& cnpy::operator+=(std::vector& lhs, const cha void cnpy::parse_npy_header(unsigned char* buffer,size_t& word_size, std::vector& shape, bool& fortran_order) { //std::string magic_string(buffer,6); - uint8_t major_version = *reinterpret_cast(buffer+6); - uint8_t minor_version = *reinterpret_cast(buffer+7); - uint16_t header_len = *reinterpret_cast(buffer+8); + [[maybe_unused]]uint8_t major_version = *reinterpret_cast(buffer+6); + [[maybe_unused]]uint8_t minor_version = *reinterpret_cast(buffer+7); + [[maybe_unused]]uint16_t header_len = *reinterpret_cast(buffer+8); std::string header(reinterpret_cast(buffer+9),header_len); size_t loc1, loc2; @@ -90,7 +90,7 @@ void cnpy::parse_npy_header(unsigned char* buffer,size_t& word_size, std::vector //byte order code | stands for not applicable. //not sure when this applies except for byte array loc1 = header.find("descr")+9; - bool littleEndian = (header[loc1] == '<' || header[loc1] == '|' ? true : false); + [[maybe_unused]]bool littleEndian = (header[loc1] == '<' || header[loc1] == '|' ? true : false); assert(littleEndian); //char type = header[loc1+1]; @@ -141,7 +141,7 @@ void cnpy::parse_npy_header(FILE* fp, size_t& word_size, std::vector& sh if (loc1 == std::string::npos) throw std::runtime_error("parse_npy_header: failed to find header keyword: 'descr'"); loc1 += 9; - bool littleEndian = (header[loc1] == '<' || header[loc1] == '|' ? true : false); + [[maybe_unused]]bool littleEndian = (header[loc1] == '<' || header[loc1] == '|' ? true : false); assert(littleEndian); //char type = header[loc1+1]; @@ -196,7 +196,7 @@ void cnpy::parse_npy_header(FILE* fp, std::vector dtype_descr, std::vector std::string descr = header.substr(loc1,loc2-loc1+1); int offset_in = 0; - for(int i = 0;i != descr.size();i++){ // Check if found and provided dtype match + for(size_t i = 0;i != descr.size();i++){ // Check if found and provided dtype match if(dtype_descr[i+offset_in] == ' '){ // Possible Out Of Bounds, but only if invalid dtype, or if this loop is buggy/the substr gen is buggy offset_in++; i--; @@ -220,7 +220,7 @@ void cnpy::parse_zip_footer(FILE* fp, uint16_t& nrecs, size_t& global_header_siz if(res != 22) throw std::runtime_error("parse_zip_footer: failed fread"); - uint16_t disk_no, disk_start, nrecs_on_disk, comment_len; + [[maybe_unused]]uint16_t disk_no, disk_start, nrecs_on_disk, comment_len; disk_no = *(uint16_t*) &footer[4]; disk_start = *(uint16_t*) &footer[6]; nrecs_on_disk = *(uint16_t*) &footer[8]; @@ -256,7 +256,7 @@ cnpy::NpyArray load_the_npz_array(FILE* fp, uint32_t compr_bytes, uint32_t uncom if(nread != compr_bytes) throw std::runtime_error("load_the_npy_file: failed fread"); - int err; + [[maybe_unused]]int err; z_stream d_stream; d_stream.zalloc = Z_NULL; diff --git a/example1.cpp b/example1.cpp index 734dbe8..9234603 100644 --- a/example1.cpp +++ b/example1.cpp @@ -30,7 +30,7 @@ int main() //load it into a new array cnpy::NpyArray arr = cnpy::npy_load("arr1.npy"); - std::complex* loaded_data = arr.data>(); + [[maybe_unused]] std::complex* loaded_data = arr.data>(); //make sure the loaded data matches the saved data assert(arr.word_size == sizeof(std::complex)); @@ -58,7 +58,7 @@ int main() //check that the loaded myVar1 matches myVar1 cnpy::NpyArray arr_mv1 = my_npz["myVar1"]; - double* mv1 = arr_mv1.data(); + [[maybe_unused]] double* mv1 = arr_mv1.data(); assert(arr_mv1.shape.size() == 1 && arr_mv1.shape[0] == 1); assert(mv1[0] == myVar1); } From b736a3604aeefdfd25eeba474f2c88deb4abd0c7 Mon Sep 17 00:00:00 2001 From: Lajos Palanki Date: Thu, 12 Mar 2026 09:40:55 +0000 Subject: [PATCH 09/10] Fixed typo in example --- example1.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/example1.cpp b/example1.cpp index 9234603..3ad03a5 100644 --- a/example1.cpp +++ b/example1.cpp @@ -48,7 +48,7 @@ int main() cnpy::npz_save("out.npz","myVar1",&myVar1,{1},"w"); //"w" overwrites any existing file cnpy::npz_save("out.npz","myVar2",&myVar2,{1},"a"); //"a" appends to the file we created above cnpy::npz_save("out.npz","arr1",&data[0],{Nz,Ny,Nx},"a"); //"a" appends to the file we created above - cnpy::npz_save("out.npz","tuplearr",tuple_array,"a"); //"a" appends to the file we created above + cnpy::npz_save("out.npz","tuplearr",tuple_array,"a"); //"a" appends to the file we created above //load a single var from the npz file cnpy::NpyArray arr2 = cnpy::npz_load("out.npz","arr1"); From fe14f28f631422b8ef89828b7570e443e3222cee Mon Sep 17 00:00:00 2001 From: Lajos Palanki Date: Mon, 16 Mar 2026 12:54:30 +0000 Subject: [PATCH 10/10] Updated deprecated cmake option --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c6a2b74..3344707 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ -CMAKE_MINIMUM_REQUIRED(VERSION 3.0 FATAL_ERROR) +CMAKE_MINIMUM_REQUIRED(VERSION 3.10 FATAL_ERROR) if(COMMAND cmake_policy) cmake_policy(SET CMP0003 NEW) endif(COMMAND cmake_policy)