Skip to content

Commit fc70f76

Browse files
Do the improvement
1 parent 8ee020e commit fc70f76

File tree

3 files changed

+49
-1
lines changed

3 files changed

+49
-1
lines changed

src/ccutil/tessdatamanager.cpp

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,32 @@ bool TessdataManager::LoadArchiveFile(const char *filename) {
8585
}
8686
return result;
8787
}
88+
89+
bool TessdataManager::SaveArchiveFile(const char *filename) const{
90+
bool result = false;
91+
archive *a = archive_write_new();
92+
archive_entry *ae = archive_entry_new();
93+
if (a != nullptr) {
94+
archive_write_set_format_zip(a);
95+
archive_write_open_filename(a, filename);
96+
std::string filename_str = filename;
97+
filename_str += ".";
98+
archive_entry_set_filetype(ae, AE_IFREG);
99+
archive_entry_set_perm(ae, 333);
100+
for (unsigned i = 0; i < TESSDATA_NUM_ENTRIES; ++i) {
101+
if (!entries_[i].empty()) {
102+
archive_entry_set_pathname(ae, (filename_str + kTessdataFileSuffixes[i]).c_str());
103+
archive_entry_set_size(ae, entries_[i].size());
104+
archive_write_header(a, ae);
105+
archive_write_data(a, &entries_[i][0], entries_[i].size());
106+
}
107+
}
108+
result = archive_write_close(a) == ARCHIVE_OK;
109+
archive_write_free(a);
110+
return result;
111+
}
112+
return result;
113+
}
88114
#endif
89115

90116
bool TessdataManager::Init(const char *data_file_name) {
@@ -162,12 +188,16 @@ void TessdataManager::OverwriteEntry(TessdataType type, const char *data, int si
162188

163189
// Saves to the given filename.
164190
bool TessdataManager::SaveFile(const char *filename, FileWriter writer) const {
165-
// TODO: This method supports only the proprietary file format.
191+
// TODO: This method supports only the proprietary file format.
166192
ASSERT_HOST(is_loaded_);
167193
std::vector<char> data;
168194
Serialize(&data);
169195
if (writer == nullptr) {
196+
#if defined(HAVE_LIBARCHIVE)
197+
return SaveArchiveFile(filename);
198+
#else
170199
return SaveDataToFile(data, filename);
200+
#endif
171201
} else {
172202
return (*writer)(data, filename);
173203
}

src/ccutil/tessdatamanager.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,7 @@ class TESS_API TessdataManager {
223223
private:
224224
// Use libarchive.
225225
bool LoadArchiveFile(const char *filename);
226+
bool SaveArchiveFile(const char *filename) const;
226227

227228
/**
228229
* Fills type with TessdataType of the tessdata component represented by the

src/training/combine_tessdata.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,19 @@ int main(int argc, char **argv) {
219219
tprintf("Failed to write modified traineddata:%s!\n", argv[2]);
220220
return EXIT_FAILURE;
221221
}
222+
} else if (argc == 3 && strcmp(argv[1], "-t") == 0) {
223+
#if defined(HAVE_LIBARCHIVE)
224+
if (!tm.Init(argv[2])) {
225+
tprintf("Failed to read %s\n", argv[2]);
226+
return EXIT_FAILURE;
227+
}
228+
if (!tm.SaveFile(argv[2], nullptr)) {
229+
tprintf("Failed to tranform traineddata:%s!\n", argv[2]);
230+
return EXIT_FAILURE;
231+
}
232+
#else
233+
tprintf("Failed to load libarchive. Is tesseract compiled with libarchive support?\n");
234+
#endif
222235
} else if (argc == 3 && strcmp(argv[1], "-d") == 0) {
223236
return list_components(tm, argv[2]);
224237
} else if (argc == 3 && strcmp(argv[1], "-l") == 0) {
@@ -272,6 +285,10 @@ int main(int argc, char **argv) {
272285
"Usage for compacting LSTM component to int:\n"
273286
" %s -c traineddata_file\n",
274287
argv[0]);
288+
printf(
289+
"Usage for transforming the proprietary .traineddata file to a zip archive:\n"
290+
" %s -t traineddata_file\n",
291+
argv[0]);
275292
return EXIT_FAILURE;
276293
}
277294
tm.Directory();

0 commit comments

Comments
 (0)