From 33d673c46d640abaadc3a6dfaa6ccbda3224f571 Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Sun, 25 Aug 2024 11:36:46 +0200 Subject: [PATCH 1/4] tprintf: Add C++ stream for log messages Signed-off-by: Stefan Weil --- src/ccutil/tesserrstream.h | 67 ++++++++++++++++++++++++++++++++++++++ src/ccutil/tprintf.cpp | 12 ++++--- src/ccutil/tprintf.h | 3 ++ 3 files changed, 78 insertions(+), 4 deletions(-) create mode 100644 src/ccutil/tesserrstream.h diff --git a/src/ccutil/tesserrstream.h b/src/ccutil/tesserrstream.h new file mode 100644 index 0000000000..3452f35e99 --- /dev/null +++ b/src/ccutil/tesserrstream.h @@ -0,0 +1,67 @@ +// File: tesserrstream.h +// Description: C++ stream which enhances tprintf +// Author: Stefan Weil +// +// (C) Copyright 2024 +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef TESSERACT_CCUTIL_TESSERRSTREAM_H +#define TESSERACT_CCUTIL_TESSERRSTREAM_H + +#include "tprintf.h" + +#include <ostream> // for std::ostream + +namespace tesseract { + +class TessStreamBuf : public std::streambuf { +public: + TessStreamBuf() = default; + +protected: + virtual int_type overflow(int_type c) override { + if (c != EOF) { + if (debugfp == nullptr) { + debugfp = get_debugfp(); + } + if (fputc(c, debugfp) == EOF) { + return EOF; + } + } + return c; + } + + virtual std::streamsize xsputn(const char* s, std::streamsize n) override { + if (debugfp == nullptr) { + debugfp = get_debugfp(); + } + return fwrite(s, 1, n, debugfp); + } + +private: + FILE *debugfp = nullptr; +}; + +class TessErrStream : public std::ostream { +private: + TessStreamBuf buf; + +public: + TessErrStream() : std::ostream(nullptr), buf() { + rdbuf(&buf); + } +}; + +extern TessErrStream tesserr; + +} // namespace tesseract + +#endif // TESSERACT_CCUTIL_TESSERRSTREAM_H diff --git a/src/ccutil/tprintf.cpp b/src/ccutil/tprintf.cpp index 64a1430112..2739b6cec4 100644 --- a/src/ccutil/tprintf.cpp +++ b/src/ccutil/tprintf.cpp @@ -21,6 +21,7 @@ # include "config_auto.h" #endif +#include "tesserrstream.h" #include "tprintf.h" #include "params.h" @@ -36,7 +37,7 @@ INT_VAR(log_level, INT_MAX, "Logging level"); static STRING_VAR(debug_file, "", "File to send tprintf output to"); // File for debug output. -static FILE *debugfp; +FILE *debugfp; // Set output for log messages. // The output is written to stderr if debug_file is empty. @@ -49,7 +50,7 @@ static FILE *debugfp; // tprintf("write to /tmp/log\n"); // debug_file = ""; // tprintf("write to stderr\n"); -static void set_debugfp() { +FILE *get_debugfp() { if (debug_file.empty()) { // Write to stderr. if (debugfp != stderr && debugfp != nullptr) { @@ -66,15 +67,18 @@ static void set_debugfp() { #endif debugfp = fopen(debug_file.c_str(), "wb"); } + return debugfp; } // Trace printf. void tprintf(const char *format, ...)
{ - set_debugfp(); + FILE *f = get_debugfp(); va_list args; // variable args va_start(args, format); // variable list - vfprintf(debugfp, format, args); + vfprintf(f, format, args); va_end(args); } +TessErrStream tesserr; + } // namespace tesseract diff --git a/src/ccutil/tprintf.h b/src/ccutil/tprintf.h index 489964cb69..574cbbb708 100644 --- a/src/ccutil/tprintf.h +++ b/src/ccutil/tprintf.h @@ -36,6 +36,9 @@ extern TESS_API void tprintf( // Trace printf const char *format, ...) // Message __attribute__((format(printf, 1, 2))); +// Get file for debug output. +FILE *get_debugfp(); + } // namespace tesseract #undef __attribute__ From bd7b3571ccfac0da2e1cdece30879efa48d31a47 Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Fri, 23 Aug 2024 11:29:19 +0200 Subject: [PATCH 2/4] Print time for tessedit_timing_debug in milliseconds Optimize also the code a little bit. Signed-off-by: Stefan Weil --- src/ccmain/control.cpp | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/ccmain/control.cpp b/src/ccmain/control.cpp index 8416923f7d..454aa94f2a 100644 --- a/src/ccmain/control.cpp +++ b/src/ccmain/control.cpp @@ -41,6 +41,7 @@ #endif #include "sorthelper.h" #include "tesseractclass.h" +#include "tesserrstream.h" // for tesserr #include "tessvars.h" #include "werdit.h" @@ -1313,9 +1314,10 @@ void Tesseract::classify_word_and_language(int pass_n, PAGE_RES_IT *pr_it, WordD PointerVector<WERD_RES> best_words; // Points to the best result. May be word or in lang_words.
const WERD_RES *word = word_data->word; - clock_t start_t = 0; - if (tessedit_timing_debug) { - start_t = clock(); + clock_t total_time = 0; + const bool timing_debug = tessedit_timing_debug; + if (timing_debug) { + total_time = clock(); } const bool debug = classify_debug_level > 0 || multilang_debug_level > 0; if (debug) { @@ -1368,10 +1370,10 @@ void Tesseract::classify_word_and_language(int pass_n, PAGE_RES_IT *pr_it, WordD } else { tprintf("no best words!!\n"); } - if (tessedit_timing_debug) { - clock_t ocr_t = clock(); - tprintf("%s (ocr took %.2f sec)\n", word_data->word->best_choice->unichar_string().c_str(), - static_cast<double>(ocr_t - start_t) / CLOCKS_PER_SEC); + if (timing_debug) { + total_time = clock() - total_time; + tesserr << word_data->word->best_choice->unichar_string() + << " (ocr took " << 1000 * total_time / CLOCKS_PER_SEC << " ms)\n"; } } From 7ef8e3c7ee921d768402c41a49b209b793bc79ab Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Fri, 23 Aug 2024 11:47:56 +0200 Subject: [PATCH 3/4] Print time for ErrorCounter::ComputeErrorRate in milliseconds Optimize also the code, replace tprintf by C++ stream and call clock() only when needed.
Signed-off-by: Stefan Weil --- src/training/common/errorcounter.cpp | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/training/common/errorcounter.cpp b/src/training/common/errorcounter.cpp index 3d5a5bbb4f..5fb83cca29 100644 --- a/src/training/common/errorcounter.cpp +++ b/src/training/common/errorcounter.cpp @@ -23,6 +23,7 @@ #include "sampleiterator.h" #include "shapeclassifier.h" #include "shapetable.h" +#include "tesserrstream.h" #include "trainingsample.h" #include "trainingsampleset.h" #include "unicity_table.h" @@ -50,7 +51,10 @@ double ErrorCounter::ComputeErrorRate(ShapeClassifier *classifier, int report_le ErrorCounter counter(classifier->GetUnicharset(), fontsize); std::vector<UnicharRating> results; - clock_t start = clock(); + clock_t total_time = 0; + if (report_level > 1) { + total_time = clock(); + } unsigned total_samples = 0; double unscaled_error = 0.0; // Set a number of samples on which to run the classify debug mode. @@ -85,7 +89,6 @@ double ErrorCounter::ComputeErrorRate(ShapeClassifier *classifier, int report_le } ++total_samples; } - const double total_time = 1.0 * (clock() - start) / CLOCKS_PER_SEC; // Create the appropriate error report. unscaled_error = counter.ReportErrors(report_level, boosting_mode, fontinfo_table, *it, unichar_error, fonts_report); @@ -94,8 +97,9 @@ double ErrorCounter::ComputeErrorRate(ShapeClassifier *classifier, int report_le } if (report_level > 1 && total_samples > 0) { // It is useful to know the time in microseconds/char.
- tprintf("Errors computed in %.2fs at %.1f μs/char\n", total_time, - 1000000.0 * total_time / total_samples); + total_time = 1000 * (clock() - total_time) / CLOCKS_PER_SEC; + tesserr << "Errors computed in " << total_time << " ms at " + << 1000 * total_time / total_samples << " μs/char\n"; } return unscaled_error; } From 37d1c6506d488c4e828e63ccfe133d6c865873ad Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Tue, 3 Sep 2024 17:31:38 +0200 Subject: [PATCH 4/4] Add TESS_API in declaration for tesserr (fix sw build) Signed-off-by: Stefan Weil --- src/ccutil/tesserrstream.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/ccutil/tesserrstream.h b/src/ccutil/tesserrstream.h index 3452f35e99..80da99da0e 100644 --- a/src/ccutil/tesserrstream.h +++ b/src/ccutil/tesserrstream.h @@ -17,6 +17,7 @@ #define TESSERACT_CCUTIL_TESSERRSTREAM_H #include "tprintf.h" +#include <tesseract/export.h> // for TESS_API #include <ostream> // for std::ostream @@ -60,7 +61,7 @@ class TessErrStream : public std::ostream { } }; -extern TessErrStream tesserr; +extern TESS_API TessErrStream tesserr; } // namespace tesseract