diff options
Diffstat (limited to 'src/libcamera/software_isp')
-rw-r--r-- | src/libcamera/software_isp/TODO | 279 | ||||
-rw-r--r-- | src/libcamera/software_isp/debayer.cpp | 132 | ||||
-rw-r--r-- | src/libcamera/software_isp/debayer.h | 54 | ||||
-rw-r--r-- | src/libcamera/software_isp/debayer_cpu.cpp | 807 | ||||
-rw-r--r-- | src/libcamera/software_isp/debayer_cpu.h | 158 | ||||
-rw-r--r-- | src/libcamera/software_isp/meson.build | 15 | ||||
-rw-r--r-- | src/libcamera/software_isp/software_isp.cpp | 357 | ||||
-rw-r--r-- | src/libcamera/software_isp/swstats_cpu.cpp | 432 | ||||
-rw-r--r-- | src/libcamera/software_isp/swstats_cpu.h | 97 |
9 files changed, 2331 insertions, 0 deletions
diff --git a/src/libcamera/software_isp/TODO b/src/libcamera/software_isp/TODO new file mode 100644 index 00000000..4fcee39b --- /dev/null +++ b/src/libcamera/software_isp/TODO @@ -0,0 +1,279 @@ +1. Setting F_SEAL_SHRINK and F_SEAL_GROW after ftruncate() + +>> SharedMem::SharedMem(const std::string &name, std::size_t size) +>> : name_(name), size_(size), mem_(nullptr) +>> +>> ... +>> +>> if (ftruncate(fd_.get(), size_) < 0) +>> return; +> +> Should we set the GROW and SHRINK seals (in a separate patch) ? + +Yes, this can be done. +Setting F_SEAL_SHRINK and F_SEAL_GROW after the ftruncate() call above could catch +some potential errors related to improper access to the shared memory allocated by +the SharedMemObject. + +--- + +2. Reconsider stats sharing + +>>> +void SwStatsCpu::finishFrame(void) +>>> +{ +>>> + *sharedStats_ = stats_; +>> +>> Is it more efficient to copy the stats instead of operating directly on +>> the shared memory ? +> +> I inherited doing things this way from Andrey. I kept this because +> we don't really have any synchronization with the IPA reading this. +> +> So the idea is to only touch this when the next set of statistics +> is ready since we don't know when the IPA is done with accessing +> the previous set of statistics ... +> +> This is both something which seems mostly a theoretic problem, +> yet also definitely something which I think we need to fix. +> +> Maybe use a ringbuffer of stats buffers and pass the index into +> the ringbuffer to the emit signal ? + +That would match how we deal with hardware ISPs, and I think that's a +good idea. It will help decoupling the processing side from the IPA. + +--- + +3. Remove statsReady signal + +> class SwStatsCpu +> { +> /** +> * \brief Signals that the statistics are ready +> */ +> Signal<> statsReady; + +But better, I wonder if the signal could be dropped completely. The +SwStatsCpu class does not operate asynchronously. Shouldn't whoever +calls the finishFrame() function then handle emitting the signal ? + +Now, the trouble is that this would be the DebayerCpu class, whose name +doesn't indicate as a prime candidate to handle stats. However, it +already exposes a getStatsFD() function, so we're already calling for +trouble :-) Either that should be moved to somewhere else, or the class +should be renamed. Considering that the class applies colour gains in +addition to performing the interpolation, it may be more of a naming +issue. + +Removing the signal and refactoring those classes doesn't have to be +addressed now, I think it would be part of a larger refactoring +(possibly also considering platforms that have no ISP but can produce +stats in hardware, such as the i.MX7), but please keep it on your radar. + +--- + +4. Hide internal representation of gains from callers + +> struct DebayerParams { +> static constexpr unsigned int kGain10 = 256; + +Forcing the caller to deal with the internal representation of gains +isn't nice, especially given that it precludes implementing gains of +different precisions in different backend. Wouldn't it be better to pass +the values as floating point numbers, and convert them to the internal +representation in the implementation of process() before using them ? + +--- + +5. Store ISP parameters in per-frame buffers + +> /** +> * \fn void Debayer::process(FrameBuffer *input, FrameBuffer *output, DebayerParams params) +> * \brief Process the bayer data into the requested format. +> * \param[in] input The input buffer. +> * \param[in] output The output buffer. +> * \param[in] params The parameters to be used in debayering. +> * +> * \note DebayerParams is passed by value deliberately so that a copy is passed +> * when this is run in another thread by invokeMethod(). +> */ + +Possibly something to address later, by storing ISP parameters in +per-frame buffers like we do for hardware ISPs. + +--- + +6. Input buffer copying configuration + +> DebayerCpu::DebayerCpu(std::unique_ptr<SwStatsCpu> stats) +> : stats_(std::move(stats)), gammaCorrection_(1.0) +> { +> enableInputMemcpy_ = true; + +Set this appropriately and/or make it configurable. + +--- + +7. Performance measurement configuration + +> void DebayerCpu::process(FrameBuffer *input, FrameBuffer *output, DebayerParams params) +> /* Measure before emitting signals */ +> if (measuredFrames_ < DebayerCpu::kLastFrameToMeasure && +> ++measuredFrames_ > DebayerCpu::kFramesToSkip) { +> timespec frameEndTime = {}; +> clock_gettime(CLOCK_MONOTONIC_RAW, &frameEndTime); +> frameProcessTime_ += timeDiff(frameEndTime, frameStartTime); +> if (measuredFrames_ == DebayerCpu::kLastFrameToMeasure) { +> const unsigned int measuredFrames = DebayerCpu::kLastFrameToMeasure - +> DebayerCpu::kFramesToSkip; +> LOG(Debayer, Info) +> << "Processed " << measuredFrames +> << " frames in " << frameProcessTime_ / 1000 << "us, " +> << frameProcessTime_ / (1000 * measuredFrames) +> << " us/frame"; +> } +> } + +I wonder if there would be a way to control at runtime when/how to +perform those measurements. Maybe that's a bit overkill. + +--- + +8. DebayerCpu cleanups + +> >> class DebayerCpu : public Debayer, public Object +> >> const SharedFD &getStatsFD() { return stats_->getStatsFD(); } +> > +> > This, +> +> Note the statistics pass-through stuff is sort of a necessary evil +> since we want one main loop going over the data line by line and +> doing both debayering as well as stats while the line is still +> hot in the l2 cache. And things like the process2() and process4() +> loops are highly CPU debayering specific so I don't think we should +> move those out of the CpuDebayer code. + +Yes, that I understood from the review. "necessary evil" is indeed the +right term :-) I expect it will take quite some design skills to balance +the need for performances and the need for a maintainable architecture. + +> > plus the fact that this class handles colour gains and gamma, +> > makes me thing we have either a naming issue, or an architecture issue. +> +> I agree that this does a bit more then debayering, although +> the debayering really is the main thing it does. +> +> I guess the calculation of the rgb lookup tables which do the +> color gains and gamma could be moved outside of this class, +> that might even be beneficial for GPU based debayering assuming +> that that is going to use rgb lookup tables too (it could +> implement actual color gains + gamma correction in some different +> way). +> +> I think this falls under the lets wait until we have a GPU +> based SoftISP MVP/POC and then do some refactoring to see which +> bits should go where. + +--- + +8. Decouple pipeline and IPA naming + +> The current src/ipa/meson.build assumes the IPA name to match the +> pipeline name. For this reason "-Dipas=simple" is used for the +> Soft IPA module. + +This should be addressed. + +--- + +9. Doxyfile cleanup + +>> diff --git a/Documentation/Doxyfile.in b/Documentation/Doxyfile.in +>> index a86ea6c1..2be8d47b 100644 +>> --- a/Documentation/Doxyfile.in +>> +++ b/Documentation/Doxyfile.in +>> @@ -44,6 +44,7 @@ EXCLUDE = @TOP_SRCDIR@/include/libcamera/base/span.h \ +>> @TOP_SRCDIR@/src/libcamera/pipeline/ \ +>> @TOP_SRCDIR@/src/libcamera/tracepoints.cpp \ +>> @TOP_BUILDDIR@/include/libcamera/internal/tracepoints.h \ +>> + @TOP_BUILDDIR@/include/libcamera/ipa/soft_ipa_interface.h \ +> Why is this needed ? +> +>> @TOP_BUILDDIR@/src/libcamera/proxy/ +>> EXCLUDE_PATTERNS = @TOP_BUILDDIR@/include/libcamera/ipa/*_serializer.h \ +>> diff --git a/include/libcamera/ipa/meson.build b/include/libcamera/ipa/meson.build +>> index f3b4881c..3352d08f 100644 +>> --- a/include/libcamera/ipa/meson.build +>> +++ b/include/libcamera/ipa/meson.build +>> @@ -65,6 +65,7 @@ pipeline_ipa_mojom_mapping = { +>> 'ipu3': 'ipu3.mojom', +>> 'rkisp1': 'rkisp1.mojom', +>> 'rpi/vc4': 'raspberrypi.mojom', +>> + 'simple': 'soft.mojom', +>> 'vimc': 'vimc.mojom', +>> } +>> diff --git a/include/libcamera/ipa/soft.mojom b/include/libcamera/ipa/soft.mojom +>> new file mode 100644 +>> index 00000000..c249bd75 +>> --- /dev/null +>> +++ b/include/libcamera/ipa/soft.mojom +>> @@ -0,0 +1,28 @@ +>> +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +>> + +>> +/* +>> + * \todo Document the interface and remove the related EXCLUDE_PATTERNS entry. +> Ah that's why. + +Yes, because, well... all the other IPAs were doing that... + +> It doesn't have to be done before merging, but could you +> address this sooner than later ? + +--- + +10. Switch to libipa/algorithm.h API in processStats + +>> void IPASoftSimple::processStats(const ControlList &sensorControls) +>> +> Do you envision switching to the libipa/algorithm.h API at some point ? + +At some point, yes. + +--- + +11. Improve handling the sensor controls which take effect with a delay + +> void IPASoftSimple::processStats(const ControlList &sensorControls) +> { +> ... +> /* +> * AE / AGC, use 2 frames delay to make sure that the exposure and +> * the gain set have applied to the camera sensor. +> */ +> if (ignore_updates_ > 0) { +> --ignore_updates_; +> return; +> } + +This could be handled better with DelayedControls. + +--- + +12. Use DelayedControls class in ispStatsReady() + +> void SimpleCameraData::ispStatsReady() +> { +> swIsp_->processStats(sensor_->getControls({ V4L2_CID_ANALOGUE_GAIN, +> V4L2_CID_EXPOSURE })); + +You should use the DelayedControls class. + +--- + +13. Improve black level and colour gains application + +I think the black level should eventually be moved before debayering, and +ideally the colour gains as well. I understand the need for optimizations to +lower the CPU consumption, but at the same time I don't feel comfortable +building up on top of an implementation that may work a bit more by chance than +by correctness, as that's not very maintainable. diff --git a/src/libcamera/software_isp/debayer.cpp b/src/libcamera/software_isp/debayer.cpp new file mode 100644 index 00000000..efe75ea8 --- /dev/null +++ b/src/libcamera/software_isp/debayer.cpp @@ -0,0 +1,132 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +/* + * Copyright (C) 2023, Linaro Ltd + * Copyright (C) 2023, Red Hat Inc. + * + * Authors: + * Hans de Goede <hdegoede@redhat.com> + * + * debayer base class + */ + +#include "debayer.h" + +namespace libcamera { + +/** + * \struct DebayerParams + * \brief Struct to hold the debayer parameters. + */ + +/** + * \var DebayerParams::kGain10 + * \brief const value for 1.0 gain + */ + +/** + * \var DebayerParams::gainR + * \brief Red gain + * + * 128 = 0.5, 256 = 1.0, 512 = 2.0, etc. + */ + +/** + * \var DebayerParams::gainG + * \brief Green gain + * + * 128 = 0.5, 256 = 1.0, 512 = 2.0, etc. + */ + +/** + * \var DebayerParams::gainB + * \brief Blue gain + * + * 128 = 0.5, 256 = 1.0, 512 = 2.0, etc. + */ + +/** + * \var DebayerParams::gamma + * \brief Gamma correction, 1.0 is no correction + */ + +/** + * \class Debayer + * \brief Base debayering class + * + * Base class that provides functions for setting up the debayering process. + */ + +LOG_DEFINE_CATEGORY(Debayer) + +Debayer::~Debayer() +{ +} + +/** + * \fn int Debayer::configure(const StreamConfiguration &inputCfg, const std::vector<std::reference_wrapper<StreamConfiguration>> &outputCfgs) + * \brief Configure the debayer object according to the passed in parameters. + * \param[in] inputCfg The input configuration. + * \param[in] outputCfgs The output configurations. + * + * \return 0 on success, a negative errno on failure. + */ + +/** + * \fn Size Debayer::patternSize(PixelFormat inputFormat) + * \brief Get the width and height at which the bayer pattern repeats. + * \param[in] inputFormat The input format. + * + * Valid sizes are: 2x2, 4x2 or 4x4. + * + * \return Pattern size or an empty size for unsupported inputFormats. + */ + +/** + * \fn std::vector<PixelFormat> Debayer::formats(PixelFormat inputFormat) + * \brief Get the supported output formats. + * \param[in] inputFormat The input format. + * + * \return All supported output formats or an empty vector if there are none. + */ + +/** + * \fn std::tuple<unsigned int, unsigned int> Debayer::strideAndFrameSize(const PixelFormat &outputFormat, const Size &size) + * \brief Get the stride and the frame size. + * \param[in] outputFormat The output format. + * \param[in] size The output size. + * + * \return A tuple of the stride and the frame size, or a tuple with 0,0 if + * there is no valid output config. + */ + +/** + * \fn void Debayer::process(FrameBuffer *input, FrameBuffer *output, DebayerParams params) + * \brief Process the bayer data into the requested format. + * \param[in] input The input buffer. + * \param[in] output The output buffer. + * \param[in] params The parameters to be used in debayering. + * + * \note DebayerParams is passed by value deliberately so that a copy is passed + * when this is run in another thread by invokeMethod(). + */ + +/** + * \fn virtual SizeRange Debayer::sizes(PixelFormat inputFormat, const Size &inputSize) + * \brief Get the supported output sizes for the given input format and size. + * \param[in] inputFormat The input format. + * \param[in] inputSize The input size. + * + * \return The valid size ranges or an empty range if there are none. + */ + +/** + * \var Signal<FrameBuffer *> Debayer::inputBufferReady + * \brief Signals when the input buffer is ready. + */ + +/** + * \var Signal<FrameBuffer *> Debayer::outputBufferReady + * \brief Signals when the output buffer is ready. + */ + +} /* namespace libcamera */ diff --git a/src/libcamera/software_isp/debayer.h b/src/libcamera/software_isp/debayer.h new file mode 100644 index 00000000..c151fe5d --- /dev/null +++ b/src/libcamera/software_isp/debayer.h @@ -0,0 +1,54 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +/* + * Copyright (C) 2023, Linaro Ltd + * Copyright (C) 2023, Red Hat Inc. + * + * Authors: + * Hans de Goede <hdegoede@redhat.com> + * + * debayering base class + */ + +#pragma once + +#include <stdint.h> + +#include <libcamera/base/log.h> +#include <libcamera/base/signal.h> + +#include <libcamera/geometry.h> +#include <libcamera/stream.h> + +#include "libcamera/internal/software_isp/debayer_params.h" + +namespace libcamera { + +class FrameBuffer; + +LOG_DECLARE_CATEGORY(Debayer) + +class Debayer +{ +public: + virtual ~Debayer() = 0; + + virtual int configure(const StreamConfiguration &inputCfg, + const std::vector<std::reference_wrapper<StreamConfiguration>> &outputCfgs) = 0; + + virtual std::vector<PixelFormat> formats(PixelFormat inputFormat) = 0; + + virtual std::tuple<unsigned int, unsigned int> + strideAndFrameSize(const PixelFormat &outputFormat, const Size &size) = 0; + + virtual void process(FrameBuffer *input, FrameBuffer *output, DebayerParams params) = 0; + + virtual SizeRange sizes(PixelFormat inputFormat, const Size &inputSize) = 0; + + Signal<FrameBuffer *> inputBufferReady; + Signal<FrameBuffer *> outputBufferReady; + +private: + virtual Size patternSize(PixelFormat inputFormat) = 0; +}; + +} /* namespace libcamera */ diff --git a/src/libcamera/software_isp/debayer_cpu.cpp b/src/libcamera/software_isp/debayer_cpu.cpp new file mode 100644 index 00000000..8254bbe9 --- /dev/null +++ b/src/libcamera/software_isp/debayer_cpu.cpp @@ -0,0 +1,807 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +/* + * Copyright (C) 2023, Linaro Ltd + * Copyright (C) 2023, Red Hat Inc. + * + * Authors: + * Hans de Goede <hdegoede@redhat.com> + * + * CPU based debayering class + */ + +#include "debayer_cpu.h" + +#include <math.h> +#include <stdlib.h> +#include <time.h> + +#include <libcamera/formats.h> + +#include "libcamera/internal/bayer_format.h" +#include "libcamera/internal/framebuffer.h" +#include "libcamera/internal/mapped_framebuffer.h" + +namespace libcamera { + +/** + * \class DebayerCpu + * \brief Class for debayering on the CPU + * + * Implementation for CPU based debayering + */ + +/** + * \brief Constructs a DebayerCpu object + * \param[in] stats Pointer to the stats object to use + */ +DebayerCpu::DebayerCpu(std::unique_ptr<SwStatsCpu> stats) + : stats_(std::move(stats)), gammaCorrection_(1.0), blackLevel_(0) +{ + /* + * Reading from uncached buffers may be very slow. + * In such a case, it's better to copy input buffer data to normal memory. + * But in case of cached buffers, copying the data is unnecessary overhead. + * enable_input_memcpy_ makes this behavior configurable. At the moment, we + * always set it to true as the safer choice but this should be changed in + * future. + */ + enableInputMemcpy_ = true; + + /* Initialize gamma to 1.0 curve */ + for (unsigned int i = 0; i < kGammaLookupSize; i++) + gamma_[i] = i / (kGammaLookupSize / kRGBLookupSize); + + for (unsigned int i = 0; i < kMaxLineBuffers; i++) + lineBuffers_[i] = nullptr; +} + +DebayerCpu::~DebayerCpu() +{ + for (unsigned int i = 0; i < kMaxLineBuffers; i++) + free(lineBuffers_[i]); +} + +#define DECLARE_SRC_POINTERS(pixel_t) \ + const pixel_t *prev = (const pixel_t *)src[0] + xShift_; \ + const pixel_t *curr = (const pixel_t *)src[1] + xShift_; \ + const pixel_t *next = (const pixel_t *)src[2] + xShift_; + +/* + * RGR + * GBG + * RGR + */ +#define BGGR_BGR888(p, n, div) \ + *dst++ = blue_[curr[x] / (div)]; \ + *dst++ = green_[(prev[x] + curr[x - p] + curr[x + n] + next[x]) / (4 * (div))]; \ + *dst++ = red_[(prev[x - p] + prev[x + n] + next[x - p] + next[x + n]) / (4 * (div))]; \ + x++; + +/* + * GBG + * RGR + * GBG + */ +#define GRBG_BGR888(p, n, div) \ + *dst++ = blue_[(prev[x] + next[x]) / (2 * (div))]; \ + *dst++ = green_[curr[x] / (div)]; \ + *dst++ = red_[(curr[x - p] + curr[x + n]) / (2 * (div))]; \ + x++; + +/* + * GRG + * BGB + * GRG + */ +#define GBRG_BGR888(p, n, div) \ + *dst++ = blue_[(curr[x - p] + curr[x + n]) / (2 * (div))]; \ + *dst++ = green_[curr[x] / (div)]; \ + *dst++ = red_[(prev[x] + next[x]) / (2 * (div))]; \ + x++; + +/* + * BGB + * GRG + * BGB + */ +#define RGGB_BGR888(p, n, div) \ + *dst++ = blue_[(prev[x - p] + prev[x + n] + next[x - p] + next[x + n]) / (4 * (div))]; \ + *dst++ = green_[(prev[x] + curr[x - p] + curr[x + n] + next[x]) / (4 * (div))]; \ + *dst++ = red_[curr[x] / (div)]; \ + x++; + +void DebayerCpu::debayer8_BGBG_BGR888(uint8_t *dst, const uint8_t *src[]) +{ + DECLARE_SRC_POINTERS(uint8_t) + + for (int x = 0; x < (int)window_.width;) { + BGGR_BGR888(1, 1, 1) + GBRG_BGR888(1, 1, 1) + } +} + +void DebayerCpu::debayer8_GRGR_BGR888(uint8_t *dst, const uint8_t *src[]) +{ + DECLARE_SRC_POINTERS(uint8_t) + + for (int x = 0; x < (int)window_.width;) { + GRBG_BGR888(1, 1, 1) + RGGB_BGR888(1, 1, 1) + } +} + +void DebayerCpu::debayer10_BGBG_BGR888(uint8_t *dst, const uint8_t *src[]) +{ + DECLARE_SRC_POINTERS(uint16_t) + + for (int x = 0; x < (int)window_.width;) { + /* divide values by 4 for 10 -> 8 bpp value */ + BGGR_BGR888(1, 1, 4) + GBRG_BGR888(1, 1, 4) + } +} + +void DebayerCpu::debayer10_GRGR_BGR888(uint8_t *dst, const uint8_t *src[]) +{ + DECLARE_SRC_POINTERS(uint16_t) + + for (int x = 0; x < (int)window_.width;) { + /* divide values by 4 for 10 -> 8 bpp value */ + GRBG_BGR888(1, 1, 4) + RGGB_BGR888(1, 1, 4) + } +} + +void DebayerCpu::debayer12_BGBG_BGR888(uint8_t *dst, const uint8_t *src[]) +{ + DECLARE_SRC_POINTERS(uint16_t) + + for (int x = 0; x < (int)window_.width;) { + /* divide values by 16 for 12 -> 8 bpp value */ + BGGR_BGR888(1, 1, 16) + GBRG_BGR888(1, 1, 16) + } +} + +void DebayerCpu::debayer12_GRGR_BGR888(uint8_t *dst, const uint8_t *src[]) +{ + DECLARE_SRC_POINTERS(uint16_t) + + for (int x = 0; x < (int)window_.width;) { + /* divide values by 16 for 12 -> 8 bpp value */ + GRBG_BGR888(1, 1, 16) + RGGB_BGR888(1, 1, 16) + } +} + +void DebayerCpu::debayer10P_BGBG_BGR888(uint8_t *dst, const uint8_t *src[]) +{ + const int widthInBytes = window_.width * 5 / 4; + const uint8_t *prev = src[0]; + const uint8_t *curr = src[1]; + const uint8_t *next = src[2]; + + /* + * For the first pixel getting a pixel from the previous column uses + * x - 2 to skip the 5th byte with least-significant bits for 4 pixels. + * Same for last pixel (uses x + 2) and looking at the next column. + */ + for (int x = 0; x < widthInBytes;) { + /* First pixel */ + BGGR_BGR888(2, 1, 1) + /* Second pixel BGGR -> GBRG */ + GBRG_BGR888(1, 1, 1) + /* Same thing for third and fourth pixels */ + BGGR_BGR888(1, 1, 1) + GBRG_BGR888(1, 2, 1) + /* Skip 5th src byte with 4 x 2 least-significant-bits */ + x++; + } +} + +void DebayerCpu::debayer10P_GRGR_BGR888(uint8_t *dst, const uint8_t *src[]) +{ + const int widthInBytes = window_.width * 5 / 4; + const uint8_t *prev = src[0]; + const uint8_t *curr = src[1]; + const uint8_t *next = src[2]; + + for (int x = 0; x < widthInBytes;) { + /* First pixel */ + GRBG_BGR888(2, 1, 1) + /* Second pixel GRBG -> RGGB */ + RGGB_BGR888(1, 1, 1) + /* Same thing for third and fourth pixels */ + GRBG_BGR888(1, 1, 1) + RGGB_BGR888(1, 2, 1) + /* Skip 5th src byte with 4 x 2 least-significant-bits */ + x++; + } +} + +void DebayerCpu::debayer10P_GBGB_BGR888(uint8_t *dst, const uint8_t *src[]) +{ + const int widthInBytes = window_.width * 5 / 4; + const uint8_t *prev = src[0]; + const uint8_t *curr = src[1]; + const uint8_t *next = src[2]; + + for (int x = 0; x < widthInBytes;) { + /* Even pixel */ + GBRG_BGR888(2, 1, 1) + /* Odd pixel GBGR -> BGGR */ + BGGR_BGR888(1, 1, 1) + /* Same thing for next 2 pixels */ + GBRG_BGR888(1, 1, 1) + BGGR_BGR888(1, 2, 1) + /* Skip 5th src byte with 4 x 2 least-significant-bits */ + x++; + } +} + +void DebayerCpu::debayer10P_RGRG_BGR888(uint8_t *dst, const uint8_t *src[]) +{ + const int widthInBytes = window_.width * 5 / 4; + const uint8_t *prev = src[0]; + const uint8_t *curr = src[1]; + const uint8_t *next = src[2]; + + for (int x = 0; x < widthInBytes;) { + /* Even pixel */ + RGGB_BGR888(2, 1, 1) + /* Odd pixel RGGB -> GRBG */ + GRBG_BGR888(1, 1, 1) + /* Same thing for next 2 pixels */ + RGGB_BGR888(1, 1, 1) + GRBG_BGR888(1, 2, 1) + /* Skip 5th src byte with 4 x 2 least-significant-bits */ + x++; + } +} + +static bool isStandardBayerOrder(BayerFormat::Order order) +{ + return order == BayerFormat::BGGR || order == BayerFormat::GBRG || + order == BayerFormat::GRBG || order == BayerFormat::RGGB; +} + +/* + * Setup the Debayer object according to the passed in parameters. + * Return 0 on success, a negative errno value on failure + * (unsupported parameters). + */ +int DebayerCpu::getInputConfig(PixelFormat inputFormat, DebayerInputConfig &config) +{ + BayerFormat bayerFormat = + BayerFormat::fromPixelFormat(inputFormat); + + if ((bayerFormat.bitDepth == 8 || bayerFormat.bitDepth == 10 || bayerFormat.bitDepth == 12) && + bayerFormat.packing == BayerFormat::Packing::None && + isStandardBayerOrder(bayerFormat.order)) { + config.bpp = (bayerFormat.bitDepth + 7) & ~7; + config.patternSize.width = 2; + config.patternSize.height = 2; + config.outputFormats = std::vector<PixelFormat>({ formats::RGB888, formats::BGR888 }); + return 0; + } + + if (bayerFormat.bitDepth == 10 && + bayerFormat.packing == BayerFormat::Packing::CSI2 && + isStandardBayerOrder(bayerFormat.order)) { + config.bpp = 10; + config.patternSize.width = 4; /* 5 bytes per *4* pixels */ + config.patternSize.height = 2; + config.outputFormats = std::vector<PixelFormat>({ formats::RGB888, formats::BGR888 }); + return 0; + } + + LOG(Debayer, Info) + << "Unsupported input format " << inputFormat.toString(); + return -EINVAL; +} + +int DebayerCpu::getOutputConfig(PixelFormat outputFormat, DebayerOutputConfig &config) +{ + if (outputFormat == formats::RGB888 || outputFormat == formats::BGR888) { + config.bpp = 24; + return 0; + } + + LOG(Debayer, Info) + << "Unsupported output format " << outputFormat.toString(); + return -EINVAL; +} + +/* + * Check for standard Bayer orders and set xShift_ and swap debayer0/1, so that + * a single pair of BGGR debayer functions can be used for all 4 standard orders. + */ +int DebayerCpu::setupStandardBayerOrder(BayerFormat::Order order) +{ + switch (order) { + case BayerFormat::BGGR: + break; + case BayerFormat::GBRG: + xShift_ = 1; /* BGGR -> GBRG */ + break; + case BayerFormat::GRBG: + std::swap(debayer0_, debayer1_); /* BGGR -> GRBG */ + break; + case BayerFormat::RGGB: + xShift_ = 1; /* BGGR -> GBRG */ + std::swap(debayer0_, debayer1_); /* GBRG -> RGGB */ + break; + default: + return -EINVAL; + } + + return 0; +} + +int DebayerCpu::setDebayerFunctions(PixelFormat inputFormat, PixelFormat outputFormat) +{ + BayerFormat bayerFormat = + BayerFormat::fromPixelFormat(inputFormat); + + xShift_ = 0; + swapRedBlueGains_ = false; + + auto invalidFmt = []() -> int { + LOG(Debayer, Error) << "Unsupported input output format combination"; + return -EINVAL; + }; + + switch (outputFormat) { + case formats::RGB888: + break; + case formats::BGR888: + /* Swap R and B in bayer order to generate BGR888 instead of RGB888 */ + swapRedBlueGains_ = true; + + switch (bayerFormat.order) { + case BayerFormat::BGGR: + bayerFormat.order = BayerFormat::RGGB; + break; + case BayerFormat::GBRG: + bayerFormat.order = BayerFormat::GRBG; + break; + case BayerFormat::GRBG: + bayerFormat.order = BayerFormat::GBRG; + break; + case BayerFormat::RGGB: + bayerFormat.order = BayerFormat::BGGR; + break; + default: + return invalidFmt(); + } + break; + default: + return invalidFmt(); + } + + if ((bayerFormat.bitDepth == 8 || bayerFormat.bitDepth == 10 || bayerFormat.bitDepth == 12) && + bayerFormat.packing == BayerFormat::Packing::None && + isStandardBayerOrder(bayerFormat.order)) { + switch (bayerFormat.bitDepth) { + case 8: + debayer0_ = &DebayerCpu::debayer8_BGBG_BGR888; + debayer1_ = &DebayerCpu::debayer8_GRGR_BGR888; + break; + case 10: + debayer0_ = &DebayerCpu::debayer10_BGBG_BGR888; + debayer1_ = &DebayerCpu::debayer10_GRGR_BGR888; + break; + case 12: + debayer0_ = &DebayerCpu::debayer12_BGBG_BGR888; + debayer1_ = &DebayerCpu::debayer12_GRGR_BGR888; + break; + } + setupStandardBayerOrder(bayerFormat.order); + return 0; + } + + if (bayerFormat.bitDepth == 10 && + bayerFormat.packing == BayerFormat::Packing::CSI2) { + switch (bayerFormat.order) { + case BayerFormat::BGGR: + debayer0_ = &DebayerCpu::debayer10P_BGBG_BGR888; + debayer1_ = &DebayerCpu::debayer10P_GRGR_BGR888; + return 0; + case BayerFormat::GBRG: + debayer0_ = &DebayerCpu::debayer10P_GBGB_BGR888; + debayer1_ = &DebayerCpu::debayer10P_RGRG_BGR888; + return 0; + case BayerFormat::GRBG: + debayer0_ = &DebayerCpu::debayer10P_GRGR_BGR888; + debayer1_ = &DebayerCpu::debayer10P_BGBG_BGR888; + return 0; + case BayerFormat::RGGB: + debayer0_ = &DebayerCpu::debayer10P_RGRG_BGR888; + debayer1_ = &DebayerCpu::debayer10P_GBGB_BGR888; + return 0; + default: + break; + } + } + + return invalidFmt(); +} + +int DebayerCpu::configure(const StreamConfiguration &inputCfg, + const std::vector<std::reference_wrapper<StreamConfiguration>> &outputCfgs) +{ + if (getInputConfig(inputCfg.pixelFormat, inputConfig_) != 0) + return -EINVAL; + + if (stats_->configure(inputCfg) != 0) + return -EINVAL; + + const Size &statsPatternSize = stats_->patternSize(); + if (inputConfig_.patternSize.width != statsPatternSize.width || + inputConfig_.patternSize.height != statsPatternSize.height) { + LOG(Debayer, Error) + << "mismatching stats and debayer pattern sizes for " + << inputCfg.pixelFormat.toString(); + return -EINVAL; + } + + inputConfig_.stride = inputCfg.stride; + + if (outputCfgs.size() != 1) { + LOG(Debayer, Error) + << "Unsupported number of output streams: " + << outputCfgs.size(); + return -EINVAL; + } + + const StreamConfiguration &outputCfg = outputCfgs[0]; + SizeRange outSizeRange = sizes(inputCfg.pixelFormat, inputCfg.size); + std::tie(outputConfig_.stride, outputConfig_.frameSize) = + strideAndFrameSize(outputCfg.pixelFormat, outputCfg.size); + + if (!outSizeRange.contains(outputCfg.size) || outputConfig_.stride != outputCfg.stride) { + LOG(Debayer, Error) + << "Invalid output size/stride: " + << "\n " << outputCfg.size << " (" << outSizeRange << ")" + << "\n " << outputCfg.stride << " (" << outputConfig_.stride << ")"; + return -EINVAL; + } + + if (setDebayerFunctions(inputCfg.pixelFormat, outputCfg.pixelFormat) != 0) + return -EINVAL; + + window_.x = ((inputCfg.size.width - outputCfg.size.width) / 2) & + ~(inputConfig_.patternSize.width - 1); + window_.y = ((inputCfg.size.height - outputCfg.size.height) / 2) & + ~(inputConfig_.patternSize.height - 1); + window_.width = outputCfg.size.width; + window_.height = outputCfg.size.height; + + /* Don't pass x,y since process() already adjusts src before passing it */ + stats_->setWindow(Rectangle(window_.size())); + + /* pad with patternSize.Width on both left and right side */ + lineBufferPadding_ = inputConfig_.patternSize.width * inputConfig_.bpp / 8; + lineBufferLength_ = window_.width * inputConfig_.bpp / 8 + + 2 * lineBufferPadding_; + for (unsigned int i = 0; + i < (inputConfig_.patternSize.height + 1) && enableInputMemcpy_; + i++) { + free(lineBuffers_[i]); + lineBuffers_[i] = (uint8_t *)malloc(lineBufferLength_); + if (!lineBuffers_[i]) + return -ENOMEM; + } + + measuredFrames_ = 0; + frameProcessTime_ = 0; + + return 0; +} + +/* + * Get width and height at which the bayer-pattern repeats. + * Return pattern-size or an empty Size for an unsupported inputFormat. + */ +Size DebayerCpu::patternSize(PixelFormat inputFormat) +{ + DebayerCpu::DebayerInputConfig config; + + if (getInputConfig(inputFormat, config) != 0) + return {}; + + return config.patternSize; +} + +std::vector<PixelFormat> DebayerCpu::formats(PixelFormat inputFormat) +{ + DebayerCpu::DebayerInputConfig config; + + if (getInputConfig(inputFormat, config) != 0) + return std::vector<PixelFormat>(); + + return config.outputFormats; +} + +std::tuple<unsigned int, unsigned int> +DebayerCpu::strideAndFrameSize(const PixelFormat &outputFormat, const Size &size) +{ + DebayerCpu::DebayerOutputConfig config; + + if (getOutputConfig(outputFormat, config) != 0) + return std::make_tuple(0, 0); + + /* round up to multiple of 8 for 64 bits alignment */ + unsigned int stride = (size.width * config.bpp / 8 + 7) & ~7; + + return std::make_tuple(stride, stride * size.height); +} + +void DebayerCpu::setupInputMemcpy(const uint8_t *linePointers[]) +{ + const unsigned int patternHeight = inputConfig_.patternSize.height; + + if (!enableInputMemcpy_) + return; + + for (unsigned int i = 0; i < patternHeight; i++) { + memcpy(lineBuffers_[i], linePointers[i + 1] - lineBufferPadding_, + lineBufferLength_); + linePointers[i + 1] = lineBuffers_[i] + lineBufferPadding_; + } + + /* Point lineBufferIndex_ to first unused lineBuffer */ + lineBufferIndex_ = patternHeight; +} + +void DebayerCpu::shiftLinePointers(const uint8_t *linePointers[], const uint8_t *src) +{ + const unsigned int patternHeight = inputConfig_.patternSize.height; + + for (unsigned int i = 0; i < patternHeight; i++) + linePointers[i] = linePointers[i + 1]; + + linePointers[patternHeight] = src + + (patternHeight / 2) * (int)inputConfig_.stride; +} + +void DebayerCpu::memcpyNextLine(const uint8_t *linePointers[]) +{ + const unsigned int patternHeight = inputConfig_.patternSize.height; + + if (!enableInputMemcpy_) + return; + + memcpy(lineBuffers_[lineBufferIndex_], linePointers[patternHeight] - lineBufferPadding_, + lineBufferLength_); + linePointers[patternHeight] = lineBuffers_[lineBufferIndex_] + lineBufferPadding_; + + lineBufferIndex_ = (lineBufferIndex_ + 1) % (patternHeight + 1); +} + +void DebayerCpu::process2(const uint8_t *src, uint8_t *dst) +{ + unsigned int yEnd = window_.y + window_.height; + /* Holds [0] previous- [1] current- [2] next-line */ + const uint8_t *linePointers[3]; + + /* Adjust src to top left corner of the window */ + src += window_.y * inputConfig_.stride + window_.x * inputConfig_.bpp / 8; + + /* [x] becomes [x - 1] after initial shiftLinePointers() call */ + if (window_.y) { + linePointers[1] = src - inputConfig_.stride; /* previous-line */ + linePointers[2] = src; + } else { + /* window_.y == 0, use the next line as prev line */ + linePointers[1] = src + inputConfig_.stride; + linePointers[2] = src; + /* Last 2 lines also need special handling */ + yEnd -= 2; + } + + setupInputMemcpy(linePointers); + + for (unsigned int y = window_.y; y < yEnd; y += 2) { + shiftLinePointers(linePointers, src); + memcpyNextLine(linePointers); + stats_->processLine0(y, linePointers); + (this->*debayer0_)(dst, linePointers); + src += inputConfig_.stride; + dst += outputConfig_.stride; + + shiftLinePointers(linePointers, src); + memcpyNextLine(linePointers); + (this->*debayer1_)(dst, linePointers); + src += inputConfig_.stride; + dst += outputConfig_.stride; + } + + if (window_.y == 0) { + shiftLinePointers(linePointers, src); + memcpyNextLine(linePointers); + stats_->processLine0(yEnd, linePointers); + (this->*debayer0_)(dst, linePointers); + src += inputConfig_.stride; + dst += outputConfig_.stride; + + shiftLinePointers(linePointers, src); + /* next line may point outside of src, use prev. */ + linePointers[2] = linePointers[0]; + (this->*debayer1_)(dst, linePointers); + src += inputConfig_.stride; + dst += outputConfig_.stride; + } +} + +void DebayerCpu::process4(const uint8_t *src, uint8_t *dst) +{ + const unsigned int yEnd = window_.y + window_.height; + /* + * This holds pointers to [0] 2-lines-up [1] 1-line-up [2] current-line + * [3] 1-line-down [4] 2-lines-down. + */ + const uint8_t *linePointers[5]; + + /* Adjust src to top left corner of the window */ + src += window_.y * inputConfig_.stride + window_.x * inputConfig_.bpp / 8; + + /* [x] becomes [x - 1] after initial shiftLinePointers() call */ + linePointers[1] = src - 2 * inputConfig_.stride; + linePointers[2] = src - inputConfig_.stride; + linePointers[3] = src; + linePointers[4] = src + inputConfig_.stride; + + setupInputMemcpy(linePointers); + + for (unsigned int y = window_.y; y < yEnd; y += 4) { + shiftLinePointers(linePointers, src); + memcpyNextLine(linePointers); + stats_->processLine0(y, linePointers); + (this->*debayer0_)(dst, linePointers); + src += inputConfig_.stride; + dst += outputConfig_.stride; + + shiftLinePointers(linePointers, src); + memcpyNextLine(linePointers); + (this->*debayer1_)(dst, linePointers); + src += inputConfig_.stride; + dst += outputConfig_.stride; + + shiftLinePointers(linePointers, src); + memcpyNextLine(linePointers); + stats_->processLine2(y, linePointers); + (this->*debayer2_)(dst, linePointers); + src += inputConfig_.stride; + dst += outputConfig_.stride; + + shiftLinePointers(linePointers, src); + memcpyNextLine(linePointers); + (this->*debayer3_)(dst, linePointers); + src += inputConfig_.stride; + dst += outputConfig_.stride; + } +} + +static inline int64_t timeDiff(timespec &after, timespec &before) +{ + return (after.tv_sec - before.tv_sec) * 1000000000LL + + (int64_t)after.tv_nsec - (int64_t)before.tv_nsec; +} + +void DebayerCpu::process(FrameBuffer *input, FrameBuffer *output, DebayerParams params) +{ + timespec frameStartTime; + + if (measuredFrames_ < DebayerCpu::kLastFrameToMeasure) { + frameStartTime = {}; + clock_gettime(CLOCK_MONOTONIC_RAW, &frameStartTime); + } + + /* Apply DebayerParams */ + if (params.gamma != gammaCorrection_ || params.blackLevel != blackLevel_) { + const unsigned int blackIndex = + params.blackLevel * kGammaLookupSize / 256; + std::fill(gamma_.begin(), gamma_.begin() + blackIndex, 0); + const float divisor = kGammaLookupSize - blackIndex - 1.0; + for (unsigned int i = blackIndex; i < kGammaLookupSize; i++) + gamma_[i] = UINT8_MAX * powf((i - blackIndex) / divisor, params.gamma); + + gammaCorrection_ = params.gamma; + blackLevel_ = params.blackLevel; + } + + if (swapRedBlueGains_) + std::swap(params.gainR, params.gainB); + + for (unsigned int i = 0; i < kRGBLookupSize; i++) { + constexpr unsigned int div = + kRGBLookupSize * DebayerParams::kGain10 / kGammaLookupSize; + unsigned int idx; + + /* Apply gamma after gain! */ + idx = std::min({ i * params.gainR / div, (kGammaLookupSize - 1) }); + red_[i] = gamma_[idx]; + + idx = std::min({ i * params.gainG / div, (kGammaLookupSize - 1) }); + green_[i] = gamma_[idx]; + + idx = std::min({ i * params.gainB / div, (kGammaLookupSize - 1) }); + blue_[i] = gamma_[idx]; + } + + /* Copy metadata from the input buffer */ + FrameMetadata &metadata = output->_d()->metadata(); + metadata.status = input->metadata().status; + metadata.sequence = input->metadata().sequence; + metadata.timestamp = input->metadata().timestamp; + + MappedFrameBuffer in(input, MappedFrameBuffer::MapFlag::Read); + MappedFrameBuffer out(output, MappedFrameBuffer::MapFlag::Write); + if (!in.isValid() || !out.isValid()) { + LOG(Debayer, Error) << "mmap-ing buffer(s) failed"; + metadata.status = FrameMetadata::FrameError; + return; + } + + stats_->startFrame(); + + if (inputConfig_.patternSize.height == 2) + process2(in.planes()[0].data(), out.planes()[0].data()); + else + process4(in.planes()[0].data(), out.planes()[0].data()); + + metadata.planes()[0].bytesused = out.planes()[0].size(); + + /* Measure before emitting signals */ + if (measuredFrames_ < DebayerCpu::kLastFrameToMeasure && + ++measuredFrames_ > DebayerCpu::kFramesToSkip) { + timespec frameEndTime = {}; + clock_gettime(CLOCK_MONOTONIC_RAW, &frameEndTime); + frameProcessTime_ += timeDiff(frameEndTime, frameStartTime); + if (measuredFrames_ == DebayerCpu::kLastFrameToMeasure) { + const unsigned int measuredFrames = DebayerCpu::kLastFrameToMeasure - + DebayerCpu::kFramesToSkip; + LOG(Debayer, Info) + << "Processed " << measuredFrames + << " frames in " << frameProcessTime_ / 1000 << "us, " + << frameProcessTime_ / (1000 * measuredFrames) + << " us/frame"; + } + } + + stats_->finishFrame(); + outputBufferReady.emit(output); + inputBufferReady.emit(input); +} + +SizeRange DebayerCpu::sizes(PixelFormat inputFormat, const Size &inputSize) +{ + Size patternSize = this->patternSize(inputFormat); + unsigned int borderHeight = patternSize.height; + + if (patternSize.isNull()) + return {}; + + /* No need for top/bottom border with a pattern height of 2 */ + if (patternSize.height == 2) + borderHeight = 0; + + /* + * For debayer interpolation a border is kept around the entire image + * and the minimum output size is pattern-height x pattern-width. + */ + if (inputSize.width < (3 * patternSize.width) || + inputSize.height < (2 * borderHeight + patternSize.height)) { + LOG(Debayer, Warning) + << "Input format size too small: " << inputSize.toString(); + return {}; + } + + return SizeRange(Size(patternSize.width, patternSize.height), + Size((inputSize.width - 2 * patternSize.width) & ~(patternSize.width - 1), + (inputSize.height - 2 * borderHeight) & ~(patternSize.height - 1)), + patternSize.width, patternSize.height); +} + +} /* namespace libcamera */ diff --git a/src/libcamera/software_isp/debayer_cpu.h b/src/libcamera/software_isp/debayer_cpu.h new file mode 100644 index 00000000..de216fe3 --- /dev/null +++ b/src/libcamera/software_isp/debayer_cpu.h @@ -0,0 +1,158 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +/* + * Copyright (C) 2023, Linaro Ltd + * Copyright (C) 2023, Red Hat Inc. + * + * Authors: + * Hans de Goede <hdegoede@redhat.com> + * + * CPU based debayering header + */ + +#pragma once + +#include <memory> +#include <stdint.h> +#include <vector> + +#include <libcamera/base/object.h> + +#include "libcamera/internal/bayer_format.h" + +#include "debayer.h" +#include "swstats_cpu.h" + +namespace libcamera { + +class DebayerCpu : public Debayer, public Object +{ +public: + DebayerCpu(std::unique_ptr<SwStatsCpu> stats); + ~DebayerCpu(); + + int configure(const StreamConfiguration &inputCfg, + const std::vector<std::reference_wrapper<StreamConfiguration>> &outputCfgs); + Size patternSize(PixelFormat inputFormat); + std::vector<PixelFormat> formats(PixelFormat input); + std::tuple<unsigned int, unsigned int> + strideAndFrameSize(const PixelFormat &outputFormat, const Size &size); + void process(FrameBuffer *input, FrameBuffer *output, DebayerParams params); + SizeRange sizes(PixelFormat inputFormat, const Size &inputSize); + + /** + * \brief Get the file descriptor for the statistics + * + * \return the file descriptor pointing to the statistics + */ + const SharedFD &getStatsFD() { return stats_->getStatsFD(); } + + /** + * \brief Get the output frame size + * + * \return The output frame size + */ + unsigned int frameSize() { return outputConfig_.frameSize; } + +private: + /** + * \brief Called to debayer 1 line of Bayer input data to output format + * \param[out] dst Pointer to the start of the output line to write + * \param[in] src The input data + * + * Input data is an array of (patternSize_.height + 1) src + * pointers each pointing to a line in the Bayer source. The middle + * element of the array will point to the actual line being processed. + * Earlier element(s) will point to the previous line(s) and later + * element(s) to the next line(s). + * + * These functions take an array of src pointers, rather than + * a single src pointer + a stride for the source, so that when the src + * is slow uncached memory it can be copied to faster memory before + * debayering. Debayering a standard 2x2 Bayer pattern requires access + * to the previous and next src lines for interpolating the missing + * colors. To allow copying the src lines only once 3 temporary buffers + * each holding a single line are used, re-using the oldest buffer for + * the next line and the pointers are swizzled so that: + * src[0] = previous-line, src[1] = currrent-line, src[2] = next-line. + * This way the 3 pointers passed to the debayer functions form + * a sliding window over the src avoiding the need to copy each + * line more than once. + * + * Similarly for bayer patterns which repeat every 4 lines, 5 src + * pointers are passed holding: src[0] = 2-lines-up, src[1] = 1-line-up + * src[2] = current-line, src[3] = 1-line-down, src[4] = 2-lines-down. + */ + using debayerFn = void (DebayerCpu::*)(uint8_t *dst, const uint8_t *src[]); + + /* 8-bit raw bayer format */ + void debayer8_BGBG_BGR888(uint8_t *dst, const uint8_t *src[]); + void debayer8_GRGR_BGR888(uint8_t *dst, const uint8_t *src[]); + /* unpacked 10-bit raw bayer format */ + void debayer10_BGBG_BGR888(uint8_t *dst, const uint8_t *src[]); + void debayer10_GRGR_BGR888(uint8_t *dst, const uint8_t *src[]); + /* unpacked 12-bit raw bayer format */ + void debayer12_BGBG_BGR888(uint8_t *dst, const uint8_t *src[]); + void debayer12_GRGR_BGR888(uint8_t *dst, const uint8_t *src[]); + /* CSI-2 packed 10-bit raw bayer format (all the 4 orders) */ + void debayer10P_BGBG_BGR888(uint8_t *dst, const uint8_t *src[]); + void debayer10P_GRGR_BGR888(uint8_t *dst, const uint8_t *src[]); + void debayer10P_GBGB_BGR888(uint8_t *dst, const uint8_t *src[]); + void debayer10P_RGRG_BGR888(uint8_t *dst, const uint8_t *src[]); + + struct DebayerInputConfig { + Size patternSize; + unsigned int bpp; /* Memory used per pixel, not precision */ + unsigned int stride; + std::vector<PixelFormat> outputFormats; + }; + + struct DebayerOutputConfig { + unsigned int bpp; /* Memory used per pixel, not precision */ + unsigned int stride; + unsigned int frameSize; + }; + + int getInputConfig(PixelFormat inputFormat, DebayerInputConfig &config); + int getOutputConfig(PixelFormat outputFormat, DebayerOutputConfig &config); + int setupStandardBayerOrder(BayerFormat::Order order); + int setDebayerFunctions(PixelFormat inputFormat, PixelFormat outputFormat); + void setupInputMemcpy(const uint8_t *linePointers[]); + void shiftLinePointers(const uint8_t *linePointers[], const uint8_t *src); + void memcpyNextLine(const uint8_t *linePointers[]); + void process2(const uint8_t *src, uint8_t *dst); + void process4(const uint8_t *src, uint8_t *dst); + + static constexpr unsigned int kGammaLookupSize = 1024; + static constexpr unsigned int kRGBLookupSize = 256; + /* Max. supported Bayer pattern height is 4, debayering this requires 5 lines */ + static constexpr unsigned int kMaxLineBuffers = 5; + + std::array<uint8_t, kGammaLookupSize> gamma_; + std::array<uint8_t, kRGBLookupSize> red_; + std::array<uint8_t, kRGBLookupSize> green_; + std::array<uint8_t, kRGBLookupSize> blue_; + debayerFn debayer0_; + debayerFn debayer1_; + debayerFn debayer2_; + debayerFn debayer3_; + Rectangle window_; + DebayerInputConfig inputConfig_; + DebayerOutputConfig outputConfig_; + std::unique_ptr<SwStatsCpu> stats_; + uint8_t *lineBuffers_[kMaxLineBuffers]; + unsigned int lineBufferLength_; + unsigned int lineBufferPadding_; + unsigned int lineBufferIndex_; + unsigned int xShift_; /* Offset of 0/1 applied to window_.x */ + bool enableInputMemcpy_; + bool swapRedBlueGains_; + float gammaCorrection_; + unsigned int blackLevel_; + unsigned int measuredFrames_; + int64_t frameProcessTime_; + /* Skip 30 frames for things to stabilize then measure 30 frames */ + static constexpr unsigned int kFramesToSkip = 30; + static constexpr unsigned int kLastFrameToMeasure = 60; +}; + +} /* namespace libcamera */ diff --git a/src/libcamera/software_isp/meson.build b/src/libcamera/software_isp/meson.build new file mode 100644 index 00000000..f7c66e28 --- /dev/null +++ b/src/libcamera/software_isp/meson.build @@ -0,0 +1,15 @@ +# SPDX-License-Identifier: CC0-1.0 + +softisp_enabled = pipelines.contains('simple') +summary({'SoftISP support' : softisp_enabled}, section : 'Configuration') + +if not softisp_enabled + subdir_done() +endif + +libcamera_sources += files([ + 'debayer.cpp', + 'debayer_cpu.cpp', + 'software_isp.cpp', + 'swstats_cpu.cpp', +]) diff --git a/src/libcamera/software_isp/software_isp.cpp b/src/libcamera/software_isp/software_isp.cpp new file mode 100644 index 00000000..c9b6be56 --- /dev/null +++ b/src/libcamera/software_isp/software_isp.cpp @@ -0,0 +1,357 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +/* + * Copyright (C) 2023, Linaro Ltd + * + * Simple software ISP implementation + */ + +#include "libcamera/internal/software_isp/software_isp.h" + +#include <sys/mman.h> +#include <sys/types.h> +#include <unistd.h> + +#include <libcamera/formats.h> +#include <libcamera/stream.h> + +#include "libcamera/internal/bayer_format.h" +#include "libcamera/internal/framebuffer.h" +#include "libcamera/internal/ipa_manager.h" +#include "libcamera/internal/mapped_framebuffer.h" + +#include "debayer_cpu.h" + +/** + * \file software_isp.cpp + * \brief Simple software ISP implementation + */ + +namespace libcamera { + +LOG_DEFINE_CATEGORY(SoftwareIsp) + +/** + * \class SoftwareIsp + * \brief Class for the Software ISP + */ + +/** + * \var SoftwareIsp::inputBufferReady + * \brief A signal emitted when the input frame buffer completes + */ + +/** + * \var SoftwareIsp::outputBufferReady + * \brief A signal emitted when the output frame buffer completes + */ + +/** + * \var SoftwareIsp::ispStatsReady + * \brief A signal emitted when the statistics for IPA are ready + */ + +/** + * \var SoftwareIsp::setSensorControls + * \brief A signal emitted when the values to write to the sensor controls are + * ready + */ + +/** + * \brief Constructs SoftwareIsp object + * \param[in] pipe The pipeline handler in use + * \param[in] sensor Pointer to the CameraSensor instance owned by the pipeline + * handler + */ +SoftwareIsp::SoftwareIsp(PipelineHandler *pipe, const CameraSensor *sensor) + : debayerParams_{ DebayerParams::kGain10, DebayerParams::kGain10, + DebayerParams::kGain10, 0.5f, 0 }, + dmaHeap_(DmaHeap::DmaHeapFlag::Cma | DmaHeap::DmaHeapFlag::System) +{ + if (!dmaHeap_.isValid()) { + LOG(SoftwareIsp, Error) << "Failed to create DmaHeap object"; + return; + } + + sharedParams_ = SharedMemObject<DebayerParams>("softIsp_params"); + if (!sharedParams_) { + LOG(SoftwareIsp, Error) << "Failed to create shared memory for parameters"; + return; + } + + auto stats = std::make_unique<SwStatsCpu>(); + if (!stats->isValid()) { + LOG(SoftwareIsp, Error) << "Failed to create SwStatsCpu object"; + return; + } + stats->statsReady.connect(this, &SoftwareIsp::statsReady); + + debayer_ = std::make_unique<DebayerCpu>(std::move(stats)); + debayer_->inputBufferReady.connect(this, &SoftwareIsp::inputReady); + debayer_->outputBufferReady.connect(this, &SoftwareIsp::outputReady); + + ipa_ = IPAManager::createIPA<ipa::soft::IPAProxySoft>(pipe, 0, 0); + if (!ipa_) { + LOG(SoftwareIsp, Error) + << "Creating IPA for software ISP failed"; + debayer_.reset(); + return; + } + + /* + * The API tuning file is made from the sensor name. If the tuning file + * isn't found, fall back to the 'uncalibrated' file. + */ + std::string ipaTuningFile = ipa_->configurationFile(sensor->model() + ".yaml"); + if (ipaTuningFile.empty()) + ipaTuningFile = ipa_->configurationFile("uncalibrated.yaml"); + + int ret = ipa_->init(IPASettings{ ipaTuningFile, sensor->model() }, + debayer_->getStatsFD(), + sharedParams_.fd(), + sensor->controls()); + if (ret) { + LOG(SoftwareIsp, Error) << "IPA init failed"; + debayer_.reset(); + return; + } + + ipa_->setIspParams.connect(this, &SoftwareIsp::saveIspParams); + ipa_->setSensorControls.connect(this, &SoftwareIsp::setSensorCtrls); + + debayer_->moveToThread(&ispWorkerThread_); +} + +SoftwareIsp::~SoftwareIsp() +{ + /* make sure to destroy the DebayerCpu before the ispWorkerThread_ is gone */ + debayer_.reset(); +} + +/** + * \fn int SoftwareIsp::loadConfiguration([[maybe_unused]] const std::string &filename) + * \brief Load a configuration from a file + * \param[in] filename The file to load the configuration data from + * + * Currently is a stub doing nothing and always returning "success". + * + * \return 0 on success + */ + +/** + * \brief Process the statistics gathered + * \param[in] sensorControls The sensor controls + * + * Requests the IPA to calculate new parameters for ISP and new control + * values for the sensor. + */ +void SoftwareIsp::processStats(const ControlList &sensorControls) +{ + ASSERT(ipa_); + ipa_->processStats(sensorControls); +} + +/** + * \brief Check the validity of Software Isp object + * \return True if Software Isp is valid, false otherwise + */ +bool SoftwareIsp::isValid() const +{ + return !!debayer_; +} + +/** + * \brief Get the output formats supported for the given input format + * \param[in] inputFormat The input format + * \return All the supported output formats or an empty vector if there are none + */ +std::vector<PixelFormat> SoftwareIsp::formats(PixelFormat inputFormat) +{ + ASSERT(debayer_); + + return debayer_->formats(inputFormat); +} + +/** + * \brief Get the supported output sizes for the given input format and size + * \param[in] inputFormat The input format + * \param[in] inputSize The input frame size + * \return The valid size range or an empty range if there are none + */ +SizeRange SoftwareIsp::sizes(PixelFormat inputFormat, const Size &inputSize) +{ + ASSERT(debayer_); + + return debayer_->sizes(inputFormat, inputSize); +} + +/** + * Get the output stride and the frame size in bytes for the given output format and size + * \param[in] outputFormat The output format + * \param[in] size The output size (width and height in pixels) + * \return A tuple of the stride and the frame size in bytes, or a tuple of 0,0 + * if there is no valid output config + */ +std::tuple<unsigned int, unsigned int> +SoftwareIsp::strideAndFrameSize(const PixelFormat &outputFormat, const Size &size) +{ + ASSERT(debayer_); + + return debayer_->strideAndFrameSize(outputFormat, size); +} + +/** + * \brief Configure the SoftwareIsp object according to the passed in parameters + * \param[in] inputCfg The input configuration + * \param[in] outputCfgs The output configurations + * \param[in] sensorControls ControlInfoMap of the controls supported by the sensor + * \return 0 on success, a negative errno on failure + */ +int SoftwareIsp::configure(const StreamConfiguration &inputCfg, + const std::vector<std::reference_wrapper<StreamConfiguration>> &outputCfgs, + const ControlInfoMap &sensorControls) +{ + ASSERT(ipa_ && debayer_); + + int ret = ipa_->configure(sensorControls); + if (ret < 0) + return ret; + + return debayer_->configure(inputCfg, outputCfgs); +} + +/** + * \brief Export the buffers from the Software ISP + * \param[in] output Output stream index exporting the buffers + * \param[in] count Number of buffers to allocate + * \param[out] buffers Vector to store the allocated buffers + * \return The number of allocated buffers on success or a negative error code + * otherwise + */ +int SoftwareIsp::exportBuffers(unsigned int output, unsigned int count, + std::vector<std::unique_ptr<FrameBuffer>> *buffers) +{ + ASSERT(debayer_ != nullptr); + + /* single output for now */ + if (output >= 1) + return -EINVAL; + + for (unsigned int i = 0; i < count; i++) { + const std::string name = "frame-" + std::to_string(i); + const size_t frameSize = debayer_->frameSize(); + + FrameBuffer::Plane outPlane; + outPlane.fd = SharedFD(dmaHeap_.alloc(name.c_str(), frameSize)); + if (!outPlane.fd.isValid()) { + LOG(SoftwareIsp, Error) + << "failed to allocate a dma_buf"; + return -ENOMEM; + } + outPlane.offset = 0; + outPlane.length = frameSize; + + std::vector<FrameBuffer::Plane> planes{ outPlane }; + buffers->emplace_back(std::make_unique<FrameBuffer>(std::move(planes))); + } + + return count; +} + +/** + * \brief Queue buffers to Software ISP + * \param[in] input The input framebuffer + * \param[in] outputs The container holding the output stream indexes and + * their respective frame buffer outputs + * \return 0 on success, a negative errno on failure + */ +int SoftwareIsp::queueBuffers(FrameBuffer *input, + const std::map<unsigned int, FrameBuffer *> &outputs) +{ + unsigned int mask = 0; + + /* + * Validate the outputs as a sanity check: at least one output is + * required, all outputs must reference a valid stream and no two + * outputs can reference the same stream. + */ + if (outputs.empty()) + return -EINVAL; + + for (auto [index, buffer] : outputs) { + if (!buffer) + return -EINVAL; + if (index >= 1) /* only single stream atm */ + return -EINVAL; + if (mask & (1 << index)) + return -EINVAL; + + mask |= 1 << index; + } + + process(input, outputs.at(0)); + + return 0; +} + +/** + * \brief Starts the Software ISP streaming operation + * \return 0 on success, any other value indicates an error + */ +int SoftwareIsp::start() +{ + int ret = ipa_->start(); + if (ret) + return ret; + + ispWorkerThread_.start(); + return 0; +} + +/** + * \brief Stops the Software ISP streaming operation + */ +void SoftwareIsp::stop() +{ + ispWorkerThread_.exit(); + ispWorkerThread_.wait(); + + ipa_->stop(); +} + +/** + * \brief Passes the input framebuffer to the ISP worker to process + * \param[in] input The input framebuffer + * \param[out] output The framebuffer to write the processed frame to + */ +void SoftwareIsp::process(FrameBuffer *input, FrameBuffer *output) +{ + debayer_->invokeMethod(&DebayerCpu::process, + ConnectionTypeQueued, input, output, debayerParams_); +} + +void SoftwareIsp::saveIspParams() +{ + debayerParams_ = *sharedParams_; +} + +void SoftwareIsp::setSensorCtrls(const ControlList &sensorControls) +{ + setSensorControls.emit(sensorControls); +} + +void SoftwareIsp::statsReady() +{ + ispStatsReady.emit(); +} + +void SoftwareIsp::inputReady(FrameBuffer *input) +{ + inputBufferReady.emit(input); +} + +void SoftwareIsp::outputReady(FrameBuffer *output) +{ + outputBufferReady.emit(output); +} + +} /* namespace libcamera */ diff --git a/src/libcamera/software_isp/swstats_cpu.cpp b/src/libcamera/software_isp/swstats_cpu.cpp new file mode 100644 index 00000000..815c4d4f --- /dev/null +++ b/src/libcamera/software_isp/swstats_cpu.cpp @@ -0,0 +1,432 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +/* + * Copyright (C) 2023, Linaro Ltd + * Copyright (C) 2023, Red Hat Inc. + * + * Authors: + * Hans de Goede <hdegoede@redhat.com> + * + * CPU based software statistics implementation + */ + +#include "swstats_cpu.h" + +#include <libcamera/base/log.h> + +#include <libcamera/stream.h> + +#include "libcamera/internal/bayer_format.h" + +namespace libcamera { + +/** + * \class SwStatsCpu + * \brief Class for gathering statistics on the CPU + * + * CPU based software ISP statistics implementation. + * + * This class offers a configure function + functions to gather statistics on a + * line by line basis. This allows CPU based software debayering to interleave + * debayering and statistics gathering on a line by line basis while the input + * data is still hot in the cache. + * + * It is also possible to specify a window over which to gather statistics + * instead of processing the whole frame. + */ + +/** + * \fn bool SwStatsCpu::isValid() const + * \brief Gets whether the statistics object is valid + * + * \return True if it's valid, false otherwise + */ + +/** + * \fn const SharedFD &SwStatsCpu::getStatsFD() + * \brief Get the file descriptor for the statistics + * + * \return The file descriptor + */ + +/** + * \fn const Size &SwStatsCpu::patternSize() + * \brief Get the pattern size + * + * For some input-formats, e.g. Bayer data, processing is done multiple lines + * and/or columns at a time. Get width and height at which the (bayer) pattern + * repeats. Window values are rounded down to a multiple of this and the height + * also indicates if processLine2() should be called or not. + * This may only be called after a successful configure() call. + * + * \return The pattern size + */ + +/** + * \fn void SwStatsCpu::processLine0(unsigned int y, const uint8_t *src[]) + * \brief Process line 0 + * \param[in] y The y coordinate. + * \param[in] src The input data. + * + * This function processes line 0 for input formats with + * patternSize height == 1. + * It'll process line 0 and 1 for input formats with patternSize height >= 2. + * This function may only be called after a successful setWindow() call. + */ + +/** + * \fn void SwStatsCpu::processLine2(unsigned int y, const uint8_t *src[]) + * \brief Process line 2 and 3 + * \param[in] y The y coordinate. + * \param[in] src The input data. + * + * This function processes line 2 and 3 for input formats with + * patternSize height == 4. + * This function may only be called after a successful setWindow() call. + */ + +/** + * \var Signal<> SwStatsCpu::statsReady + * \brief Signals that the statistics are ready + */ + +/** + * \typedef SwStatsCpu::statsProcessFn + * \brief Called when there is data to get statistics from + * \param[in] src The input data + * + * These functions take an array of (patternSize_.height + 1) src + * pointers each pointing to a line in the source image. The middle + * element of the array will point to the actual line being processed. + * Earlier element(s) will point to the previous line(s) and later + * element(s) to the next line(s). + * + * See the documentation of DebayerCpu::debayerFn for more details. + */ + +/** + * \var unsigned int SwStatsCpu::ySkipMask_ + * \brief Skip lines where this bitmask is set in y + */ + +/** + * \var Rectangle SwStatsCpu::window_ + * \brief Statistics window, set by setWindow(), used every line + */ + +/** + * \var Size SwStatsCpu::patternSize_ + * \brief The size of the bayer pattern + * + * Valid sizes are: 2x2, 4x2 or 4x4. + */ + +/** + * \var unsigned int SwStatsCpu::xShift_ + * \brief The offset of x, applied to window_.x for bayer variants + * + * This can either be 0 or 1. + */ + +LOG_DEFINE_CATEGORY(SwStatsCpu) + +SwStatsCpu::SwStatsCpu() + : sharedStats_("softIsp_stats") +{ + if (!sharedStats_) + LOG(SwStatsCpu, Error) + << "Failed to create shared memory for statistics"; +} + +static constexpr unsigned int kRedYMul = 77; /* 0.299 * 256 */ +static constexpr unsigned int kGreenYMul = 150; /* 0.587 * 256 */ +static constexpr unsigned int kBlueYMul = 29; /* 0.114 * 256 */ + +#define SWSTATS_START_LINE_STATS(pixel_t) \ + pixel_t r, g, g2, b; \ + uint64_t yVal; \ + \ + uint64_t sumR = 0; \ + uint64_t sumG = 0; \ + uint64_t sumB = 0; + +#define SWSTATS_ACCUMULATE_LINE_STATS(div) \ + sumR += r; \ + sumG += g; \ + sumB += b; \ + \ + yVal = r * kRedYMul; \ + yVal += g * kGreenYMul; \ + yVal += b * kBlueYMul; \ + stats_.yHistogram[yVal * SwIspStats::kYHistogramSize / (256 * 256 * (div))]++; + +#define SWSTATS_FINISH_LINE_STATS() \ + stats_.sumR_ += sumR; \ + stats_.sumG_ += sumG; \ + stats_.sumB_ += sumB; + +void SwStatsCpu::statsBGGR8Line0(const uint8_t *src[]) +{ + const uint8_t *src0 = src[1] + window_.x; + const uint8_t *src1 = src[2] + window_.x; + + SWSTATS_START_LINE_STATS(uint8_t) + + if (swapLines_) + std::swap(src0, src1); + + /* x += 4 sample every other 2x2 block */ + for (int x = 0; x < (int)window_.width; x += 4) { + b = src0[x]; + g = src0[x + 1]; + g2 = src1[x]; + r = src1[x + 1]; + + g = (g + g2) / 2; + + SWSTATS_ACCUMULATE_LINE_STATS(1) + } + + SWSTATS_FINISH_LINE_STATS() +} + +void SwStatsCpu::statsBGGR10Line0(const uint8_t *src[]) +{ + const uint16_t *src0 = (const uint16_t *)src[1] + window_.x; + const uint16_t *src1 = (const uint16_t *)src[2] + window_.x; + + SWSTATS_START_LINE_STATS(uint16_t) + + if (swapLines_) + std::swap(src0, src1); + + /* x += 4 sample every other 2x2 block */ + for (int x = 0; x < (int)window_.width; x += 4) { + b = src0[x]; + g = src0[x + 1]; + g2 = src1[x]; + r = src1[x + 1]; + + g = (g + g2) / 2; + + /* divide Y by 4 for 10 -> 8 bpp value */ + SWSTATS_ACCUMULATE_LINE_STATS(4) + } + + SWSTATS_FINISH_LINE_STATS() +} + +void SwStatsCpu::statsBGGR12Line0(const uint8_t *src[]) +{ + const uint16_t *src0 = (const uint16_t *)src[1] + window_.x; + const uint16_t *src1 = (const uint16_t *)src[2] + window_.x; + + SWSTATS_START_LINE_STATS(uint16_t) + + if (swapLines_) + std::swap(src0, src1); + + /* x += 4 sample every other 2x2 block */ + for (int x = 0; x < (int)window_.width; x += 4) { + b = src0[x]; + g = src0[x + 1]; + g2 = src1[x]; + r = src1[x + 1]; + + g = (g + g2) / 2; + + /* divide Y by 16 for 12 -> 8 bpp value */ + SWSTATS_ACCUMULATE_LINE_STATS(16) + } + + SWSTATS_FINISH_LINE_STATS() +} + +void SwStatsCpu::statsBGGR10PLine0(const uint8_t *src[]) +{ + const uint8_t *src0 = src[1] + window_.x * 5 / 4; + const uint8_t *src1 = src[2] + window_.x * 5 / 4; + const int widthInBytes = window_.width * 5 / 4; + + if (swapLines_) + std::swap(src0, src1); + + SWSTATS_START_LINE_STATS(uint8_t) + + /* x += 5 sample every other 2x2 block */ + for (int x = 0; x < widthInBytes; x += 5) { + /* BGGR */ + b = src0[x]; + g = src0[x + 1]; + g2 = src1[x]; + r = src1[x + 1]; + g = (g + g2) / 2; + /* Data is already 8 bits, divide by 1 */ + SWSTATS_ACCUMULATE_LINE_STATS(1) + } + + SWSTATS_FINISH_LINE_STATS() +} + +void SwStatsCpu::statsGBRG10PLine0(const uint8_t *src[]) +{ + const uint8_t *src0 = src[1] + window_.x * 5 / 4; + const uint8_t *src1 = src[2] + window_.x * 5 / 4; + const int widthInBytes = window_.width * 5 / 4; + + if (swapLines_) + std::swap(src0, src1); + + SWSTATS_START_LINE_STATS(uint8_t) + + /* x += 5 sample every other 2x2 block */ + for (int x = 0; x < widthInBytes; x += 5) { + /* GBRG */ + g = src0[x]; + b = src0[x + 1]; + r = src1[x]; + g2 = src1[x + 1]; + g = (g + g2) / 2; + /* Data is already 8 bits, divide by 1 */ + SWSTATS_ACCUMULATE_LINE_STATS(1) + } + + SWSTATS_FINISH_LINE_STATS() +} + +/** + * \brief Reset state to start statistics gathering for a new frame + * + * This may only be called after a successful setWindow() call. + */ +void SwStatsCpu::startFrame(void) +{ + if (window_.width == 0) + LOG(SwStatsCpu, Error) << "Calling startFrame() without setWindow()"; + + stats_.sumR_ = 0; + stats_.sumB_ = 0; + stats_.sumG_ = 0; + stats_.yHistogram.fill(0); +} + +/** + * \brief Finish statistics calculation for the current frame + * + * This may only be called after a successful setWindow() call. + */ +void SwStatsCpu::finishFrame(void) +{ + *sharedStats_ = stats_; + statsReady.emit(); +} + +/** + * \brief Setup SwStatsCpu object for standard Bayer orders + * \param[in] order The Bayer order + * + * Check if order is a standard Bayer order and setup xShift_ and swapLines_ + * so that a single BGGR stats function can be used for all 4 standard orders. + */ +int SwStatsCpu::setupStandardBayerOrder(BayerFormat::Order order) +{ + switch (order) { + case BayerFormat::BGGR: + xShift_ = 0; + swapLines_ = false; + break; + case BayerFormat::GBRG: + xShift_ = 1; /* BGGR -> GBRG */ + swapLines_ = false; + break; + case BayerFormat::GRBG: + xShift_ = 0; + swapLines_ = true; /* BGGR -> GRBG */ + break; + case BayerFormat::RGGB: + xShift_ = 1; /* BGGR -> GBRG */ + swapLines_ = true; /* GBRG -> RGGB */ + break; + default: + return -EINVAL; + } + + patternSize_.height = 2; + patternSize_.width = 2; + ySkipMask_ = 0x02; /* Skip every 3th and 4th line */ + return 0; +} + +/** + * \brief Configure the statistics object for the passed in input format + * \param[in] inputCfg The input format + * + * \return 0 on success, a negative errno value on failure + */ +int SwStatsCpu::configure(const StreamConfiguration &inputCfg) +{ + BayerFormat bayerFormat = + BayerFormat::fromPixelFormat(inputCfg.pixelFormat); + + if (bayerFormat.packing == BayerFormat::Packing::None && + setupStandardBayerOrder(bayerFormat.order) == 0) { + switch (bayerFormat.bitDepth) { + case 8: + stats0_ = &SwStatsCpu::statsBGGR8Line0; + return 0; + case 10: + stats0_ = &SwStatsCpu::statsBGGR10Line0; + return 0; + case 12: + stats0_ = &SwStatsCpu::statsBGGR12Line0; + return 0; + } + } + + if (bayerFormat.bitDepth == 10 && + bayerFormat.packing == BayerFormat::Packing::CSI2) { + patternSize_.height = 2; + patternSize_.width = 4; /* 5 bytes per *4* pixels */ + /* Skip every 3th and 4th line, sample every other 2x2 block */ + ySkipMask_ = 0x02; + xShift_ = 0; + + switch (bayerFormat.order) { + case BayerFormat::BGGR: + case BayerFormat::GRBG: + stats0_ = &SwStatsCpu::statsBGGR10PLine0; + swapLines_ = bayerFormat.order == BayerFormat::GRBG; + return 0; + case BayerFormat::GBRG: + case BayerFormat::RGGB: + stats0_ = &SwStatsCpu::statsGBRG10PLine0; + swapLines_ = bayerFormat.order == BayerFormat::RGGB; + return 0; + default: + break; + } + } + + LOG(SwStatsCpu, Info) + << "Unsupported input format " << inputCfg.pixelFormat.toString(); + return -EINVAL; +} + +/** + * \brief Specify window coordinates over which to gather statistics + * \param[in] window The window object. + */ +void SwStatsCpu::setWindow(const Rectangle &window) +{ + window_ = window; + + window_.x &= ~(patternSize_.width - 1); + window_.x += xShift_; + window_.y &= ~(patternSize_.height - 1); + + /* width_ - xShift_ to make sure the window fits */ + window_.width -= xShift_; + window_.width &= ~(patternSize_.width - 1); + window_.height &= ~(patternSize_.height - 1); +} + +} /* namespace libcamera */ diff --git a/src/libcamera/software_isp/swstats_cpu.h b/src/libcamera/software_isp/swstats_cpu.h new file mode 100644 index 00000000..363e326f --- /dev/null +++ b/src/libcamera/software_isp/swstats_cpu.h @@ -0,0 +1,97 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ +/* + * Copyright (C) 2023, Linaro Ltd + * Copyright (C) 2023, Red Hat Inc. + * + * Authors: + * Hans de Goede <hdegoede@redhat.com> + * + * CPU based software statistics implementation + */ + +#pragma once + +#include <stdint.h> + +#include <libcamera/base/signal.h> + +#include <libcamera/geometry.h> + +#include "libcamera/internal/bayer_format.h" +#include "libcamera/internal/shared_mem_object.h" +#include "libcamera/internal/software_isp/swisp_stats.h" + +namespace libcamera { + +class PixelFormat; +struct StreamConfiguration; + +class SwStatsCpu +{ +public: + SwStatsCpu(); + ~SwStatsCpu() = default; + + bool isValid() const { return sharedStats_.fd().isValid(); } + + const SharedFD &getStatsFD() { return sharedStats_.fd(); } + + const Size &patternSize() { return patternSize_; } + + int configure(const StreamConfiguration &inputCfg); + void setWindow(const Rectangle &window); + void startFrame(); + void finishFrame(); + + void processLine0(unsigned int y, const uint8_t *src[]) + { + if ((y & ySkipMask_) || y < static_cast<unsigned int>(window_.y) || + y >= (window_.y + window_.height)) + return; + + (this->*stats0_)(src); + } + + void processLine2(unsigned int y, const uint8_t *src[]) + { + if ((y & ySkipMask_) || y < static_cast<unsigned int>(window_.y) || + y >= (window_.y + window_.height)) + return; + + (this->*stats2_)(src); + } + + Signal<> statsReady; + +private: + using statsProcessFn = void (SwStatsCpu::*)(const uint8_t *src[]); + + int setupStandardBayerOrder(BayerFormat::Order order); + /* Bayer 8 bpp unpacked */ + void statsBGGR8Line0(const uint8_t *src[]); + /* Bayer 10 bpp unpacked */ + void statsBGGR10Line0(const uint8_t *src[]); + /* Bayer 12 bpp unpacked */ + void statsBGGR12Line0(const uint8_t *src[]); + /* Bayer 10 bpp packed */ + void statsBGGR10PLine0(const uint8_t *src[]); + void statsGBRG10PLine0(const uint8_t *src[]); + + /* Variables set by configure(), used every line */ + statsProcessFn stats0_; + statsProcessFn stats2_; + bool swapLines_; + + unsigned int ySkipMask_; + + Rectangle window_; + + Size patternSize_; + + unsigned int xShift_; + + SharedMemObject<SwIspStats> sharedStats_; + SwIspStats stats_; +}; + +} /* namespace libcamera */ |