summaryrefslogtreecommitdiff
path: root/src/libcamera/software_isp
diff options
context:
space:
mode:
Diffstat (limited to 'src/libcamera/software_isp')
-rw-r--r--src/libcamera/software_isp/TODO279
-rw-r--r--src/libcamera/software_isp/debayer.cpp132
-rw-r--r--src/libcamera/software_isp/debayer.h54
-rw-r--r--src/libcamera/software_isp/debayer_cpu.cpp807
-rw-r--r--src/libcamera/software_isp/debayer_cpu.h158
-rw-r--r--src/libcamera/software_isp/meson.build15
-rw-r--r--src/libcamera/software_isp/software_isp.cpp357
-rw-r--r--src/libcamera/software_isp/swstats_cpu.cpp432
-rw-r--r--src/libcamera/software_isp/swstats_cpu.h97
9 files changed, 2331 insertions, 0 deletions
diff --git a/src/libcamera/software_isp/TODO b/src/libcamera/software_isp/TODO
new file mode 100644
index 00000000..4fcee39b
--- /dev/null
+++ b/src/libcamera/software_isp/TODO
@@ -0,0 +1,279 @@
+1. Setting F_SEAL_SHRINK and F_SEAL_GROW after ftruncate()
+
+>> SharedMem::SharedMem(const std::string &name, std::size_t size)
+>> : name_(name), size_(size), mem_(nullptr)
+>>
+>> ...
+>>
+>> if (ftruncate(fd_.get(), size_) < 0)
+>> return;
+>
+> Should we set the GROW and SHRINK seals (in a separate patch) ?
+
+Yes, this can be done.
+Setting F_SEAL_SHRINK and F_SEAL_GROW after the ftruncate() call above could catch
+some potential errors related to improper access to the shared memory allocated by
+the SharedMemObject.
+
+---
+
+2. Reconsider stats sharing
+
+>>> +void SwStatsCpu::finishFrame(void)
+>>> +{
+>>> + *sharedStats_ = stats_;
+>>
+>> Is it more efficient to copy the stats instead of operating directly on
+>> the shared memory ?
+>
+> I inherited doing things this way from Andrey. I kept this because
+> we don't really have any synchronization with the IPA reading this.
+>
+> So the idea is to only touch this when the next set of statistics
+> is ready since we don't know when the IPA is done with accessing
+> the previous set of statistics ...
+>
+> This is both something which seems mostly a theoretic problem,
+> yet also definitely something which I think we need to fix.
+>
+> Maybe use a ringbuffer of stats buffers and pass the index into
+> the ringbuffer to the emit signal ?
+
+That would match how we deal with hardware ISPs, and I think that's a
+good idea. It will help decoupling the processing side from the IPA.
+
+---
+
+3. Remove statsReady signal
+
+> class SwStatsCpu
+> {
+> /**
+> * \brief Signals that the statistics are ready
+> */
+> Signal<> statsReady;
+
+But better, I wonder if the signal could be dropped completely. The
+SwStatsCpu class does not operate asynchronously. Shouldn't whoever
+calls the finishFrame() function then handle emitting the signal ?
+
+Now, the trouble is that this would be the DebayerCpu class, whose name
+doesn't indicate it as a prime candidate to handle stats. However, it
+already exposes a getStatsFD() function, so we're already calling for
+trouble :-) Either that should be moved to somewhere else, or the class
+should be renamed. Considering that the class applies colour gains in
+addition to performing the interpolation, it may be more of a naming
+issue.
+
+Removing the signal and refactoring those classes doesn't have to be
+addressed now, I think it would be part of a larger refactoring
+(possibly also considering platforms that have no ISP but can produce
+stats in hardware, such as the i.MX7), but please keep it on your radar.
+
+---
+
+4. Hide internal representation of gains from callers
+
+> struct DebayerParams {
+> static constexpr unsigned int kGain10 = 256;
+
+Forcing the caller to deal with the internal representation of gains
+isn't nice, especially given that it precludes implementing gains of
+different precisions in different backends. Wouldn't it be better to pass
+the values as floating point numbers, and convert them to the internal
+representation in the implementation of process() before using them ?
+
+---
+
+5. Store ISP parameters in per-frame buffers
+
+> /**
+> * \fn void Debayer::process(FrameBuffer *input, FrameBuffer *output, DebayerParams params)
+> * \brief Process the bayer data into the requested format.
+> * \param[in] input The input buffer.
+> * \param[in] output The output buffer.
+> * \param[in] params The parameters to be used in debayering.
+> *
+> * \note DebayerParams is passed by value deliberately so that a copy is passed
+> * when this is run in another thread by invokeMethod().
+> */
+
+Possibly something to address later, by storing ISP parameters in
+per-frame buffers like we do for hardware ISPs.
+
+---
+
+6. Input buffer copying configuration
+
+> DebayerCpu::DebayerCpu(std::unique_ptr<SwStatsCpu> stats)
+> : stats_(std::move(stats)), gammaCorrection_(1.0)
+> {
+> enableInputMemcpy_ = true;
+
+Set this appropriately and/or make it configurable.
+
+---
+
+7. Performance measurement configuration
+
+> void DebayerCpu::process(FrameBuffer *input, FrameBuffer *output, DebayerParams params)
+> /* Measure before emitting signals */
+> if (measuredFrames_ < DebayerCpu::kLastFrameToMeasure &&
+> ++measuredFrames_ > DebayerCpu::kFramesToSkip) {
+> timespec frameEndTime = {};
+> clock_gettime(CLOCK_MONOTONIC_RAW, &frameEndTime);
+> frameProcessTime_ += timeDiff(frameEndTime, frameStartTime);
+> if (measuredFrames_ == DebayerCpu::kLastFrameToMeasure) {
+> const unsigned int measuredFrames = DebayerCpu::kLastFrameToMeasure -
+> DebayerCpu::kFramesToSkip;
+> LOG(Debayer, Info)
+> << "Processed " << measuredFrames
+> << " frames in " << frameProcessTime_ / 1000 << "us, "
+> << frameProcessTime_ / (1000 * measuredFrames)
+> << " us/frame";
+> }
+> }
+
+I wonder if there would be a way to control at runtime when/how to
+perform those measurements. Maybe that's a bit overkill.
+
+---
+
+8. DebayerCpu cleanups
+
+> >> class DebayerCpu : public Debayer, public Object
+> >> const SharedFD &getStatsFD() { return stats_->getStatsFD(); }
+> >
+> > This,
+>
+> Note the statistics pass-through stuff is sort of a necessary evil
+> since we want one main loop going over the data line by line and
+> doing both debayering as well as stats while the line is still
+> hot in the l2 cache. And things like the process2() and process4()
+> loops are highly CPU debayering specific so I don't think we should
+> move those out of the CpuDebayer code.
+
+Yes, that I understood from the review. "necessary evil" is indeed the
+right term :-) I expect it will take quite some design skills to balance
+the need for performance and the need for a maintainable architecture.
+
+> > plus the fact that this class handles colour gains and gamma,
+> > makes me think we have either a naming issue, or an architecture issue.
+>
+> I agree that this does a bit more than debayering, although
+> the debayering really is the main thing it does.
+>
+> I guess the calculation of the rgb lookup tables which do the
+> color gains and gamma could be moved outside of this class,
+> that might even be beneficial for GPU based debayering assuming
+> that that is going to use rgb lookup tables too (it could
+> implement actual color gains + gamma correction in some different
+> way).
+>
+> I think this falls under the let's wait until we have a GPU
+> based SoftISP MVP/POC and then do some refactoring to see which
+> bits should go where.
+
+---
+
+8. Decouple pipeline and IPA naming
+
+> The current src/ipa/meson.build assumes the IPA name to match the
+> pipeline name. For this reason "-Dipas=simple" is used for the
+> Soft IPA module.
+
+This should be addressed.
+
+---
+
+9. Doxyfile cleanup
+
+>> diff --git a/Documentation/Doxyfile.in b/Documentation/Doxyfile.in
+>> index a86ea6c1..2be8d47b 100644
+>> --- a/Documentation/Doxyfile.in
+>> +++ b/Documentation/Doxyfile.in
+>> @@ -44,6 +44,7 @@ EXCLUDE = @TOP_SRCDIR@/include/libcamera/base/span.h \
+>> @TOP_SRCDIR@/src/libcamera/pipeline/ \
+>> @TOP_SRCDIR@/src/libcamera/tracepoints.cpp \
+>> @TOP_BUILDDIR@/include/libcamera/internal/tracepoints.h \
+>> + @TOP_BUILDDIR@/include/libcamera/ipa/soft_ipa_interface.h \
+> Why is this needed ?
+>
+>> @TOP_BUILDDIR@/src/libcamera/proxy/
+>> EXCLUDE_PATTERNS = @TOP_BUILDDIR@/include/libcamera/ipa/*_serializer.h \
+>> diff --git a/include/libcamera/ipa/meson.build b/include/libcamera/ipa/meson.build
+>> index f3b4881c..3352d08f 100644
+>> --- a/include/libcamera/ipa/meson.build
+>> +++ b/include/libcamera/ipa/meson.build
+>> @@ -65,6 +65,7 @@ pipeline_ipa_mojom_mapping = {
+>> 'ipu3': 'ipu3.mojom',
+>> 'rkisp1': 'rkisp1.mojom',
+>> 'rpi/vc4': 'raspberrypi.mojom',
+>> + 'simple': 'soft.mojom',
+>> 'vimc': 'vimc.mojom',
+>> }
+>> diff --git a/include/libcamera/ipa/soft.mojom b/include/libcamera/ipa/soft.mojom
+>> new file mode 100644
+>> index 00000000..c249bd75
+>> --- /dev/null
+>> +++ b/include/libcamera/ipa/soft.mojom
+>> @@ -0,0 +1,28 @@
+>> +/* SPDX-License-Identifier: LGPL-2.1-or-later */
+>> +
+>> +/*
+>> + * \todo Document the interface and remove the related EXCLUDE_PATTERNS entry.
+> Ah that's why.
+
+Yes, because, well... all the other IPAs were doing that...
+
+> It doesn't have to be done before merging, but could you
+> address this sooner than later ?
+
+---
+
+10. Switch to libipa/algorithm.h API in processStats
+
+>> void IPASoftSimple::processStats(const ControlList &sensorControls)
+>>
+> Do you envision switching to the libipa/algorithm.h API at some point ?
+
+At some point, yes.
+
+---
+
+11. Improve handling of the sensor controls which take effect with a delay
+
+> void IPASoftSimple::processStats(const ControlList &sensorControls)
+> {
+> ...
+> /*
+> * AE / AGC, use 2 frames delay to make sure that the exposure and
+> * the gain set have applied to the camera sensor.
+> */
+> if (ignore_updates_ > 0) {
+> --ignore_updates_;
+> return;
+> }
+
+This could be handled better with DelayedControls.
+
+---
+
+12. Use DelayedControls class in ispStatsReady()
+
+> void SimpleCameraData::ispStatsReady()
+> {
+> swIsp_->processStats(sensor_->getControls({ V4L2_CID_ANALOGUE_GAIN,
+> V4L2_CID_EXPOSURE }));
+
+You should use the DelayedControls class.
+
+---
+
+13. Improve black level and colour gains application
+
+I think the black level should eventually be moved before debayering, and
+ideally the colour gains as well. I understand the need for optimizations to
+lower the CPU consumption, but at the same time I don't feel comfortable
+building up on top of an implementation that may work a bit more by chance than
+by correctness, as that's not very maintainable.
diff --git a/src/libcamera/software_isp/debayer.cpp b/src/libcamera/software_isp/debayer.cpp
new file mode 100644
index 00000000..efe75ea8
--- /dev/null
+++ b/src/libcamera/software_isp/debayer.cpp
@@ -0,0 +1,132 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/*
+ * Copyright (C) 2023, Linaro Ltd
+ * Copyright (C) 2023, Red Hat Inc.
+ *
+ * Authors:
+ * Hans de Goede <hdegoede@redhat.com>
+ *
+ * debayer base class
+ */
+
+#include "debayer.h"
+
+namespace libcamera {
+
+/**
+ * \struct DebayerParams
+ * \brief Struct to hold the debayer parameters.
+ */
+
+/**
+ * \var DebayerParams::kGain10
+ * \brief const value for 1.0 gain
+ */
+
+/**
+ * \var DebayerParams::gainR
+ * \brief Red gain
+ *
+ * 128 = 0.5, 256 = 1.0, 512 = 2.0, etc.
+ */
+
+/**
+ * \var DebayerParams::gainG
+ * \brief Green gain
+ *
+ * 128 = 0.5, 256 = 1.0, 512 = 2.0, etc.
+ */
+
+/**
+ * \var DebayerParams::gainB
+ * \brief Blue gain
+ *
+ * 128 = 0.5, 256 = 1.0, 512 = 2.0, etc.
+ */
+
+/**
+ * \var DebayerParams::gamma
+ * \brief Gamma correction, 1.0 is no correction
+ */
+
+/**
+ * \class Debayer
+ * \brief Base debayering class
+ *
+ * Base class that provides functions for setting up the debayering process.
+ */
+
+LOG_DEFINE_CATEGORY(Debayer)
+
+Debayer::~Debayer()
+{
+}
+
+/**
+ * \fn int Debayer::configure(const StreamConfiguration &inputCfg, const std::vector<std::reference_wrapper<StreamConfiguration>> &outputCfgs)
+ * \brief Configure the debayer object according to the passed in parameters.
+ * \param[in] inputCfg The input configuration.
+ * \param[in] outputCfgs The output configurations.
+ *
+ * \return 0 on success, a negative errno on failure.
+ */
+
+/**
+ * \fn Size Debayer::patternSize(PixelFormat inputFormat)
+ * \brief Get the width and height at which the bayer pattern repeats.
+ * \param[in] inputFormat The input format.
+ *
+ * Valid sizes are: 2x2, 4x2 or 4x4.
+ *
+ * \return Pattern size or an empty size for unsupported inputFormats.
+ */
+
+/**
+ * \fn std::vector<PixelFormat> Debayer::formats(PixelFormat inputFormat)
+ * \brief Get the supported output formats.
+ * \param[in] inputFormat The input format.
+ *
+ * \return All supported output formats or an empty vector if there are none.
+ */
+
+/**
+ * \fn std::tuple<unsigned int, unsigned int> Debayer::strideAndFrameSize(const PixelFormat &outputFormat, const Size &size)
+ * \brief Get the stride and the frame size.
+ * \param[in] outputFormat The output format.
+ * \param[in] size The output size.
+ *
+ * \return A tuple of the stride and the frame size, or a tuple with 0,0 if
+ * there is no valid output config.
+ */
+
+/**
+ * \fn void Debayer::process(FrameBuffer *input, FrameBuffer *output, DebayerParams params)
+ * \brief Process the bayer data into the requested format.
+ * \param[in] input The input buffer.
+ * \param[in] output The output buffer.
+ * \param[in] params The parameters to be used in debayering.
+ *
+ * \note DebayerParams is passed by value deliberately so that a copy is passed
+ * when this is run in another thread by invokeMethod().
+ */
+
+/**
+ * \fn virtual SizeRange Debayer::sizes(PixelFormat inputFormat, const Size &inputSize)
+ * \brief Get the supported output sizes for the given input format and size.
+ * \param[in] inputFormat The input format.
+ * \param[in] inputSize The input size.
+ *
+ * \return The valid size ranges or an empty range if there are none.
+ */
+
+/**
+ * \var Signal<FrameBuffer *> Debayer::inputBufferReady
+ * \brief Signals when the input buffer is ready.
+ */
+
+/**
+ * \var Signal<FrameBuffer *> Debayer::outputBufferReady
+ * \brief Signals when the output buffer is ready.
+ */
+
+} /* namespace libcamera */
diff --git a/src/libcamera/software_isp/debayer.h b/src/libcamera/software_isp/debayer.h
new file mode 100644
index 00000000..c151fe5d
--- /dev/null
+++ b/src/libcamera/software_isp/debayer.h
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/*
+ * Copyright (C) 2023, Linaro Ltd
+ * Copyright (C) 2023, Red Hat Inc.
+ *
+ * Authors:
+ * Hans de Goede <hdegoede@redhat.com>
+ *
+ * debayering base class
+ */
+
+#pragma once
+
+#include <stdint.h>
+
+#include <libcamera/base/log.h>
+#include <libcamera/base/signal.h>
+
+#include <libcamera/geometry.h>
+#include <libcamera/stream.h>
+
+#include "libcamera/internal/software_isp/debayer_params.h"
+
+namespace libcamera {
+
+class FrameBuffer;
+
+LOG_DECLARE_CATEGORY(Debayer)
+
+class Debayer
+{
+public:
+ virtual ~Debayer() = 0;
+
+ virtual int configure(const StreamConfiguration &inputCfg,
+ const std::vector<std::reference_wrapper<StreamConfiguration>> &outputCfgs) = 0;
+
+ virtual std::vector<PixelFormat> formats(PixelFormat inputFormat) = 0;
+
+ virtual std::tuple<unsigned int, unsigned int>
+ strideAndFrameSize(const PixelFormat &outputFormat, const Size &size) = 0;
+
+ virtual void process(FrameBuffer *input, FrameBuffer *output, DebayerParams params) = 0;
+
+ virtual SizeRange sizes(PixelFormat inputFormat, const Size &inputSize) = 0;
+
+ Signal<FrameBuffer *> inputBufferReady;
+ Signal<FrameBuffer *> outputBufferReady;
+
+private:
+ virtual Size patternSize(PixelFormat inputFormat) = 0;
+};
+
+} /* namespace libcamera */
diff --git a/src/libcamera/software_isp/debayer_cpu.cpp b/src/libcamera/software_isp/debayer_cpu.cpp
new file mode 100644
index 00000000..8254bbe9
--- /dev/null
+++ b/src/libcamera/software_isp/debayer_cpu.cpp
@@ -0,0 +1,807 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/*
+ * Copyright (C) 2023, Linaro Ltd
+ * Copyright (C) 2023, Red Hat Inc.
+ *
+ * Authors:
+ * Hans de Goede <hdegoede@redhat.com>
+ *
+ * CPU based debayering class
+ */
+
+#include "debayer_cpu.h"
+
+#include <math.h>
+#include <stdlib.h>
+#include <time.h>
+
+#include <libcamera/formats.h>
+
+#include "libcamera/internal/bayer_format.h"
+#include "libcamera/internal/framebuffer.h"
+#include "libcamera/internal/mapped_framebuffer.h"
+
+namespace libcamera {
+
+/**
+ * \class DebayerCpu
+ * \brief Class for debayering on the CPU
+ *
+ * Implementation for CPU based debayering
+ */
+
+/**
+ * \brief Constructs a DebayerCpu object
+ * \param[in] stats Pointer to the stats object to use
+ */
+DebayerCpu::DebayerCpu(std::unique_ptr<SwStatsCpu> stats)
+ : stats_(std::move(stats)), gammaCorrection_(1.0), blackLevel_(0)
+{
+ /*
+ * Reading from uncached buffers may be very slow.
+ * In such a case, it's better to copy input buffer data to normal memory.
+ * But in case of cached buffers, copying the data is unnecessary overhead.
+ * enableInputMemcpy_ makes this behavior configurable. At the moment, we
+ * always set it to true as the safer choice but this should be changed in
+ * future.
+ */
+ enableInputMemcpy_ = true;
+
+ /* Initialize the gamma lookup table to a linear (gamma 1.0) curve */
+ for (unsigned int i = 0; i < kGammaLookupSize; i++)
+ gamma_[i] = i / (kGammaLookupSize / kRGBLookupSize);
+
+ for (unsigned int i = 0; i < kMaxLineBuffers; i++)
+ lineBuffers_[i] = nullptr;
+}
+
+DebayerCpu::~DebayerCpu()
+{
+ for (unsigned int i = 0; i < kMaxLineBuffers; i++)
+ free(lineBuffers_[i]);
+}
+
+#define DECLARE_SRC_POINTERS(pixel_t) \
+ const pixel_t *prev = (const pixel_t *)src[0] + xShift_; \
+ const pixel_t *curr = (const pixel_t *)src[1] + xShift_; \
+ const pixel_t *next = (const pixel_t *)src[2] + xShift_;
+
+/*
+ * RGR
+ * GBG
+ * RGR
+ */
+#define BGGR_BGR888(p, n, div) \
+ *dst++ = blue_[curr[x] / (div)]; \
+ *dst++ = green_[(prev[x] + curr[x - p] + curr[x + n] + next[x]) / (4 * (div))]; \
+ *dst++ = red_[(prev[x - p] + prev[x + n] + next[x - p] + next[x + n]) / (4 * (div))]; \
+ x++;
+
+/*
+ * GBG
+ * RGR
+ * GBG
+ */
+#define GRBG_BGR888(p, n, div) \
+ *dst++ = blue_[(prev[x] + next[x]) / (2 * (div))]; \
+ *dst++ = green_[curr[x] / (div)]; \
+ *dst++ = red_[(curr[x - p] + curr[x + n]) / (2 * (div))]; \
+ x++;
+
+/*
+ * GRG
+ * BGB
+ * GRG
+ */
+#define GBRG_BGR888(p, n, div) \
+ *dst++ = blue_[(curr[x - p] + curr[x + n]) / (2 * (div))]; \
+ *dst++ = green_[curr[x] / (div)]; \
+ *dst++ = red_[(prev[x] + next[x]) / (2 * (div))]; \
+ x++;
+
+/*
+ * BGB
+ * GRG
+ * BGB
+ */
+#define RGGB_BGR888(p, n, div) \
+ *dst++ = blue_[(prev[x - p] + prev[x + n] + next[x - p] + next[x + n]) / (4 * (div))]; \
+ *dst++ = green_[(prev[x] + curr[x - p] + curr[x + n] + next[x]) / (4 * (div))]; \
+ *dst++ = red_[curr[x] / (div)]; \
+ x++;
+
+void DebayerCpu::debayer8_BGBG_BGR888(uint8_t *dst, const uint8_t *src[])
+{
+ DECLARE_SRC_POINTERS(uint8_t)
+
+ for (int x = 0; x < (int)window_.width;) {
+ BGGR_BGR888(1, 1, 1)
+ GBRG_BGR888(1, 1, 1)
+ }
+}
+
+void DebayerCpu::debayer8_GRGR_BGR888(uint8_t *dst, const uint8_t *src[])
+{
+ DECLARE_SRC_POINTERS(uint8_t)
+
+ for (int x = 0; x < (int)window_.width;) {
+ GRBG_BGR888(1, 1, 1)
+ RGGB_BGR888(1, 1, 1)
+ }
+}
+
+void DebayerCpu::debayer10_BGBG_BGR888(uint8_t *dst, const uint8_t *src[])
+{
+ DECLARE_SRC_POINTERS(uint16_t)
+
+ for (int x = 0; x < (int)window_.width;) {
+ /* divide values by 4 for 10 -> 8 bpp value */
+ BGGR_BGR888(1, 1, 4)
+ GBRG_BGR888(1, 1, 4)
+ }
+}
+
+void DebayerCpu::debayer10_GRGR_BGR888(uint8_t *dst, const uint8_t *src[])
+{
+ DECLARE_SRC_POINTERS(uint16_t)
+
+ for (int x = 0; x < (int)window_.width;) {
+ /* divide values by 4 for 10 -> 8 bpp value */
+ GRBG_BGR888(1, 1, 4)
+ RGGB_BGR888(1, 1, 4)
+ }
+}
+
+void DebayerCpu::debayer12_BGBG_BGR888(uint8_t *dst, const uint8_t *src[])
+{
+ DECLARE_SRC_POINTERS(uint16_t)
+
+ for (int x = 0; x < (int)window_.width;) {
+ /* divide values by 16 for 12 -> 8 bpp value */
+ BGGR_BGR888(1, 1, 16)
+ GBRG_BGR888(1, 1, 16)
+ }
+}
+
+void DebayerCpu::debayer12_GRGR_BGR888(uint8_t *dst, const uint8_t *src[])
+{
+ DECLARE_SRC_POINTERS(uint16_t)
+
+ for (int x = 0; x < (int)window_.width;) {
+ /* divide values by 16 for 12 -> 8 bpp value */
+ GRBG_BGR888(1, 1, 16)
+ RGGB_BGR888(1, 1, 16)
+ }
+}
+
+void DebayerCpu::debayer10P_BGBG_BGR888(uint8_t *dst, const uint8_t *src[])
+{
+ const int widthInBytes = window_.width * 5 / 4;
+ const uint8_t *prev = src[0];
+ const uint8_t *curr = src[1];
+ const uint8_t *next = src[2];
+
+ /*
+ * For the first pixel getting a pixel from the previous column uses
+ * x - 2 to skip the 5th byte with least-significant bits for 4 pixels.
+ * Same for last pixel (uses x + 2) and looking at the next column.
+ */
+ for (int x = 0; x < widthInBytes;) {
+ /* First pixel */
+ BGGR_BGR888(2, 1, 1)
+ /* Second pixel BGGR -> GBRG */
+ GBRG_BGR888(1, 1, 1)
+ /* Same thing for third and fourth pixels */
+ BGGR_BGR888(1, 1, 1)
+ GBRG_BGR888(1, 2, 1)
+ /* Skip 5th src byte with 4 x 2 least-significant-bits */
+ x++;
+ }
+}
+
+void DebayerCpu::debayer10P_GRGR_BGR888(uint8_t *dst, const uint8_t *src[])
+{
+ const int widthInBytes = window_.width * 5 / 4;
+ const uint8_t *prev = src[0];
+ const uint8_t *curr = src[1];
+ const uint8_t *next = src[2];
+
+ for (int x = 0; x < widthInBytes;) {
+ /* First pixel */
+ GRBG_BGR888(2, 1, 1)
+ /* Second pixel GRBG -> RGGB */
+ RGGB_BGR888(1, 1, 1)
+ /* Same thing for third and fourth pixels */
+ GRBG_BGR888(1, 1, 1)
+ RGGB_BGR888(1, 2, 1)
+ /* Skip 5th src byte with 4 x 2 least-significant-bits */
+ x++;
+ }
+}
+
+void DebayerCpu::debayer10P_GBGB_BGR888(uint8_t *dst, const uint8_t *src[])
+{
+ const int widthInBytes = window_.width * 5 / 4;
+ const uint8_t *prev = src[0];
+ const uint8_t *curr = src[1];
+ const uint8_t *next = src[2];
+
+ for (int x = 0; x < widthInBytes;) {
+ /* Even pixel */
+ GBRG_BGR888(2, 1, 1)
+ /* Odd pixel GBRG -> BGGR */
+ BGGR_BGR888(1, 1, 1)
+ /* Same thing for next 2 pixels */
+ GBRG_BGR888(1, 1, 1)
+ BGGR_BGR888(1, 2, 1)
+ /* Skip 5th src byte with 4 x 2 least-significant-bits */
+ x++;
+ }
+}
+
+void DebayerCpu::debayer10P_RGRG_BGR888(uint8_t *dst, const uint8_t *src[])
+{
+ const int widthInBytes = window_.width * 5 / 4;
+ const uint8_t *prev = src[0];
+ const uint8_t *curr = src[1];
+ const uint8_t *next = src[2];
+
+ for (int x = 0; x < widthInBytes;) {
+ /* Even pixel */
+ RGGB_BGR888(2, 1, 1)
+ /* Odd pixel RGGB -> GRBG */
+ GRBG_BGR888(1, 1, 1)
+ /* Same thing for next 2 pixels */
+ RGGB_BGR888(1, 1, 1)
+ GRBG_BGR888(1, 2, 1)
+ /* Skip 5th src byte with 4 x 2 least-significant-bits */
+ x++;
+ }
+}
+
+static bool isStandardBayerOrder(BayerFormat::Order order)
+{
+ return order == BayerFormat::BGGR || order == BayerFormat::GBRG ||
+ order == BayerFormat::GRBG || order == BayerFormat::RGGB;
+}
+
+/*
+ * Fill in the input configuration (bpp, pattern size and supported output
+ * formats) for the given input format.
+ * Return 0 on success, -EINVAL for an unsupported input format.
+ */
+int DebayerCpu::getInputConfig(PixelFormat inputFormat, DebayerInputConfig &config)
+{
+ BayerFormat bayerFormat =
+ BayerFormat::fromPixelFormat(inputFormat);
+
+ if ((bayerFormat.bitDepth == 8 || bayerFormat.bitDepth == 10 || bayerFormat.bitDepth == 12) &&
+ bayerFormat.packing == BayerFormat::Packing::None &&
+ isStandardBayerOrder(bayerFormat.order)) {
+ config.bpp = (bayerFormat.bitDepth + 7) & ~7;
+ config.patternSize.width = 2;
+ config.patternSize.height = 2;
+ config.outputFormats = std::vector<PixelFormat>({ formats::RGB888, formats::BGR888 });
+ return 0;
+ }
+
+ if (bayerFormat.bitDepth == 10 &&
+ bayerFormat.packing == BayerFormat::Packing::CSI2 &&
+ isStandardBayerOrder(bayerFormat.order)) {
+ config.bpp = 10;
+ config.patternSize.width = 4; /* 5 bytes per *4* pixels */
+ config.patternSize.height = 2;
+ config.outputFormats = std::vector<PixelFormat>({ formats::RGB888, formats::BGR888 });
+ return 0;
+ }
+
+ LOG(Debayer, Info)
+ << "Unsupported input format " << inputFormat.toString();
+ return -EINVAL;
+}
+
+int DebayerCpu::getOutputConfig(PixelFormat outputFormat, DebayerOutputConfig &config)
+{
+ if (outputFormat == formats::RGB888 || outputFormat == formats::BGR888) {
+ config.bpp = 24;
+ return 0;
+ }
+
+ LOG(Debayer, Info)
+ << "Unsupported output format " << outputFormat.toString();
+ return -EINVAL;
+}
+
+/*
+ * Check for standard Bayer orders and set xShift_ and swap debayer0/1, so that
+ * a single pair of BGGR debayer functions can be used for all 4 standard orders.
+ */
+int DebayerCpu::setupStandardBayerOrder(BayerFormat::Order order)
+{
+ switch (order) {
+ case BayerFormat::BGGR:
+ break;
+ case BayerFormat::GBRG:
+ xShift_ = 1; /* BGGR -> GBRG */
+ break;
+ case BayerFormat::GRBG:
+ std::swap(debayer0_, debayer1_); /* BGGR -> GRBG */
+ break;
+ case BayerFormat::RGGB:
+ xShift_ = 1; /* BGGR -> GBRG */
+ std::swap(debayer0_, debayer1_); /* GBRG -> RGGB */
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int DebayerCpu::setDebayerFunctions(PixelFormat inputFormat, PixelFormat outputFormat)
+{
+ BayerFormat bayerFormat =
+ BayerFormat::fromPixelFormat(inputFormat);
+
+ xShift_ = 0;
+ swapRedBlueGains_ = false;
+
+ auto invalidFmt = []() -> int {
+ LOG(Debayer, Error) << "Unsupported input output format combination";
+ return -EINVAL;
+ };
+
+ switch (outputFormat) {
+ case formats::RGB888:
+ break;
+ case formats::BGR888:
+ /* Swap R and B in bayer order to generate BGR888 instead of RGB888 */
+ swapRedBlueGains_ = true;
+
+ switch (bayerFormat.order) {
+ case BayerFormat::BGGR:
+ bayerFormat.order = BayerFormat::RGGB;
+ break;
+ case BayerFormat::GBRG:
+ bayerFormat.order = BayerFormat::GRBG;
+ break;
+ case BayerFormat::GRBG:
+ bayerFormat.order = BayerFormat::GBRG;
+ break;
+ case BayerFormat::RGGB:
+ bayerFormat.order = BayerFormat::BGGR;
+ break;
+ default:
+ return invalidFmt();
+ }
+ break;
+ default:
+ return invalidFmt();
+ }
+
+ if ((bayerFormat.bitDepth == 8 || bayerFormat.bitDepth == 10 || bayerFormat.bitDepth == 12) &&
+ bayerFormat.packing == BayerFormat::Packing::None &&
+ isStandardBayerOrder(bayerFormat.order)) {
+ switch (bayerFormat.bitDepth) {
+ case 8:
+ debayer0_ = &DebayerCpu::debayer8_BGBG_BGR888;
+ debayer1_ = &DebayerCpu::debayer8_GRGR_BGR888;
+ break;
+ case 10:
+ debayer0_ = &DebayerCpu::debayer10_BGBG_BGR888;
+ debayer1_ = &DebayerCpu::debayer10_GRGR_BGR888;
+ break;
+ case 12:
+ debayer0_ = &DebayerCpu::debayer12_BGBG_BGR888;
+ debayer1_ = &DebayerCpu::debayer12_GRGR_BGR888;
+ break;
+ }
+ setupStandardBayerOrder(bayerFormat.order);
+ return 0;
+ }
+
+ if (bayerFormat.bitDepth == 10 &&
+ bayerFormat.packing == BayerFormat::Packing::CSI2) {
+ switch (bayerFormat.order) {
+ case BayerFormat::BGGR:
+ debayer0_ = &DebayerCpu::debayer10P_BGBG_BGR888;
+ debayer1_ = &DebayerCpu::debayer10P_GRGR_BGR888;
+ return 0;
+ case BayerFormat::GBRG:
+ debayer0_ = &DebayerCpu::debayer10P_GBGB_BGR888;
+ debayer1_ = &DebayerCpu::debayer10P_RGRG_BGR888;
+ return 0;
+ case BayerFormat::GRBG:
+ debayer0_ = &DebayerCpu::debayer10P_GRGR_BGR888;
+ debayer1_ = &DebayerCpu::debayer10P_BGBG_BGR888;
+ return 0;
+ case BayerFormat::RGGB:
+ debayer0_ = &DebayerCpu::debayer10P_RGRG_BGR888;
+ debayer1_ = &DebayerCpu::debayer10P_GBGB_BGR888;
+ return 0;
+ default:
+ break;
+ }
+ }
+
+ return invalidFmt();
+}
+
+int DebayerCpu::configure(const StreamConfiguration &inputCfg,
+ const std::vector<std::reference_wrapper<StreamConfiguration>> &outputCfgs)
+{
+ if (getInputConfig(inputCfg.pixelFormat, inputConfig_) != 0)
+ return -EINVAL;
+
+ if (stats_->configure(inputCfg) != 0)
+ return -EINVAL;
+
+ const Size &statsPatternSize = stats_->patternSize();
+ if (inputConfig_.patternSize.width != statsPatternSize.width ||
+ inputConfig_.patternSize.height != statsPatternSize.height) {
+ LOG(Debayer, Error)
+ << "mismatching stats and debayer pattern sizes for "
+ << inputCfg.pixelFormat.toString();
+ return -EINVAL;
+ }
+
+ inputConfig_.stride = inputCfg.stride;
+
+ if (outputCfgs.size() != 1) {
+ LOG(Debayer, Error)
+ << "Unsupported number of output streams: "
+ << outputCfgs.size();
+ return -EINVAL;
+ }
+
+ const StreamConfiguration &outputCfg = outputCfgs[0];
+ SizeRange outSizeRange = sizes(inputCfg.pixelFormat, inputCfg.size);
+ std::tie(outputConfig_.stride, outputConfig_.frameSize) =
+ strideAndFrameSize(outputCfg.pixelFormat, outputCfg.size);
+
+ if (!outSizeRange.contains(outputCfg.size) || outputConfig_.stride != outputCfg.stride) {
+ LOG(Debayer, Error)
+ << "Invalid output size/stride: "
+ << "\n " << outputCfg.size << " (" << outSizeRange << ")"
+ << "\n " << outputCfg.stride << " (" << outputConfig_.stride << ")";
+ return -EINVAL;
+ }
+
+ if (setDebayerFunctions(inputCfg.pixelFormat, outputCfg.pixelFormat) != 0)
+ return -EINVAL;
+
+ window_.x = ((inputCfg.size.width - outputCfg.size.width) / 2) &
+ ~(inputConfig_.patternSize.width - 1);
+ window_.y = ((inputCfg.size.height - outputCfg.size.height) / 2) &
+ ~(inputConfig_.patternSize.height - 1);
+ window_.width = outputCfg.size.width;
+ window_.height = outputCfg.size.height;
+
+ /* Don't pass x,y since process() already adjusts src before passing it */
+ stats_->setWindow(Rectangle(window_.size()));
+
+ /* pad with patternSize.Width on both left and right side */
+ lineBufferPadding_ = inputConfig_.patternSize.width * inputConfig_.bpp / 8;
+ lineBufferLength_ = window_.width * inputConfig_.bpp / 8 +
+ 2 * lineBufferPadding_;
+ for (unsigned int i = 0;
+ i < (inputConfig_.patternSize.height + 1) && enableInputMemcpy_;
+ i++) {
+ free(lineBuffers_[i]);
+ lineBuffers_[i] = (uint8_t *)malloc(lineBufferLength_);
+ if (!lineBuffers_[i])
+ return -ENOMEM;
+ }
+
+ measuredFrames_ = 0;
+ frameProcessTime_ = 0;
+
+ return 0;
+}
+
+/*
+ * Get width and height at which the bayer-pattern repeats.
+ * Return pattern-size or an empty Size for an unsupported inputFormat.
+ */
+Size DebayerCpu::patternSize(PixelFormat inputFormat)
+{
+ DebayerCpu::DebayerInputConfig config;
+
+ if (getInputConfig(inputFormat, config) != 0)
+ return {};
+
+ return config.patternSize;
+}
+
+std::vector<PixelFormat> DebayerCpu::formats(PixelFormat inputFormat)
+{
+ DebayerCpu::DebayerInputConfig config;
+
+ if (getInputConfig(inputFormat, config) != 0)
+ return std::vector<PixelFormat>();
+
+ return config.outputFormats;
+}
+
+std::tuple<unsigned int, unsigned int>
+DebayerCpu::strideAndFrameSize(const PixelFormat &outputFormat, const Size &size)
+{
+ DebayerCpu::DebayerOutputConfig config;
+
+ if (getOutputConfig(outputFormat, config) != 0)
+ return std::make_tuple(0, 0);
+
+ /* round up to multiple of 8 for 64 bits alignment */
+ unsigned int stride = (size.width * config.bpp / 8 + 7) & ~7;
+
+ return std::make_tuple(stride, stride * size.height);
+}
+
+void DebayerCpu::setupInputMemcpy(const uint8_t *linePointers[])
+{
+ const unsigned int patternHeight = inputConfig_.patternSize.height;
+
+ if (!enableInputMemcpy_)
+ return;
+
+ for (unsigned int i = 0; i < patternHeight; i++) {
+ memcpy(lineBuffers_[i], linePointers[i + 1] - lineBufferPadding_,
+ lineBufferLength_);
+ linePointers[i + 1] = lineBuffers_[i] + lineBufferPadding_;
+ }
+
+ /* Point lineBufferIndex_ to first unused lineBuffer */
+ lineBufferIndex_ = patternHeight;
+}
+
+void DebayerCpu::shiftLinePointers(const uint8_t *linePointers[], const uint8_t *src)
+{
+ const unsigned int patternHeight = inputConfig_.patternSize.height;
+
+ for (unsigned int i = 0; i < patternHeight; i++)
+ linePointers[i] = linePointers[i + 1];
+
+ linePointers[patternHeight] = src +
+ (patternHeight / 2) * (int)inputConfig_.stride;
+}
+
+void DebayerCpu::memcpyNextLine(const uint8_t *linePointers[])
+{
+ const unsigned int patternHeight = inputConfig_.patternSize.height;
+
+ if (!enableInputMemcpy_)
+ return;
+
+ memcpy(lineBuffers_[lineBufferIndex_], linePointers[patternHeight] - lineBufferPadding_,
+ lineBufferLength_);
+ linePointers[patternHeight] = lineBuffers_[lineBufferIndex_] + lineBufferPadding_;
+
+ lineBufferIndex_ = (lineBufferIndex_ + 1) % (patternHeight + 1);
+}
+
+void DebayerCpu::process2(const uint8_t *src, uint8_t *dst)
+{
+ unsigned int yEnd = window_.y + window_.height;
+ /* Holds [0] previous- [1] current- [2] next-line */
+ const uint8_t *linePointers[3];
+
+ /* Adjust src to top left corner of the window */
+ src += window_.y * inputConfig_.stride + window_.x * inputConfig_.bpp / 8;
+
+ /* [x] becomes [x - 1] after initial shiftLinePointers() call */
+ if (window_.y) {
+ linePointers[1] = src - inputConfig_.stride; /* previous-line */
+ linePointers[2] = src;
+ } else {
+ /* window_.y == 0, use the next line as prev line */
+ linePointers[1] = src + inputConfig_.stride;
+ linePointers[2] = src;
+ /* Last 2 lines also need special handling */
+ yEnd -= 2;
+ }
+
+ setupInputMemcpy(linePointers);
+
+ for (unsigned int y = window_.y; y < yEnd; y += 2) {
+ shiftLinePointers(linePointers, src);
+ memcpyNextLine(linePointers);
+ stats_->processLine0(y, linePointers);
+ (this->*debayer0_)(dst, linePointers);
+ src += inputConfig_.stride;
+ dst += outputConfig_.stride;
+
+ shiftLinePointers(linePointers, src);
+ memcpyNextLine(linePointers);
+ (this->*debayer1_)(dst, linePointers);
+ src += inputConfig_.stride;
+ dst += outputConfig_.stride;
+ }
+
+ if (window_.y == 0) {
+ shiftLinePointers(linePointers, src);
+ memcpyNextLine(linePointers);
+ stats_->processLine0(yEnd, linePointers);
+ (this->*debayer0_)(dst, linePointers);
+ src += inputConfig_.stride;
+ dst += outputConfig_.stride;
+
+ shiftLinePointers(linePointers, src);
+ /* next line may point outside of src, use prev. */
+ linePointers[2] = linePointers[0];
+ (this->*debayer1_)(dst, linePointers);
+ src += inputConfig_.stride;
+ dst += outputConfig_.stride;
+ }
+}
+
+void DebayerCpu::process4(const uint8_t *src, uint8_t *dst)
+{
+ const unsigned int yEnd = window_.y + window_.height;
+ /*
+ * This holds pointers to [0] 2-lines-up [1] 1-line-up [2] current-line
+ * [3] 1-line-down [4] 2-lines-down.
+ */
+ const uint8_t *linePointers[5];
+
+ /* Adjust src to top left corner of the window */
+ src += window_.y * inputConfig_.stride + window_.x * inputConfig_.bpp / 8;
+
+ /* [x] becomes [x - 1] after initial shiftLinePointers() call */
+ linePointers[1] = src - 2 * inputConfig_.stride;
+ linePointers[2] = src - inputConfig_.stride;
+ linePointers[3] = src;
+ linePointers[4] = src + inputConfig_.stride;
+
+ setupInputMemcpy(linePointers);
+
+ for (unsigned int y = window_.y; y < yEnd; y += 4) {
+ shiftLinePointers(linePointers, src);
+ memcpyNextLine(linePointers);
+ stats_->processLine0(y, linePointers);
+ (this->*debayer0_)(dst, linePointers);
+ src += inputConfig_.stride;
+ dst += outputConfig_.stride;
+
+ shiftLinePointers(linePointers, src);
+ memcpyNextLine(linePointers);
+ (this->*debayer1_)(dst, linePointers);
+ src += inputConfig_.stride;
+ dst += outputConfig_.stride;
+
+ shiftLinePointers(linePointers, src);
+ memcpyNextLine(linePointers);
+ stats_->processLine2(y, linePointers);
+ (this->*debayer2_)(dst, linePointers);
+ src += inputConfig_.stride;
+ dst += outputConfig_.stride;
+
+ shiftLinePointers(linePointers, src);
+ memcpyNextLine(linePointers);
+ (this->*debayer3_)(dst, linePointers);
+ src += inputConfig_.stride;
+ dst += outputConfig_.stride;
+ }
+}
+
+static inline int64_t timeDiff(timespec &after, timespec &before)
+{
+ return (after.tv_sec - before.tv_sec) * 1000000000LL +
+ (int64_t)after.tv_nsec - (int64_t)before.tv_nsec;
+}
+
+void DebayerCpu::process(FrameBuffer *input, FrameBuffer *output, DebayerParams params)
+{
+ timespec frameStartTime;
+
+ if (measuredFrames_ < DebayerCpu::kLastFrameToMeasure) {
+ frameStartTime = {};
+ clock_gettime(CLOCK_MONOTONIC_RAW, &frameStartTime);
+ }
+
+ /* Apply DebayerParams */
+ if (params.gamma != gammaCorrection_ || params.blackLevel != blackLevel_) {
+ const unsigned int blackIndex =
+ params.blackLevel * kGammaLookupSize / 256;
+ std::fill(gamma_.begin(), gamma_.begin() + blackIndex, 0);
+ const float divisor = kGammaLookupSize - blackIndex - 1.0;
+ for (unsigned int i = blackIndex; i < kGammaLookupSize; i++)
+ gamma_[i] = UINT8_MAX * powf((i - blackIndex) / divisor, params.gamma);
+
+ gammaCorrection_ = params.gamma;
+ blackLevel_ = params.blackLevel;
+ }
+
+ if (swapRedBlueGains_)
+ std::swap(params.gainR, params.gainB);
+
+ for (unsigned int i = 0; i < kRGBLookupSize; i++) {
+ constexpr unsigned int div =
+ kRGBLookupSize * DebayerParams::kGain10 / kGammaLookupSize;
+ unsigned int idx;
+
+ /* Apply gamma after gain! */
+ idx = std::min({ i * params.gainR / div, (kGammaLookupSize - 1) });
+ red_[i] = gamma_[idx];
+
+ idx = std::min({ i * params.gainG / div, (kGammaLookupSize - 1) });
+ green_[i] = gamma_[idx];
+
+ idx = std::min({ i * params.gainB / div, (kGammaLookupSize - 1) });
+ blue_[i] = gamma_[idx];
+ }
+
+ /* Copy metadata from the input buffer */
+ FrameMetadata &metadata = output->_d()->metadata();
+ metadata.status = input->metadata().status;
+ metadata.sequence = input->metadata().sequence;
+ metadata.timestamp = input->metadata().timestamp;
+
+ MappedFrameBuffer in(input, MappedFrameBuffer::MapFlag::Read);
+ MappedFrameBuffer out(output, MappedFrameBuffer::MapFlag::Write);
+ if (!in.isValid() || !out.isValid()) {
+ LOG(Debayer, Error) << "mmap-ing buffer(s) failed";
+ metadata.status = FrameMetadata::FrameError;
+ return;
+ }
+
+ stats_->startFrame();
+
+ if (inputConfig_.patternSize.height == 2)
+ process2(in.planes()[0].data(), out.planes()[0].data());
+ else
+ process4(in.planes()[0].data(), out.planes()[0].data());
+
+ metadata.planes()[0].bytesused = out.planes()[0].size();
+
+ /* Measure before emitting signals */
+ if (measuredFrames_ < DebayerCpu::kLastFrameToMeasure &&
+ ++measuredFrames_ > DebayerCpu::kFramesToSkip) {
+ timespec frameEndTime = {};
+ clock_gettime(CLOCK_MONOTONIC_RAW, &frameEndTime);
+ frameProcessTime_ += timeDiff(frameEndTime, frameStartTime);
+ if (measuredFrames_ == DebayerCpu::kLastFrameToMeasure) {
+ const unsigned int measuredFrames = DebayerCpu::kLastFrameToMeasure -
+ DebayerCpu::kFramesToSkip;
+ LOG(Debayer, Info)
+ << "Processed " << measuredFrames
+ << " frames in " << frameProcessTime_ / 1000 << "us, "
+ << frameProcessTime_ / (1000 * measuredFrames)
+ << " us/frame";
+ }
+ }
+
+ stats_->finishFrame();
+ outputBufferReady.emit(output);
+ inputBufferReady.emit(input);
+}
+
+SizeRange DebayerCpu::sizes(PixelFormat inputFormat, const Size &inputSize)
+{
+ Size patternSize = this->patternSize(inputFormat);
+ unsigned int borderHeight = patternSize.height;
+
+ if (patternSize.isNull())
+ return {};
+
+ /* No need for top/bottom border with a pattern height of 2 */
+ if (patternSize.height == 2)
+ borderHeight = 0;
+
+ /*
+ * For debayer interpolation a border is kept around the entire image
+ * and the minimum output size is pattern-height x pattern-width.
+ */
+ if (inputSize.width < (3 * patternSize.width) ||
+ inputSize.height < (2 * borderHeight + patternSize.height)) {
+ LOG(Debayer, Warning)
+ << "Input format size too small: " << inputSize.toString();
+ return {};
+ }
+
+ return SizeRange(Size(patternSize.width, patternSize.height),
+ Size((inputSize.width - 2 * patternSize.width) & ~(patternSize.width - 1),
+ (inputSize.height - 2 * borderHeight) & ~(patternSize.height - 1)),
+ patternSize.width, patternSize.height);
+}
+
+} /* namespace libcamera */
diff --git a/src/libcamera/software_isp/debayer_cpu.h b/src/libcamera/software_isp/debayer_cpu.h
new file mode 100644
index 00000000..de216fe3
--- /dev/null
+++ b/src/libcamera/software_isp/debayer_cpu.h
@@ -0,0 +1,158 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/*
+ * Copyright (C) 2023, Linaro Ltd
+ * Copyright (C) 2023, Red Hat Inc.
+ *
+ * Authors:
+ * Hans de Goede <hdegoede@redhat.com>
+ *
+ * CPU based debayering header
+ */
+
+#pragma once
+
+#include <memory>
+#include <stdint.h>
+#include <vector>
+
+#include <libcamera/base/object.h>
+
+#include "libcamera/internal/bayer_format.h"
+
+#include "debayer.h"
+#include "swstats_cpu.h"
+
+namespace libcamera {
+
+class DebayerCpu : public Debayer, public Object
+{
+public:
+ DebayerCpu(std::unique_ptr<SwStatsCpu> stats);
+ ~DebayerCpu();
+
+ int configure(const StreamConfiguration &inputCfg,
+ const std::vector<std::reference_wrapper<StreamConfiguration>> &outputCfgs);
+ Size patternSize(PixelFormat inputFormat);
+ std::vector<PixelFormat> formats(PixelFormat input);
+ std::tuple<unsigned int, unsigned int>
+ strideAndFrameSize(const PixelFormat &outputFormat, const Size &size);
+ void process(FrameBuffer *input, FrameBuffer *output, DebayerParams params);
+ SizeRange sizes(PixelFormat inputFormat, const Size &inputSize);
+
+ /**
+ * \brief Get the file descriptor for the statistics
+ *
+ * \return The file descriptor pointing to the statistics
+ */
+ const SharedFD &getStatsFD() { return stats_->getStatsFD(); }
+
+ /**
+ * \brief Get the output frame size
+ *
+ * \return The output frame size
+ */
+ unsigned int frameSize() { return outputConfig_.frameSize; }
+
+private:
+ /**
+ * \brief Called to debayer 1 line of Bayer input data to output format
+ * \param[out] dst Pointer to the start of the output line to write
+ * \param[in] src The input data
+ *
+ * Input data is an array of (inputConfig_.patternSize.height + 1) src
+ * pointers each pointing to a line in the Bayer source. The middle
+ * element of the array will point to the actual line being processed.
+ * Earlier element(s) will point to the previous line(s) and later
+ * element(s) to the next line(s).
+ *
+ * These functions take an array of src pointers, rather than
+ * a single src pointer + a stride for the source, so that when the src
+ * is slow uncached memory it can be copied to faster memory before
+ * debayering. Debayering a standard 2x2 Bayer pattern requires access
+ * to the previous and next src lines for interpolating the missing
+ * colors. To allow copying the src lines only once 3 temporary buffers
+ * each holding a single line are used, re-using the oldest buffer for
+ * the next line and the pointers are swizzled so that:
+ * src[0] = previous-line, src[1] = current-line, src[2] = next-line.
+ * This way the 3 pointers passed to the debayer functions form
+ * a sliding window over the src avoiding the need to copy each
+ * line more than once.
+ *
+ * Similarly for bayer patterns which repeat every 4 lines, 5 src
+ * pointers are passed holding: src[0] = 2-lines-up, src[1] = 1-line-up
+ * src[2] = current-line, src[3] = 1-line-down, src[4] = 2-lines-down.
+ */
+ using debayerFn = void (DebayerCpu::*)(uint8_t *dst, const uint8_t *src[]);
+
+ /* 8-bit raw bayer format */
+ void debayer8_BGBG_BGR888(uint8_t *dst, const uint8_t *src[]);
+ void debayer8_GRGR_BGR888(uint8_t *dst, const uint8_t *src[]);
+ /* unpacked 10-bit raw bayer format */
+ void debayer10_BGBG_BGR888(uint8_t *dst, const uint8_t *src[]);
+ void debayer10_GRGR_BGR888(uint8_t *dst, const uint8_t *src[]);
+ /* unpacked 12-bit raw bayer format */
+ void debayer12_BGBG_BGR888(uint8_t *dst, const uint8_t *src[]);
+ void debayer12_GRGR_BGR888(uint8_t *dst, const uint8_t *src[]);
+ /* CSI-2 packed 10-bit raw bayer format (all the 4 orders) */
+ void debayer10P_BGBG_BGR888(uint8_t *dst, const uint8_t *src[]);
+ void debayer10P_GRGR_BGR888(uint8_t *dst, const uint8_t *src[]);
+ void debayer10P_GBGB_BGR888(uint8_t *dst, const uint8_t *src[]);
+ void debayer10P_RGRG_BGR888(uint8_t *dst, const uint8_t *src[]);
+
+ struct DebayerInputConfig {
+ Size patternSize;
+ unsigned int bpp; /* Memory used per pixel, not precision */
+ unsigned int stride;
+ std::vector<PixelFormat> outputFormats;
+ };
+
+ struct DebayerOutputConfig {
+ unsigned int bpp; /* Memory used per pixel, not precision */
+ unsigned int stride;
+ unsigned int frameSize;
+ };
+
+ int getInputConfig(PixelFormat inputFormat, DebayerInputConfig &config);
+ int getOutputConfig(PixelFormat outputFormat, DebayerOutputConfig &config);
+ int setupStandardBayerOrder(BayerFormat::Order order);
+ int setDebayerFunctions(PixelFormat inputFormat, PixelFormat outputFormat);
+ void setupInputMemcpy(const uint8_t *linePointers[]);
+ void shiftLinePointers(const uint8_t *linePointers[], const uint8_t *src);
+ void memcpyNextLine(const uint8_t *linePointers[]);
+ void process2(const uint8_t *src, uint8_t *dst);
+ void process4(const uint8_t *src, uint8_t *dst);
+
+ static constexpr unsigned int kGammaLookupSize = 1024;
+ static constexpr unsigned int kRGBLookupSize = 256;
+ /* Max. supported Bayer pattern height is 4, debayering this requires 5 lines */
+ static constexpr unsigned int kMaxLineBuffers = 5;
+
+ std::array<uint8_t, kGammaLookupSize> gamma_;
+ std::array<uint8_t, kRGBLookupSize> red_;
+ std::array<uint8_t, kRGBLookupSize> green_;
+ std::array<uint8_t, kRGBLookupSize> blue_;
+ debayerFn debayer0_;
+ debayerFn debayer1_;
+ debayerFn debayer2_;
+ debayerFn debayer3_;
+ Rectangle window_;
+ DebayerInputConfig inputConfig_;
+ DebayerOutputConfig outputConfig_;
+ std::unique_ptr<SwStatsCpu> stats_;
+ uint8_t *lineBuffers_[kMaxLineBuffers];
+ unsigned int lineBufferLength_;
+ unsigned int lineBufferPadding_;
+ unsigned int lineBufferIndex_;
+ unsigned int xShift_; /* Offset of 0/1 applied to window_.x */
+ bool enableInputMemcpy_;
+ bool swapRedBlueGains_;
+ float gammaCorrection_;
+ unsigned int blackLevel_;
+ unsigned int measuredFrames_;
+ int64_t frameProcessTime_;
+ /* Skip 30 frames for things to stabilize then measure 30 frames */
+ static constexpr unsigned int kFramesToSkip = 30;
+ static constexpr unsigned int kLastFrameToMeasure = 60;
+};
+
+} /* namespace libcamera */
diff --git a/src/libcamera/software_isp/meson.build b/src/libcamera/software_isp/meson.build
new file mode 100644
index 00000000..f7c66e28
--- /dev/null
+++ b/src/libcamera/software_isp/meson.build
@@ -0,0 +1,15 @@
+# SPDX-License-Identifier: CC0-1.0
+
+softisp_enabled = pipelines.contains('simple')
+summary({'SoftISP support' : softisp_enabled}, section : 'Configuration')
+
+if not softisp_enabled
+ subdir_done()
+endif
+
+libcamera_sources += files([
+ 'debayer.cpp',
+ 'debayer_cpu.cpp',
+ 'software_isp.cpp',
+ 'swstats_cpu.cpp',
+])
diff --git a/src/libcamera/software_isp/software_isp.cpp b/src/libcamera/software_isp/software_isp.cpp
new file mode 100644
index 00000000..c9b6be56
--- /dev/null
+++ b/src/libcamera/software_isp/software_isp.cpp
@@ -0,0 +1,357 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/*
+ * Copyright (C) 2023, Linaro Ltd
+ *
+ * Simple software ISP implementation
+ */
+
+#include "libcamera/internal/software_isp/software_isp.h"
+
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <libcamera/formats.h>
+#include <libcamera/stream.h>
+
+#include "libcamera/internal/bayer_format.h"
+#include "libcamera/internal/framebuffer.h"
+#include "libcamera/internal/ipa_manager.h"
+#include "libcamera/internal/mapped_framebuffer.h"
+
+#include "debayer_cpu.h"
+
+/**
+ * \file software_isp.cpp
+ * \brief Simple software ISP implementation
+ */
+
+namespace libcamera {
+
+LOG_DEFINE_CATEGORY(SoftwareIsp)
+
+/**
+ * \class SoftwareIsp
+ * \brief Class for the Software ISP
+ */
+
+/**
+ * \var SoftwareIsp::inputBufferReady
+ * \brief A signal emitted when the input frame buffer completes
+ */
+
+/**
+ * \var SoftwareIsp::outputBufferReady
+ * \brief A signal emitted when the output frame buffer completes
+ */
+
+/**
+ * \var SoftwareIsp::ispStatsReady
+ * \brief A signal emitted when the statistics for IPA are ready
+ */
+
+/**
+ * \var SoftwareIsp::setSensorControls
+ * \brief A signal emitted when the values to write to the sensor controls are
+ * ready
+ */
+
+/**
+ * \brief Constructs SoftwareIsp object
+ * \param[in] pipe The pipeline handler in use
+ * \param[in] sensor Pointer to the CameraSensor instance owned by the pipeline
+ * handler
+ */
+SoftwareIsp::SoftwareIsp(PipelineHandler *pipe, const CameraSensor *sensor)
+ : debayerParams_{ DebayerParams::kGain10, DebayerParams::kGain10,
+ DebayerParams::kGain10, 0.5f, 0 },
+ dmaHeap_(DmaHeap::DmaHeapFlag::Cma | DmaHeap::DmaHeapFlag::System)
+{
+ if (!dmaHeap_.isValid()) {
+ LOG(SoftwareIsp, Error) << "Failed to create DmaHeap object";
+ return;
+ }
+
+ sharedParams_ = SharedMemObject<DebayerParams>("softIsp_params");
+ if (!sharedParams_) {
+ LOG(SoftwareIsp, Error) << "Failed to create shared memory for parameters";
+ return;
+ }
+
+ auto stats = std::make_unique<SwStatsCpu>();
+ if (!stats->isValid()) {
+ LOG(SoftwareIsp, Error) << "Failed to create SwStatsCpu object";
+ return;
+ }
+ stats->statsReady.connect(this, &SoftwareIsp::statsReady);
+
+ debayer_ = std::make_unique<DebayerCpu>(std::move(stats));
+ debayer_->inputBufferReady.connect(this, &SoftwareIsp::inputReady);
+ debayer_->outputBufferReady.connect(this, &SoftwareIsp::outputReady);
+
+ ipa_ = IPAManager::createIPA<ipa::soft::IPAProxySoft>(pipe, 0, 0);
+ if (!ipa_) {
+ LOG(SoftwareIsp, Error)
+ << "Creating IPA for software ISP failed";
+ debayer_.reset();
+ return;
+ }
+
+ /*
+ * The IPA tuning file is made from the sensor name. If the tuning file
+ * isn't found, fall back to the 'uncalibrated' file.
+ */
+ std::string ipaTuningFile = ipa_->configurationFile(sensor->model() + ".yaml");
+ if (ipaTuningFile.empty())
+ ipaTuningFile = ipa_->configurationFile("uncalibrated.yaml");
+
+ int ret = ipa_->init(IPASettings{ ipaTuningFile, sensor->model() },
+ debayer_->getStatsFD(),
+ sharedParams_.fd(),
+ sensor->controls());
+ if (ret) {
+ LOG(SoftwareIsp, Error) << "IPA init failed";
+ debayer_.reset();
+ return;
+ }
+
+ ipa_->setIspParams.connect(this, &SoftwareIsp::saveIspParams);
+ ipa_->setSensorControls.connect(this, &SoftwareIsp::setSensorCtrls);
+
+ debayer_->moveToThread(&ispWorkerThread_);
+}
+
+SoftwareIsp::~SoftwareIsp()
+{
+ /* make sure to destroy the DebayerCpu before the ispWorkerThread_ is gone */
+ debayer_.reset();
+}
+
+/**
+ * \fn int SoftwareIsp::loadConfiguration([[maybe_unused]] const std::string &filename)
+ * \brief Load a configuration from a file
+ * \param[in] filename The file to load the configuration data from
+ *
+ * This is currently a stub that does nothing and always returns "success".
+ *
+ * \return 0 on success
+ */
+
+/**
+ * \brief Process the statistics gathered
+ * \param[in] sensorControls The sensor controls
+ *
+ * Requests the IPA to calculate new parameters for ISP and new control
+ * values for the sensor.
+ */
+void SoftwareIsp::processStats(const ControlList &sensorControls)
+{
+ ASSERT(ipa_);
+ ipa_->processStats(sensorControls);
+}
+
+/**
+ * \brief Check the validity of Software Isp object
+ * \return True if Software Isp is valid, false otherwise
+ */
+bool SoftwareIsp::isValid() const
+{
+ return !!debayer_;
+}
+
+/**
+ * \brief Get the output formats supported for the given input format
+ * \param[in] inputFormat The input format
+ * \return All the supported output formats or an empty vector if there are none
+ */
+std::vector<PixelFormat> SoftwareIsp::formats(PixelFormat inputFormat)
+{
+ ASSERT(debayer_);
+
+ return debayer_->formats(inputFormat);
+}
+
+/**
+ * \brief Get the supported output sizes for the given input format and size
+ * \param[in] inputFormat The input format
+ * \param[in] inputSize The input frame size
+ * \return The valid size range or an empty range if there are none
+ */
+SizeRange SoftwareIsp::sizes(PixelFormat inputFormat, const Size &inputSize)
+{
+ ASSERT(debayer_);
+
+ return debayer_->sizes(inputFormat, inputSize);
+}
+
+/**
+ * \brief Get the output stride and the frame size in bytes for the given output format and size
+ * \param[in] outputFormat The output format
+ * \param[in] size The output size (width and height in pixels)
+ * \return A tuple of the stride and the frame size in bytes, or a tuple of 0,0
+ * if there is no valid output config
+ */
+std::tuple<unsigned int, unsigned int>
+SoftwareIsp::strideAndFrameSize(const PixelFormat &outputFormat, const Size &size)
+{
+ ASSERT(debayer_);
+
+ return debayer_->strideAndFrameSize(outputFormat, size);
+}
+
+/**
+ * \brief Configure the SoftwareIsp object according to the passed in parameters
+ * \param[in] inputCfg The input configuration
+ * \param[in] outputCfgs The output configurations
+ * \param[in] sensorControls ControlInfoMap of the controls supported by the sensor
+ * \return 0 on success, a negative errno on failure
+ */
+int SoftwareIsp::configure(const StreamConfiguration &inputCfg,
+ const std::vector<std::reference_wrapper<StreamConfiguration>> &outputCfgs,
+ const ControlInfoMap &sensorControls)
+{
+ ASSERT(ipa_ && debayer_);
+
+ int ret = ipa_->configure(sensorControls);
+ if (ret < 0)
+ return ret;
+
+ return debayer_->configure(inputCfg, outputCfgs);
+}
+
+/**
+ * \brief Export the buffers from the Software ISP
+ * \param[in] output Output stream index exporting the buffers
+ * \param[in] count Number of buffers to allocate
+ * \param[out] buffers Vector to store the allocated buffers
+ * \return The number of allocated buffers on success or a negative error code
+ * otherwise
+ */
+int SoftwareIsp::exportBuffers(unsigned int output, unsigned int count,
+ std::vector<std::unique_ptr<FrameBuffer>> *buffers)
+{
+ ASSERT(debayer_ != nullptr);
+
+ /* single output for now */
+ if (output >= 1)
+ return -EINVAL;
+
+ for (unsigned int i = 0; i < count; i++) {
+ const std::string name = "frame-" + std::to_string(i);
+ const size_t frameSize = debayer_->frameSize();
+
+ FrameBuffer::Plane outPlane;
+ outPlane.fd = SharedFD(dmaHeap_.alloc(name.c_str(), frameSize));
+ if (!outPlane.fd.isValid()) {
+ LOG(SoftwareIsp, Error)
+ << "failed to allocate a dma_buf";
+ return -ENOMEM;
+ }
+ outPlane.offset = 0;
+ outPlane.length = frameSize;
+
+ std::vector<FrameBuffer::Plane> planes{ outPlane };
+ buffers->emplace_back(std::make_unique<FrameBuffer>(std::move(planes)));
+ }
+
+ return count;
+}
+
+/**
+ * \brief Queue buffers to Software ISP
+ * \param[in] input The input framebuffer
+ * \param[in] outputs The container holding the output stream indexes and
+ * their respective frame buffer outputs
+ * \return 0 on success, a negative errno on failure
+ */
+int SoftwareIsp::queueBuffers(FrameBuffer *input,
+ const std::map<unsigned int, FrameBuffer *> &outputs)
+{
+ unsigned int mask = 0;
+
+ /*
+ * Validate the outputs as a sanity check: at least one output is
+ * required, all outputs must reference a valid stream and no two
+ * outputs can reference the same stream.
+ */
+ if (outputs.empty())
+ return -EINVAL;
+
+ for (auto [index, buffer] : outputs) {
+ if (!buffer)
+ return -EINVAL;
+ if (index >= 1) /* only single stream atm */
+ return -EINVAL;
+ if (mask & (1 << index))
+ return -EINVAL;
+
+ mask |= 1 << index;
+ }
+
+ process(input, outputs.at(0));
+
+ return 0;
+}
+
+/**
+ * \brief Starts the Software ISP streaming operation
+ * \return 0 on success, any other value indicates an error
+ */
+int SoftwareIsp::start()
+{
+ int ret = ipa_->start();
+ if (ret)
+ return ret;
+
+ ispWorkerThread_.start();
+ return 0;
+}
+
+/**
+ * \brief Stops the Software ISP streaming operation
+ */
+void SoftwareIsp::stop()
+{
+ ispWorkerThread_.exit();
+ ispWorkerThread_.wait();
+
+ ipa_->stop();
+}
+
+/**
+ * \brief Passes the input framebuffer to the ISP worker to process
+ * \param[in] input The input framebuffer
+ * \param[out] output The framebuffer to write the processed frame to
+ */
+void SoftwareIsp::process(FrameBuffer *input, FrameBuffer *output)
+{
+ debayer_->invokeMethod(&DebayerCpu::process,
+ ConnectionTypeQueued, input, output, debayerParams_);
+}
+
+void SoftwareIsp::saveIspParams()
+{
+ debayerParams_ = *sharedParams_;
+}
+
+void SoftwareIsp::setSensorCtrls(const ControlList &sensorControls)
+{
+ setSensorControls.emit(sensorControls);
+}
+
+void SoftwareIsp::statsReady()
+{
+ ispStatsReady.emit();
+}
+
+void SoftwareIsp::inputReady(FrameBuffer *input)
+{
+ inputBufferReady.emit(input);
+}
+
+void SoftwareIsp::outputReady(FrameBuffer *output)
+{
+ outputBufferReady.emit(output);
+}
+
+} /* namespace libcamera */
diff --git a/src/libcamera/software_isp/swstats_cpu.cpp b/src/libcamera/software_isp/swstats_cpu.cpp
new file mode 100644
index 00000000..815c4d4f
--- /dev/null
+++ b/src/libcamera/software_isp/swstats_cpu.cpp
@@ -0,0 +1,432 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/*
+ * Copyright (C) 2023, Linaro Ltd
+ * Copyright (C) 2023, Red Hat Inc.
+ *
+ * Authors:
+ * Hans de Goede <hdegoede@redhat.com>
+ *
+ * CPU based software statistics implementation
+ */
+
+#include "swstats_cpu.h"
+
+#include <libcamera/base/log.h>
+
+#include <libcamera/stream.h>
+
+#include "libcamera/internal/bayer_format.h"
+
+namespace libcamera {
+
+/**
+ * \class SwStatsCpu
+ * \brief Class for gathering statistics on the CPU
+ *
+ * CPU based software ISP statistics implementation.
+ *
+ * This class offers a configure function + functions to gather statistics on a
+ * line by line basis. This allows CPU based software debayering to interleave
+ * debayering and statistics gathering on a line by line basis while the input
+ * data is still hot in the cache.
+ *
+ * It is also possible to specify a window over which to gather statistics
+ * instead of processing the whole frame.
+ */
+
+/**
+ * \fn bool SwStatsCpu::isValid() const
+ * \brief Gets whether the statistics object is valid
+ *
+ * \return True if it's valid, false otherwise
+ */
+
+/**
+ * \fn const SharedFD &SwStatsCpu::getStatsFD()
+ * \brief Get the file descriptor for the statistics
+ *
+ * \return The file descriptor
+ */
+
+/**
+ * \fn const Size &SwStatsCpu::patternSize()
+ * \brief Get the pattern size
+ *
+ * For some input-formats, e.g. Bayer data, processing is done multiple lines
+ * and/or columns at a time. Get width and height at which the (bayer) pattern
+ * repeats. Window values are rounded down to a multiple of this and the height
+ * also indicates if processLine2() should be called or not.
+ * This may only be called after a successful configure() call.
+ *
+ * \return The pattern size
+ */
+
+/**
+ * \fn void SwStatsCpu::processLine0(unsigned int y, const uint8_t *src[])
+ * \brief Process line 0
+ * \param[in] y The y coordinate.
+ * \param[in] src The input data.
+ *
+ * This function processes line 0 for input formats with
+ * patternSize height == 1.
+ * It'll process line 0 and 1 for input formats with patternSize height >= 2.
+ * This function may only be called after a successful setWindow() call.
+ */
+
+/**
+ * \fn void SwStatsCpu::processLine2(unsigned int y, const uint8_t *src[])
+ * \brief Process line 2 and 3
+ * \param[in] y The y coordinate.
+ * \param[in] src The input data.
+ *
+ * This function processes line 2 and 3 for input formats with
+ * patternSize height == 4.
+ * This function may only be called after a successful setWindow() call.
+ */
+
+/**
+ * \var Signal<> SwStatsCpu::statsReady
+ * \brief Signals that the statistics are ready
+ */
+
+/**
+ * \typedef SwStatsCpu::statsProcessFn
+ * \brief Called when there is data to get statistics from
+ * \param[in] src The input data
+ *
+ * These functions take an array of (patternSize_.height + 1) src
+ * pointers each pointing to a line in the source image. The middle
+ * element of the array will point to the actual line being processed.
+ * Earlier element(s) will point to the previous line(s) and later
+ * element(s) to the next line(s).
+ *
+ * See the documentation of DebayerCpu::debayerFn for more details.
+ */
+
+/**
+ * \var unsigned int SwStatsCpu::ySkipMask_
+ * \brief Skip lines where this bitmask is set in y
+ */
+
+/**
+ * \var Rectangle SwStatsCpu::window_
+ * \brief Statistics window, set by setWindow(), used every line
+ */
+
+/**
+ * \var Size SwStatsCpu::patternSize_
+ * \brief The size of the bayer pattern
+ *
+ * Valid sizes are: 2x2, 4x2 or 4x4.
+ */
+
+/**
+ * \var unsigned int SwStatsCpu::xShift_
+ * \brief The offset of x, applied to window_.x for bayer variants
+ *
+ * This can either be 0 or 1.
+ */
+
+LOG_DEFINE_CATEGORY(SwStatsCpu)
+
+SwStatsCpu::SwStatsCpu()
+ : sharedStats_("softIsp_stats")
+{
+ if (!sharedStats_)
+ LOG(SwStatsCpu, Error)
+ << "Failed to create shared memory for statistics";
+}
+
+static constexpr unsigned int kRedYMul = 77; /* 0.299 * 256 */
+static constexpr unsigned int kGreenYMul = 150; /* 0.587 * 256 */
+static constexpr unsigned int kBlueYMul = 29; /* 0.114 * 256 */
+
+#define SWSTATS_START_LINE_STATS(pixel_t) \
+ pixel_t r, g, g2, b; \
+ uint64_t yVal; \
+ \
+ uint64_t sumR = 0; \
+ uint64_t sumG = 0; \
+ uint64_t sumB = 0;
+
+#define SWSTATS_ACCUMULATE_LINE_STATS(div) \
+ sumR += r; \
+ sumG += g; \
+ sumB += b; \
+ \
+ yVal = r * kRedYMul; \
+ yVal += g * kGreenYMul; \
+ yVal += b * kBlueYMul; \
+ stats_.yHistogram[yVal * SwIspStats::kYHistogramSize / (256 * 256 * (div))]++;
+
+#define SWSTATS_FINISH_LINE_STATS() \
+ stats_.sumR_ += sumR; \
+ stats_.sumG_ += sumG; \
+ stats_.sumB_ += sumB;
+
+void SwStatsCpu::statsBGGR8Line0(const uint8_t *src[])
+{
+ const uint8_t *src0 = src[1] + window_.x;
+ const uint8_t *src1 = src[2] + window_.x;
+
+ SWSTATS_START_LINE_STATS(uint8_t)
+
+ if (swapLines_)
+ std::swap(src0, src1);
+
+ /* x += 4 sample every other 2x2 block */
+ for (int x = 0; x < (int)window_.width; x += 4) {
+ b = src0[x];
+ g = src0[x + 1];
+ g2 = src1[x];
+ r = src1[x + 1];
+
+ g = (g + g2) / 2;
+
+ SWSTATS_ACCUMULATE_LINE_STATS(1)
+ }
+
+ SWSTATS_FINISH_LINE_STATS()
+}
+
+void SwStatsCpu::statsBGGR10Line0(const uint8_t *src[])
+{
+ const uint16_t *src0 = (const uint16_t *)src[1] + window_.x;
+ const uint16_t *src1 = (const uint16_t *)src[2] + window_.x;
+
+ SWSTATS_START_LINE_STATS(uint16_t)
+
+ if (swapLines_)
+ std::swap(src0, src1);
+
+ /* x += 4 sample every other 2x2 block */
+ for (int x = 0; x < (int)window_.width; x += 4) {
+ b = src0[x];
+ g = src0[x + 1];
+ g2 = src1[x];
+ r = src1[x + 1];
+
+ g = (g + g2) / 2;
+
+ /* divide Y by 4 for 10 -> 8 bpp value */
+ SWSTATS_ACCUMULATE_LINE_STATS(4)
+ }
+
+ SWSTATS_FINISH_LINE_STATS()
+}
+
+void SwStatsCpu::statsBGGR12Line0(const uint8_t *src[])
+{
+ const uint16_t *src0 = (const uint16_t *)src[1] + window_.x;
+ const uint16_t *src1 = (const uint16_t *)src[2] + window_.x;
+
+ SWSTATS_START_LINE_STATS(uint16_t)
+
+ if (swapLines_)
+ std::swap(src0, src1);
+
+ /* x += 4 sample every other 2x2 block */
+ for (int x = 0; x < (int)window_.width; x += 4) {
+ b = src0[x];
+ g = src0[x + 1];
+ g2 = src1[x];
+ r = src1[x + 1];
+
+ g = (g + g2) / 2;
+
+ /* divide Y by 16 for 12 -> 8 bpp value */
+ SWSTATS_ACCUMULATE_LINE_STATS(16)
+ }
+
+ SWSTATS_FINISH_LINE_STATS()
+}
+
+void SwStatsCpu::statsBGGR10PLine0(const uint8_t *src[])
+{
+ const uint8_t *src0 = src[1] + window_.x * 5 / 4;
+ const uint8_t *src1 = src[2] + window_.x * 5 / 4;
+ const int widthInBytes = window_.width * 5 / 4;
+
+ if (swapLines_)
+ std::swap(src0, src1);
+
+ SWSTATS_START_LINE_STATS(uint8_t)
+
+ /* x += 5 sample every other 2x2 block */
+ for (int x = 0; x < widthInBytes; x += 5) {
+ /* BGGR */
+ b = src0[x];
+ g = src0[x + 1];
+ g2 = src1[x];
+ r = src1[x + 1];
+ g = (g + g2) / 2;
+ /* Data is already 8 bits, divide by 1 */
+ SWSTATS_ACCUMULATE_LINE_STATS(1)
+ }
+
+ SWSTATS_FINISH_LINE_STATS()
+}
+
+void SwStatsCpu::statsGBRG10PLine0(const uint8_t *src[])
+{
+ const uint8_t *src0 = src[1] + window_.x * 5 / 4;
+ const uint8_t *src1 = src[2] + window_.x * 5 / 4;
+ const int widthInBytes = window_.width * 5 / 4;
+
+ if (swapLines_)
+ std::swap(src0, src1);
+
+ SWSTATS_START_LINE_STATS(uint8_t)
+
+ /* x += 5 sample every other 2x2 block */
+ for (int x = 0; x < widthInBytes; x += 5) {
+ /* GBRG */
+ g = src0[x];
+ b = src0[x + 1];
+ r = src1[x];
+ g2 = src1[x + 1];
+ g = (g + g2) / 2;
+ /* Data is already 8 bits, divide by 1 */
+ SWSTATS_ACCUMULATE_LINE_STATS(1)
+ }
+
+ SWSTATS_FINISH_LINE_STATS()
+}
+
+/**
+ * \brief Reset state to start statistics gathering for a new frame
+ *
+ * This may only be called after a successful setWindow() call.
+ */
+void SwStatsCpu::startFrame(void)
+{
+ if (window_.width == 0)
+ LOG(SwStatsCpu, Error) << "Calling startFrame() without setWindow()";
+
+ stats_.sumR_ = 0;
+ stats_.sumB_ = 0;
+ stats_.sumG_ = 0;
+ stats_.yHistogram.fill(0);
+}
+
+/**
+ * \brief Finish statistics calculation for the current frame
+ *
+ * This may only be called after a successful setWindow() call.
+ */
+void SwStatsCpu::finishFrame(void)
+{
+ *sharedStats_ = stats_;
+ statsReady.emit();
+}
+
+/**
+ * \brief Set up the SwStatsCpu object for standard Bayer orders
+ * \param[in] order The Bayer order
+ *
+ * Check if order is a standard Bayer order and set up xShift_ and swapLines_
+ * so that a single BGGR stats function can be used for all 4 standard orders.
+ */
+int SwStatsCpu::setupStandardBayerOrder(BayerFormat::Order order)
+{
+ switch (order) {
+ case BayerFormat::BGGR:
+ xShift_ = 0;
+ swapLines_ = false;
+ break;
+ case BayerFormat::GBRG:
+ xShift_ = 1; /* BGGR -> GBRG */
+ swapLines_ = false;
+ break;
+ case BayerFormat::GRBG:
+ xShift_ = 0;
+ swapLines_ = true; /* BGGR -> GRBG */
+ break;
+ case BayerFormat::RGGB:
+ xShift_ = 1; /* BGGR -> GBRG */
+ swapLines_ = true; /* GBRG -> RGGB */
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ patternSize_.height = 2;
+ patternSize_.width = 2;
+ ySkipMask_ = 0x02; /* Skip every 3rd and 4th line */
+ return 0;
+}
+
+/**
+ * \brief Configure the statistics object for the passed in input format
+ * \param[in] inputCfg The input format
+ *
+ * \return 0 on success, a negative errno value on failure
+ */
+int SwStatsCpu::configure(const StreamConfiguration &inputCfg)
+{
+ BayerFormat bayerFormat =
+ BayerFormat::fromPixelFormat(inputCfg.pixelFormat);
+
+ if (bayerFormat.packing == BayerFormat::Packing::None &&
+ setupStandardBayerOrder(bayerFormat.order) == 0) {
+ switch (bayerFormat.bitDepth) {
+ case 8:
+ stats0_ = &SwStatsCpu::statsBGGR8Line0;
+ return 0;
+ case 10:
+ stats0_ = &SwStatsCpu::statsBGGR10Line0;
+ return 0;
+ case 12:
+ stats0_ = &SwStatsCpu::statsBGGR12Line0;
+ return 0;
+ }
+ }
+
+ if (bayerFormat.bitDepth == 10 &&
+ bayerFormat.packing == BayerFormat::Packing::CSI2) {
+ patternSize_.height = 2;
+ patternSize_.width = 4; /* 5 bytes per *4* pixels */
+ /* Skip every 3rd and 4th line, sample every other 2x2 block */
+ ySkipMask_ = 0x02;
+ xShift_ = 0;
+
+ switch (bayerFormat.order) {
+ case BayerFormat::BGGR:
+ case BayerFormat::GRBG:
+ stats0_ = &SwStatsCpu::statsBGGR10PLine0;
+ swapLines_ = bayerFormat.order == BayerFormat::GRBG;
+ return 0;
+ case BayerFormat::GBRG:
+ case BayerFormat::RGGB:
+ stats0_ = &SwStatsCpu::statsGBRG10PLine0;
+ swapLines_ = bayerFormat.order == BayerFormat::RGGB;
+ return 0;
+ default:
+ break;
+ }
+ }
+
+ LOG(SwStatsCpu, Info)
+ << "Unsupported input format " << inputCfg.pixelFormat.toString();
+ return -EINVAL;
+}
+
+/**
+ * \brief Specify window coordinates over which to gather statistics
+ * \param[in] window The window object.
+ */
+void SwStatsCpu::setWindow(const Rectangle &window)
+{
+ window_ = window;
+
+ window_.x &= ~(patternSize_.width - 1);
+ window_.x += xShift_;
+ window_.y &= ~(patternSize_.height - 1);
+
+ /* width_ - xShift_ to make sure the window fits, clamped at 0 so the unsigned width cannot wrap */
+ window_.width -= window_.width < xShift_ ? window_.width : xShift_;
+ window_.width &= ~(patternSize_.width - 1);
+ window_.height &= ~(patternSize_.height - 1);
+}
+
+} /* namespace libcamera */
diff --git a/src/libcamera/software_isp/swstats_cpu.h b/src/libcamera/software_isp/swstats_cpu.h
new file mode 100644
index 00000000..363e326f
--- /dev/null
+++ b/src/libcamera/software_isp/swstats_cpu.h
@@ -0,0 +1,97 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/*
+ * Copyright (C) 2023, Linaro Ltd
+ * Copyright (C) 2023, Red Hat Inc.
+ *
+ * Authors:
+ * Hans de Goede <hdegoede@redhat.com>
+ *
+ * CPU based software statistics implementation
+ */
+
+#pragma once
+
+#include <stdint.h>
+
+#include <libcamera/base/signal.h>
+
+#include <libcamera/geometry.h>
+
+#include "libcamera/internal/bayer_format.h"
+#include "libcamera/internal/shared_mem_object.h"
+#include "libcamera/internal/software_isp/swisp_stats.h"
+
+namespace libcamera {
+
+class PixelFormat;
+struct StreamConfiguration;
+
+/*
+ * CPU based statistics gathering: configure() selects a per-line routine for
+ * the input format, setWindow() sets the area to cover, and processLine0() /
+ * processLine2() feed lines to the selected routines.
+ */
+class SwStatsCpu
+{
+public:
+	SwStatsCpu();
+	~SwStatsCpu() = default;
+
+	/* True when the shared statistics object holds a valid file descriptor */
+	bool isValid() const { return sharedStats_.fd().isValid(); }
+
+	/* File descriptor of the shared statistics object */
+	const SharedFD &getStatsFD() const { return sharedStats_.fd(); }
+
+	/* Pattern size currently stored for the configured format */
+	const Size &patternSize() const { return patternSize_; }
+
+	int configure(const StreamConfiguration &inputCfg);
+	void setWindow(const Rectangle &window);
+	void startFrame();
+	void finishFrame();
+
+	/*
+	 * Run the stats0_ routine on src for line y, unless y is masked out
+	 * by ySkipMask_ or lies outside the vertical range of the window.
+	 */
+	void processLine0(unsigned int y, const uint8_t *src[])
+	{
+		if ((y & ySkipMask_) || y < static_cast<unsigned int>(window_.y) ||
+		    y >= (window_.y + window_.height))
+			return;
+
+		(this->*stats0_)(src);
+	}
+
+	/*
+	 * Same line filtering as processLine0(), but runs the stats2_ routine.
+	 * NOTE(review): no assignment to stats2_ is visible alongside the
+	 * stats0_ ones; confirm it is set before this is called.
+	 */
+	void processLine2(unsigned int y, const uint8_t *src[])
+	{
+		if ((y & ySkipMask_) || y < static_cast<unsigned int>(window_.y) ||
+		    y >= (window_.y + window_.height))
+			return;
+
+		(this->*stats2_)(src);
+	}
+
+	Signal<> statsReady;
+
+private:
+	/* Signature shared by all per-line statistics routines */
+	using statsProcessFn = void (SwStatsCpu::*)(const uint8_t *src[]);
+
+	int setupStandardBayerOrder(BayerFormat::Order order);
+	/* Bayer 8 bpp unpacked */
+	void statsBGGR8Line0(const uint8_t *src[]);
+	/* Bayer 10 bpp unpacked */
+	void statsBGGR10Line0(const uint8_t *src[]);
+	/* Bayer 12 bpp unpacked */
+	void statsBGGR12Line0(const uint8_t *src[]);
+	/* Bayer 10 bpp packed */
+	void statsBGGR10PLine0(const uint8_t *src[]);
+	void statsGBRG10PLine0(const uint8_t *src[]);
+
+	/* Variables set by configure(), used every line */
+	statsProcessFn stats0_;
+	statsProcessFn stats2_;
+	bool swapLines_;
+
+	/* Lines with (y & ySkipMask_) != 0 are not processed */
+	unsigned int ySkipMask_;
+
+	/* Statistics window, as adjusted by setWindow() */
+	Rectangle window_;
+
+	Size patternSize_;
+
+	/* Offset added to the window x origin by setWindow() */
+	unsigned int xShift_;
+
+	SharedMemObject<SwIspStats> sharedStats_;
+	SwIspStats stats_;
+};
+
+} /* namespace libcamera */