/*
 *  $Id: matfile.c 28848 2025-11-10 18:54:08Z yeti-dn $
 *  Copyright (C) 2009-2025 David Necas (Yeti).
 *  E-mail: yeti@gwyddion.net.
 *
 *  This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public
 *  License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any
 *  later version.
 *
 *  This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied
 *  warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
 *  details.
 *
 *  You should have received a copy of the GNU General Public License along with this program; if not, write to the
 *  Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 */

/**
 * [FILE-MAGIC-MISSING]
 * Avoiding clash with a standard file format.
 **/

/**
 * [FILE-MAGIC-USERGUIDE]
 * Matlab MAT 5 files
 * .mat
 * Read
 **/

#include "config.h"
#include <glib/gi18n-lib.h>
#include <string.h>
#include <stdlib.h>
#include <gwy.h>
#include "gwyzlib.h"
#include "err.h"
#include "get.h"

/* Four signature bytes found at MAGIC_OFFSET in the file header.  The two variants contain the same bytes with each
 * byte pair swapped: MAGIC_NATIVE ends with "IM", MAGIC_SWAP ends with "MI".  load_file() starts with msb = FALSE for
 * MAGIC_NATIVE and msb = TRUE for MAGIC_SWAP. */
#define MAGIC_SWAP "\x01\x00\x4d\x49"
#define MAGIC_NATIVE "\x00\x01\x49\x4d"
/* Number of signature bytes, not counting the terminating NUL of the string literal. */
#define MAGIC_SIZE (sizeof(MAGIC_NATIVE)-1)
/* Byte offset of the signature from the start of the file. */
#define MAGIC_OFFSET 124

/* File name suffix used for name-only detection (compared with the lowercased name). */
#define EXTENSION ".mat"

/* Rounds x up to the nearest multiple of MAT5_TAG_SIZE.  The arithmetic is done in the type of x, so the result can
 * wrap when x is within MAT5_TAG_SIZE-1 of the maximum of that type. */
#define MAT5_PAD(x) (((x) + MAT5_TAG_SIZE-1)/MAT5_TAG_SIZE*MAT5_TAG_SIZE)

/* NOTE(review): MAXDEPTH is not referenced in the visible part of this file; presumably an intended limit on the
 * nesting depth of variables -- confirm before relying on it. */
enum {
    MAXDEPTH = 2,
};

/* Fixed sizes, in bytes, of parts of the file layout. */
enum {
    MAT5_HEADER_SIZE = 128,    /* file header; data element tags start right after it */
    MAT5_TAG_SIZE = 8,         /* one data element tag; also the padding granularity used by MAT5_PAD() */
    MAT5_VAR_SIZE = 56,    /* minimum variable size, if not packed into tag */
};

/* Data type codes as they appear in data element tags (see mat5_next_tag()).  The numbering has gaps: values 8, 10
 * and 11 have no symbol here. */
typedef enum {
    MAT5_INT8       = 1,
    MAT5_UINT8      = 2,
    MAT5_INT16      = 3,
    MAT5_UINT16     = 4,
    MAT5_INT32      = 5,
    MAT5_UINT32     = 6,
    MAT5_SINGLE     = 7,
    MAT5_DOUBLE     = 9,
    MAT5_INT64      = 12,
    MAT5_UINT64     = 13,
    MAT5_MATRIX     = 14,    /* the only element type load_file() tries to import */
    MAT5_COMPRESSED = 15,    /* payload is inflated by mat5_next_tag() before further parsing */
    MAT5_UTF8       = 16,
    MAT5_UTF16      = 17,    /* endian applies to these too */
    MAT5_UTF32      = 18,
} Mat5DataType;

/* Array class codes.  try_read_field() extracts the class from the array flags word using MAT5_CLASS_MASK.  Only
 * MAT5_CLASS_STRUCT is treated specially there (it is never imported). */
typedef enum {
    MAT5_CLASS_CELL     = 1,
    MAT5_CLASS_STRUCT   = 2,
    MAT5_CLASS_OBJECT   = 3,
    MAT5_CLASS_CHAR     = 4,
    MAT5_CLASS_SPARSE   = 5,
    MAT5_CLASS_DOUBLE   = 6,
    MAT5_CLASS_SINGLE   = 7,
    MAT5_CLASS_INT8     = 8,
    MAT5_CLASS_UINT8    = 9,
    MAT5_CLASS_INT16    = 10,
    MAT5_CLASS_UINT16   = 11,
    MAT5_CLASS_INT32    = 12,
    MAT5_CLASS_UINT32   = 13,
    MAT5_CLASS_INT64    = 14,
    MAT5_CLASS_UINT64   = 15,
    MAT5_CLASS_FUNCTION = 16,
} Mat5ClassType;

/* Bits and masks applied to the first 32bit word of the array flags element in try_read_field(). */
typedef enum {
    MAT5_FLAG_COMPLEX = 0x1000,
    MAT5_FLAG_GLOBAL  = 0x2000,
    MAT5_FLAG_LOGICAL = 0x4000,
    MAT5_FLAG_MASK    = 0x7000,    /* union of the three flag bits above */
    MAT5_CLASS_MASK   = 0x00ff,    /* low byte, holds a Mat5ClassType value */
} Mat5ArrayFlags;

/* Capacity of the dims[] array in Mat5Matrix. */
enum {
    MAX_DIMS = 4,
};

struct _Mat5Element;
struct _Mat5Matrix;

/* Generic representation of one data element value.
 *
 * NOTE(review): this type is not used by the visible code of this file; it looks like scaffolding for a more
 * complete parser. */
typedef struct _Mat5Element {
    Mat5DataType type;
    guint nitems;
    /* Single-item values are represented directly, arrays are kept as pointers. */
    union {
        gint i;
        gint *ia;
        guint u;
        guint *ua;
        gdouble f;
        gdouble *fa;
        gint64 i64;
        gint64 *i64a;
        guint64 u64;
        guint64 *u64a;
        guchar *str;
        /* The struct tag is _Mat5Matrix (with the underscore); plain `struct Mat5Matrix' would silently declare an
         * unrelated incomplete type that can never be dereferenced. */
        struct _Mat5Matrix *mat;
    } value;
} Mat5Element;

/* In-memory description of one MAT5 array (matrix, structure or object).
 *
 * NOTE(review): this type is not used by the visible code of this file; it looks like scaffolding for a more
 * complete parser. */
typedef struct _Mat5Matrix {
    Mat5ClassType klass;
    Mat5ArrayFlags flags;
    guint dims[MAX_DIMS];
    guint nitems;    /* calculated */
    gchar *name;
    /* Regular arrays */
    gpointer real;
    gpointer imag;
    /* Structures */
    gchar **field_names;
    /* The struct tag is _Mat5Matrix (with the underscore); plain `struct Mat5Matrix' would silently declare an
     * unrelated incomplete type instead of referring to this structure. */
    struct _Mat5Matrix *fields;
    /* Objects, in addition */
    gchar *class_name;
} Mat5Matrix;

struct _Mat5FileCursor;
struct _Mat5FileContext;

/* State shared by all cursors working on one file. */
typedef struct _Mat5FileContext {
    /* Reads one 32bit unsigned integer and advances the pointer; load_file() selects the big-endian or little-endian
     * reader according to msb. */
    guint32 (*get_guint32)(const guchar **p);
    /* Output buffer for inflated MAT5_COMPRESSED elements; a single buffer is reused for the whole file. */
    GByteArray *zbuffer;
    /* Cursor whose current element data live in zbuffer, or NULL when the buffer is free.  Set and released in
     * mat5_next_tag(). */
    struct _Mat5FileCursor *zbuffer_owner;
    /* TRUE when values are read with the big-endian reader and converted as GWY_BYTE_ORDER_BIG_ENDIAN. */
    gboolean msb;
} Mat5FileContext;

/* Position within a sequence of data elements plus the description of the element most recently read by
 * mat5_next_tag(). */
typedef struct _Mat5FileCursor {
    Mat5FileContext *context;
    gsize size;                /* This is *remaining* size */
    const guchar *p;           /* Buffer position. */
    const guchar *zp;          /* Data pointer (possibly to deflated data). */
    Mat5DataType data_type;    /* Type of the current element (of the inner element for compressed data). */
    guint nbytes;              /* Payload size of the current element in bytes, without padding. */
} Mat5FileCursor;

/* Forward declarations of the file-local functions that are used before their definitions. */
static gboolean  module_register   (void);
static gint      detect_file       (const GwyFileDetectInfo *fileinfo,
                                    gboolean only_name);
static GwyFile*  load_file         (const gchar *filename,
                                    GwyRunModeFlags mode,
                                    GError **error);
static GwyField* try_read_field    (Mat5FileCursor *parent,
                                    GString *name);
static gboolean  zinflate_variable (Mat5FileCursor *cursor,
                                    GError **error);
static gboolean  raw_type_from_mat5(Mat5DataType dtype,
                                    GwyRawDataType *rawtype);

/* Item size in bytes, indexed by Mat5DataType.
 *
 * Zero marks codes that are unassigned or have no fixed item size (MATRIX, COMPRESSED); users must treat zero as
 * `not a scalar type' and must not divide by it.  The previous version of the table had one extra zero in front of
 * the 64bit entries, shifting the sizes of INT64, UINT64, MATRIX, COMPRESSED and all UTF types by one position. */
static const guint typesize[] = {
    0,          /*  0: unassigned */
    1, 1,       /*  1, 2: INT8, UINT8 */
    2, 2,       /*  3, 4: INT16, UINT16 */
    4, 4,       /*  5, 6: INT32, UINT32 */
    4,          /*  7: SINGLE */
    0,          /*  8: unassigned */
    8,          /*  9: DOUBLE */
    0, 0,       /* 10, 11: unassigned */
    8, 8,       /* 12, 13: INT64, UINT64 */
    0, 0,       /* 14, 15: MATRIX, COMPRESSED */
    1, 2, 4,    /* 16, 17, 18: UTF8, UTF16, UTF32 */
};

/* Module description handed to GWY_MODULE_QUERY2() below.  It carries the ABI version constant, the registration
 * function and translatable/descriptive strings; judging by the values, the strings are description, author, module
 * version, copyright holder and year -- confirm the exact field order against the GwyModuleInfo declaration. */
static GwyModuleInfo module_info = {
    GWY_MODULE_ABI_VERSION,
    &module_register,
    N_("Imports Matlab MAT files v5."),
    "Yeti <yeti@gwyddion.net>",
    "0.3",
    "David Nečas (Yeti)",
    "2009",
};

/* Publishes module_info under the module name `matfile'. */
GWY_MODULE_QUERY2(module_info, matfile)

/* Module registration function referenced from module_info.
 *
 * Registers a single file type, "mat5file", with detect_file() and load_file() as its callbacks; the two remaining
 * callback slots are left NULL.  Always returns TRUE. */
static gboolean
module_register(void)
{
    gwy_file_func_register("mat5file", N_("Matlab MAT 5 file (.mat)"), detect_file, load_file, NULL, NULL);
    return TRUE;
}

/* Estimates how likely a file is a MAT5 file.
 *
 * With only_name set, the decision is based purely on the lowercased file name: 10 when it ends with EXTENSION,
 * 0 otherwise.  Without it, the header bytes at MAGIC_OFFSET are compared with both signature variants: 100 when
 * either matches, 0 otherwise (including when the available buffer does not extend past the signature). */
static gint
detect_file(const GwyFileDetectInfo *fileinfo, gboolean only_name)
{
    const void *magic;

    if (only_name) {
        if (g_str_has_suffix(fileinfo->name_lowercase, EXTENSION))
            return 10;
        return 0;
    }

    /* The buffer must reach beyond the signature bytes. */
    if (fileinfo->buffer_len <= MAGIC_OFFSET + MAGIC_SIZE)
        return 0;

    magic = fileinfo->head + MAGIC_OFFSET;
    if (memcmp(magic, MAGIC_NATIVE, MAGIC_SIZE) != 0 && memcmp(magic, MAGIC_SWAP, MAGIC_SIZE) != 0)
        return 0;

    return 100;
}

/* Reads the tag of the next data element and advances the cursor past the element.
 *
 * On success cursor->data_type and cursor->nbytes describe the element, cursor->zp points to its payload and
 * cursor->p points after the (padded) element in the source buffer.  For MAT5_COMPRESSED elements the payload is
 * inflated into the shared context buffer and the cursor then describes the *inner* element, with zp pointing into
 * that buffer; the cursor owns the buffer until its next call of this function.
 *
 * Returns FALSE and sets @error when the tag or its data do not fit into the remaining size, when a short tag is
 * malformed, when compressed data are nested or when inflation fails.  The cursor must not be used further after a
 * failure. */
static gboolean
mat5_next_tag(Mat5FileCursor *cursor, GError **error)
{
    Mat5FileContext *fc = cursor->context;
    gsize padded_size;
    guint itemsize;

    /* If we own the buffer and are called to get the next tag, then the previous tag has been processed and the
     * buffer is free to use again. */
    if (fc->zbuffer_owner == cursor)
        fc->zbuffer_owner = NULL;

    if (cursor->size < MAT5_TAG_SIZE) {
        err_TRUNCATED_PART(error, "MAT5 tag");
        return FALSE;
    }

    /* Short tags have nonzero in first two bytes.  So says the Matlab docs, however, this statement seems to be
     * a big-endianism.  Check the two *upper* bytes of data type. */
    cursor->data_type = fc->get_guint32(&cursor->p);
    cursor->nbytes = cursor->data_type >> 16;
    gwy_debug("raw data_type: %08x", cursor->data_type);
    if (cursor->nbytes == 0) {
        /* Normal (long) tag */
        cursor->nbytes = fc->get_guint32(&cursor->p);
        cursor->size -= MAT5_TAG_SIZE;
        gwy_debug("Data of type %u, size %u",
                  cursor->data_type, cursor->nbytes);
        if (cursor->nbytes > cursor->size) {
            err_TRUNCATED_PART(error, "MAT5 tag");
            return FALSE;
        }
        /* Elements *start* at multiples of 8, but the last element in the file may lack the padding.  The padding
         * is calculated in gsize and only after the size check to keep the rounding from wrapping around in 32bit
         * arithmetic for absurd sizes. */
        padded_size = MAT5_PAD((gsize)cursor->nbytes);
        padded_size = MIN(padded_size, cursor->size);
        cursor->zp = cursor->p;
        cursor->p += padded_size;
        cursor->size -= padded_size;

        if (cursor->data_type == MAT5_COMPRESSED) {
            if (fc->zbuffer_owner) {
                g_set_error(error, GWY_MODULE_FILE_ERROR, GWY_MODULE_FILE_ERROR_DATA,
                            _("Compressed data inside compressed data found."));
                return FALSE;
            }

            /* This inflates from zp into zbuffer */
            if (!zinflate_variable(cursor, error))
                return FALSE;

            /* The inner tag is read from the inflated data below, so the buffer must hold at least one. */
            if (fc->zbuffer->len < MAT5_TAG_SIZE) {
                err_TRUNCATED_PART(error, "MAT5 tag");
                return FALSE;
            }

            fc->zbuffer_owner = cursor;

            /* Only after that we make zp the data pointer */
            cursor->zp = fc->zbuffer->data;
            cursor->data_type = fc->get_guint32(&cursor->zp);
            cursor->nbytes = fc->get_guint32(&cursor->zp);
            gwy_debug("+CompressedData of type %u, size %u", cursor->data_type, cursor->nbytes);
        }
    }
    else {
        /* Short tag, the length seems to be simply the upper two bytes, whereas the data type the lower two bytes.
         * Do not trust the nice boxes showing which byte is which in the documentation.  They were made by the evil
         * big-endian people.  */
        cursor->data_type &= 0xffff;
        cursor->zp = cursor->p;
        cursor->p += 4;
        cursor->size -= MAT5_TAG_SIZE;
        /* Types which are unknown or have no fixed item size have zero in the table.  They cannot be packed into a
         * short tag and, more importantly, zero must never be used as the divisor below. */
        itemsize = (cursor->data_type < G_N_ELEMENTS(typesize)) ? typesize[cursor->data_type] : 0;
        if (!itemsize
            || cursor->nbytes > 4
            || cursor->nbytes < itemsize
            || cursor->nbytes % itemsize) {
            g_set_error(error, GWY_MODULE_FILE_ERROR, GWY_MODULE_FILE_ERROR_DATA,
                        _("Invalid short tag of type %u claims to consists of %u bytes."),
                        cursor->data_type, cursor->nbytes);
            return FALSE;
        }
        gwy_debug("ShortData of type %u", cursor->data_type);
    }

    /* Here zp always points to the real data and p after the buffer data */
    return TRUE;
}

/* Loads all importable matrices from a MAT5 file.
 *
 * The file is read into memory, the signature at MAGIC_OFFSET selects the byte order and then the top-level data
 * elements are walked one by one.  Each MAT5_MATRIX element that try_read_field() can convert becomes one image of
 * the returned file, titled with the variable name.
 *
 * Returns the new file, or NULL with @error set when the file cannot be read, is too short, has an unknown
 * signature or contains no importable data.
 *
 * NOTE(review): when a tag fails to parse after some images have already been imported, the function returns the
 * partially filled file *and* leaves @error set.  Decide whether partial data should be kept (and the error cleared)
 * or discarded. */
static GwyFile*
load_file(const gchar *filename,
          G_GNUC_UNUSED GwyRunModeFlags mode,
          GError **error)
{
    GwyFile *file = NULL;
    guchar *buffer = NULL;
    const guchar *p;
    gsize size = 0;
    gint G_GNUC_UNUSED tagno = 0, id = 0;
    GError *err = NULL;
    GwyField *dfield = NULL;
    GString *name = NULL;
    Mat5FileContext fc;
    Mat5FileCursor cursor;

    gwy_clear1(cursor);
    gwy_clear1(fc);
    cursor.context = &fc;

    if (!gwy_file_get_contents(filename, &buffer, &size, &err)) {
        err_GET_FILE_CONTENTS(error, &err);
        return NULL;
    }

    if (size < MAT5_HEADER_SIZE) {
        err_TOO_SHORT(error);
        goto fail;
    }

    /* The signature is compared byte by byte and the readers selected below decode a fixed byte order, so the
     * result is the byte order of the *file* and it must not be flipped according to the host byte order. */
    p = buffer + MAGIC_OFFSET;
    if (memcmp(p, MAGIC_NATIVE, MAGIC_SIZE) == 0)
        fc.msb = FALSE;
    else if (memcmp(p, MAGIC_SWAP, MAGIC_SIZE) == 0)
        fc.msb = TRUE;
    else {
        err_FILE_TYPE(error, "Matlab MAT5");
        goto fail;
    }

    fc.get_guint32 = fc.msb ? gwy_get_guint32_be : gwy_get_guint32_le;
    fc.zbuffer = g_byte_array_new();
    name = g_string_new(NULL);

    cursor.p = buffer + MAT5_HEADER_SIZE;
    cursor.size = size - MAT5_HEADER_SIZE;

    for (tagno = 0; cursor.size; tagno++) {
        if (!mat5_next_tag(&cursor, error))
            goto fail;

        /* The only interesting case is MATRIX */
        if (cursor.data_type == MAT5_MATRIX) {
            if ((dfield = try_read_field(&cursor, name))) {
                if (!file)
                    file = gwy_file_new_in_construction();

                gwy_file_pass_image(file, id, dfield);
                gwy_file_set_title(file, GWY_FILE_IMAGE, id, name->str, FALSE);
                gwy_log_add_import(file, GWY_FILE_IMAGE, id, NULL, filename);
                id++;
            }
        }
    }

    if (!file)
        err_NO_DATA(error);

fail:
    gwy_file_abandon_contents(buffer, size, NULL);
    /* The early failure paths get here before name and zbuffer are created (fc was cleared above, so zbuffer is
     * still NULL then).  The GLib free functions complain about NULL arguments, so only free what exists. */
    if (name)
        g_string_free(name, TRUE);
    if (fc.zbuffer)
        g_byte_array_free(fc.zbuffer, TRUE);

    return file;
}

/* Attempts to convert one MAT5_MATRIX element to a data field.
 *
 * @parent must describe the matrix element (zp pointing to its payload, nbytes giving the payload size).  The
 * sub-elements are read in order: array flags, dimensions, name and the real part.  Only two-dimensional arrays of
 * a numeric type known to raw_type_from_mat5() are imported.  Structures are only listed in debugging output.
 *
 * @name is overwritten with the variable name as soon as the name sub-element has been read, even when the
 * function gives up later.
 *
 * Returns a newly created field, or NULL when the element cannot be imported.  No error is reported; unsupported
 * variables are silently skipped by the caller. */
static GwyField*
try_read_field(Mat5FileCursor *parent,
               GString *name)
{
    Mat5FileContext *fc = parent->context;
    Mat5FileCursor cursor;
    Mat5ClassType klass;
    G_GNUC_UNUSED Mat5ArrayFlags array_flags;
    GwyRawDataType rawtype;
    gint xres, yres;
    GwyField *dfield = NULL, *buf;
    guint flags, rawsize;
    gsize npixels;

    gwy_clear1(cursor);
    cursor.context = parent->context;
    cursor.p = parent->zp;
    cursor.size = parent->nbytes;

    /* Array flags */
    if (!mat5_next_tag(&cursor, NULL) || cursor.data_type != MAT5_UINT32 || cursor.nbytes != 2*4)
        goto out;

    flags = fc->get_guint32(&cursor.zp);
    array_flags = flags & MAT5_FLAG_MASK;
    klass = flags & MAT5_CLASS_MASK;
    gwy_debug("array_flags = %02x, class = %02x", array_flags, klass);
    /* reserved: fc->get_guint32(&cursor.zp); */

    /* Dimensions array */
    /* We can only import two-dimensional arrays */
    if (!mat5_next_tag(&cursor, NULL) || cursor.data_type != MAT5_INT32 || cursor.nbytes != 2*4)
        goto out;

    yres = fc->get_guint32(&cursor.zp);
    xres = fc->get_guint32(&cursor.zp);
    gwy_debug("xres = %d, yres = %d", xres, yres);
    if (err_DIMENSION(NULL, xres) || err_DIMENSION(NULL, yres))
        goto out;

    /* Array name */
    if (!mat5_next_tag(&cursor, NULL) || cursor.data_type != MAT5_INT8)
        goto out;

    g_string_truncate(name, 0);
    g_string_append_len(name, (const gchar*)cursor.zp, cursor.nbytes);
    gwy_debug("name = %s", name->str);

    /* Debug struct contents */
    if (klass == MAT5_CLASS_STRUCT) {
        guint i, n, field_name_len;

        /* The length is read as one 32bit value, so the element must really contain one. */
        if (!mat5_next_tag(&cursor, NULL) || cursor.data_type != MAT5_INT32 || cursor.nbytes < 4)
            goto out;

        field_name_len = fc->get_guint32(&cursor.zp);
        gwy_debug("field_name_len: %u", field_name_len);
        /* The value comes from the file and is used as a divisor below. */
        if (!field_name_len)
            goto out;

        if (!mat5_next_tag(&cursor, NULL) || cursor.data_type != MAT5_INT8)
            goto out;

        n = cursor.nbytes/field_name_len;
        for (i = 0; i < n; i++) {
            /* Names filling the entire slot are not NUL-terminated; limit the printed length. */
            gwy_debug("struct field%u %.*s", i, (gint)field_name_len, cursor.zp);
            cursor.zp += field_name_len;
            cursor.nbytes -= field_name_len;
        }
        goto out;
    }

    /* Real part */
    if (!mat5_next_tag(&cursor, NULL))
        goto out;
    if (!raw_type_from_mat5(cursor.data_type, &rawtype))
        goto out;
    /* The type is known to be a valid index into typesize[] only after the check above. */
    gwy_debug("data_type %u, type size %u", cursor.data_type, typesize[cursor.data_type]);
    rawsize = gwy_raw_data_size(rawtype);
    /* Both dimensions are positive here; multiply in gsize to keep the product from overflowing gint. */
    npixels = (gsize)xres*(gsize)yres;
    if (!rawsize || npixels > cursor.nbytes/rawsize)
        goto out;

    /* The file stores the matrix column by column.  Read it as a field with swapped dimensions and transpose. */
    buf = gwy_field_new(yres, xres, yres, xres, FALSE);
    dfield = gwy_field_new(xres, yres, xres, yres, FALSE);
    gwy_convert_raw_data(cursor.zp, npixels, 1,
                         rawtype, cursor.context->msb ? GWY_BYTE_ORDER_BIG_ENDIAN : GWY_BYTE_ORDER_LITTLE_ENDIAN,
                         gwy_field_get_data(buf), 1.0, 0.0);
    gwy_field_transpose(buf, dfield, FALSE);
    g_object_unref(buf);
    cursor.zp += npixels*rawsize;

    /* Imaginary part (optional): FIXME currently ignored. */

out:
    /* The local cursor may have become the owner of the inflate buffer (compressed sub-element).  It ceases to
     * exist now, so do not leave a dangling owner pointer in the shared context. */
    if (fc->zbuffer_owner == &cursor)
        fc->zbuffer_owner = NULL;

    return dfield;
}

/* Translates a MAT5 numeric data type to the corresponding raw data type.
 *
 * Returns TRUE and fills @rawtype for the integer and floating point types; returns FALSE and leaves @rawtype
 * untouched for anything else (matrices, compressed data, text types and unassigned codes). */
static gboolean
raw_type_from_mat5(Mat5DataType dtype, GwyRawDataType *rawtype)
{
    switch (dtype) {
        case MAT5_INT8:
        *rawtype = GWY_RAW_DATA_SINT8;
        break;

        case MAT5_UINT8:
        *rawtype = GWY_RAW_DATA_UINT8;
        break;

        case MAT5_INT16:
        *rawtype = GWY_RAW_DATA_SINT16;
        break;

        case MAT5_UINT16:
        *rawtype = GWY_RAW_DATA_UINT16;
        break;

        case MAT5_INT32:
        *rawtype = GWY_RAW_DATA_SINT32;
        break;

        case MAT5_UINT32:
        *rawtype = GWY_RAW_DATA_UINT32;
        break;

        case MAT5_SINGLE:
        *rawtype = GWY_RAW_DATA_FLOAT;
        break;

        case MAT5_DOUBLE:
        *rawtype = GWY_RAW_DATA_DOUBLE;
        break;

        case MAT5_INT64:
        *rawtype = GWY_RAW_DATA_SINT64;
        break;

        case MAT5_UINT64:
        *rawtype = GWY_RAW_DATA_UINT64;
        break;

        default:
        return FALSE;
        break;
    }

    return TRUE;
}

/* Inflates the payload of a MAT5_COMPRESSED element into the shared context buffer.
 *
 * The compressed stream at cursor->zp (cursor->nbytes bytes) is unpacked twice: first only the leading tag to learn
 * the size of the inner element, then everything into fc->zbuffer, which is resized to hold the inner tag and its
 * data.
 *
 * Returns TRUE on success.  On failure FALSE is returned and @error is set, either by the unpacking function or
 * here when the announced size is impossible. */
static gboolean
zinflate_variable(Mat5FileCursor *cursor,
                  GError **error)
{
    /* Deflate cannot expand data more than 1032 times (two input bits can produce at most 258 output bytes). */
    enum { MAX_DEFLATE_RATIO = 1032 };
    Mat5FileContext *fc = cursor->context;
    /* Zero-filled so that the size is well defined even if fewer than 8 bytes get inflated. */
    guchar tag[MAT5_TAG_SIZE] = { 0 };
    const guchar *p = tag;
    gsize csize, decomp_size;
    guint nbytes;

    csize = cursor->nbytes;
    decomp_size = MAT5_TAG_SIZE;
    if (!gwyzlib_unpack_compressed_data(cursor->zp, &csize, tag, &decomp_size, error))
        return FALSE;

    fc->get_guint32(&p);    /* data_type */
    nbytes = fc->get_guint32(&p);
    /* Refuse sizes which would wrap around when the tag size is added (leaving the buffer too small even for the
     * tag) and sizes the compressed stream cannot possibly expand to.  The latter keeps a garbage size from
     * aborting the program in memory allocation. */
    if (nbytes > G_MAXUINT - MAT5_TAG_SIZE
        || (guint64)nbytes + MAT5_TAG_SIZE > (guint64)cursor->nbytes*MAX_DEFLATE_RATIO) {
        g_set_error(error, GWY_MODULE_FILE_ERROR, GWY_MODULE_FILE_ERROR_DATA,
                    _("Compressed data block has invalid uncompressed size %u."), nbytes);
        return FALSE;
    }
    g_byte_array_set_size(fc->zbuffer, nbytes + MAT5_TAG_SIZE);

    csize = cursor->nbytes;
    decomp_size = fc->zbuffer->len;
    return !!gwyzlib_unpack_compressed_data(cursor->zp, &csize, fc->zbuffer->data, &decomp_size, error);
}

/* vim: set cin columns=120 tw=118 et ts=4 sw=4 cino=>1s,e0,n0,f0,{0,}0,^0,\:1s,=0,g1s,h0,t0,+1s,c3,(0,u0 : */
