PPM file parser in Godot 4.2

Godot Version

4.2

Question

I wanted to implement this .ppm flipnote file parser in Godot 4.2.

I tried to make the code work as close as possible to how the python code works. Here is my GDScript code:

extends Resource
class_name PPMFile
## Flipnote file
##
## Class representing a flipnote file. Contains all useful information about flipnotes.
## Credits to https://github.com/Flipnote-Collective/ppm-parser
## and https://github.com/Flipnote-Collective/flipnote-studio-docs/wiki/PPM-format

# Flipnote speed -> frames per second
const FRAMERATES = {
    1: 0.5,
    2: 1,
    3: 2,
    4: 4,
    5: 6,
    6: 12,
    7: 20,
    8: 30,
}

# Frame RGB colors
const BLACK: = Color8(0x0E, 0x0E, 0x0E)
const WHITE: = Color8(0xFF, 0xFF, 0xFF)
const BLUE: = Color8(0x0A, 0x39, 0xFF)
const RED: = Color8(0xFF, 0x2A, 0x2A)

const THUMBNAIL_PALETTE: = [
    "FFFFFF",
    "525252",
    "FFFFFF",
    "9C9C9C",
    "FF4844",
    "C8514F",
    "FFADAC",
    "00FF00",
    "4840FF",
    "514FB8",
    "ADABFF",
    "00FF00",
    "B657B7",
    "00FF00",
    "00FF00",
    "00FF00",
]

var file: FileAccess

var magic: String
var animation_data_size: int
var sound_data_size: int
var frame_count: int
var format_version: int

var locked: bool = false
var thumbnail_frame: int
var root_author_name: String
var parent_author_name: String
var current_author_name: String
var parent_author_id: int
var current_author_id: int
var parent_filename: String
var current_filename: String
var root_author_id: int
var last_timestamp: Dictionary

var offset_table: Array
var loop: bool
var layer_1_visible: bool
var layer_2_visible: bool

var bgm_size: int
var sfx1_size: int
var sfx2_size: int
var sfx3_size: int
var frame_speed: int
var bgm_speed: int

# Buffers
var layers: Array 
var prev_layers: Array
var prev_frame_index: = -1


static func open(path: String) -> PPMFile:
    return PPMFile.new(path)

func _init(path: String = "") -> void:
    if path:
        file = FileAccess.open(path, FileAccess.READ)
        load_ppm()

func load_ppm() -> void:
    read_header()
    read_metadata()
    read_animation_header()
    read_sound_header()
    layers = _clean_layers()
    prev_layers = _clean_layers()

func unload() -> void:
    file.close()

func read_header() -> void:
    # Decode header
    magic = _read_string(4)
    animation_data_size = file.get_32()
    sound_data_size = file.get_32()
    frame_count = file.get_16()
    format_version = file.get_16()
    
func read_metadata() -> void:
    var read_filename: Callable = func(offset: int):
        # Parent and current filenames are stored as:
        #  - 3 bytes representing the last 6 digits of the Consoles's MAC address
        #  - 13-character string
        #  - uint16 edit counter
        var mac: String = "%02X%02X%02X" % [file.get_8(), file.get_8(), file.get_8()]
        var ident: = _read_string(13)
        var edits: = file.get_16()
        return mac + "_" + ident + "_%03d" % edits
    file.seek(0x10)
    locked = bool(file.get_16())
    thumbnail_frame = file.get_16()
    root_author_name = _read_string(22)
    parent_author_name = _read_string(22)
    current_author_name = _read_string(22)
    parent_author_id = file.get_8()
    current_author_id = file.get_8()
    parent_filename = read_filename.call(0x10 + 0x56)
    current_filename = read_filename.call(0x10 + 0x68)
    root_author_id = file.get_64()
    var fragmented: = file.get_64()
    # We add 946684800 to convert this to a more common unix timestamp, which start on jan 1st 1970
    last_timestamp = Time.get_datetime_dict_from_unix_time(file.get_32() + 946684800)

func read_thumbnail() -> Array[Array]:
    file.seek(0xA0)
    var bitmap: Array[Array] = []
    for y in 48:
        var row: = []
        for i in 64:
            row.append(0)
        bitmap.append(row)
    
    for tile_index in 48:
        var tile_x: = tile_index % 8 * 8
        var tile_y: = int(tile_index / 8) * 8
        for line in 8:
            for pixel in range(0, 8, 2):
                var byte: = file.get_8()
                var x: = tile_x + pixel
                var y: = tile_y + line
                bitmap[y][x] = byte & 0x0F
                bitmap[y][x+1] = (byte >> 4) & 0x0F
    return bitmap

func read_animation_header() -> void:
    file.seek(0x06A0)
    var table_size: = file.get_16()
    var unknown: = file.get_32()
    var flags: = file.get_16()
    # Unpack flags
    layer_1_visible = (flags >> 11) & 0x01
    layer_2_visible = (flags >> 10) & 0x01
    loop = (flags >> 1) & 0x01
    # Make offset table
    offset_table = []
    for i in table_size:
        offset_table.append(
            file.get_32() + 0x06A8 + table_size
        )

func read_sound_header() -> void:
    var offset: = 0x06A0 + animation_data_size + frame_count
    if offset % 2 != 0:
        offset += 4 - (offset % 4)
    file.seek(offset)
    bgm_size = file.get_32()
    sfx1_size = file.get_32()
    sfx2_size = file.get_32()
    sfx3_size = file.get_32()
    frame_speed = 8 - file.get_8()
    bgm_speed = 8 - file.get_8()

func read_frame(index: int) -> Array:
    var is_frame_new: = func(_index: int):
        file.seek(offset_table[_index])
        return file.get_8() >> 7 & 0x1
    var read_line_types: = func(line_types: PackedByteArray):
        var output: Array[Array] = []
        for i in 192:
            var line_type: int = line_types[int(i / 4)] >> ((index % 4) * 2) & 0x03
            output.append([i, line_type])
        return output

    # Decode previous frames if needed
    if (index != 0) and (prev_frame_index != index) and (not is_frame_new.call(index)):
        read_frame(index - 1)
    # Copy current layer buffer to previous layer buffer
    prev_layers = layers.duplicate(true)
    prev_frame_index = index
    # Clear current buffer
    layers = _clean_layers()
    
    file.seek(offset_table[index])
    # Unpack frame header flags
    var header: = file.get_8()
    var is_new_frame: = (header >> 7) & 0x1
    var is_translated: = (header >> 5) & 0x3
    var translation_x: = 0
    var translation_y: = 0
    # If the frame is translated, we need to unpack the x and y values
    if is_translated:
        translation_x = file.get_8()
        translation_y = file.get_8()
    # Line encoding bytes
    var line_types: = [
        file.get_buffer(48),
        file.get_buffer(48),
    ]
    
    # Loop through layers
    for layer in 2:
        var bitmap: Array = layers[layer]
        for line in read_line_types.call(line_types[layer]):
            var pixel = 0
            #print(line[1])
            if line[1] == 0:
                # No data is stored for this line
                pass
                # Compressed line
            elif line[1] in [1, 2]:
                # If line type == 2, the line starts off with all the pixels set to 1
                if line[1] == 2:
                    for i in 256:
                        bitmap[line[0]][i] = 1
                # Unpack chunk usage
                var chunk_usage: = file.get_32()
                # Unpack pixel chunks
                while pixel < 256:
                    if chunk_usage & 0x80000000:
                        var chunk: = file.get_8()
                        for bit in 8:
                            bitmap[line[0]][pixel] = chunk >> bit & 0x1
                            pixel += 1
                    else:
                        pixel += 8
                    chunk_usage = chunk_usage << 1
                # Raw line
            elif line[1] == 3:
                # Unpack pixel chunks
                while pixel < 256:
                    var chunk: = file.get_8()
                    for bit in 8:
                        bitmap[line[0]][pixel] = chunk >> bit & 0x1
                        pixel += 1
    
    # Frame diffing - If the current frame is based on the preivous one, merge them by XORing their pixel
    if not is_frame_new:
        # Loop through lines
        for y in 192:
            # Skip to next line if this one falls off the top edge of the screen
            if y - translation_y < 0:
                continue
            # Stop once the bottom of the screen has been reached
            if y - translation_y >= 192:
                break
            for x in 256:
                # Skip to the next pixel if this one falls off the left edge of the screen
                if x - translation_x < 0:
                    continue
                # Stop diffing this line once the right screen edge has been reached
                if x - translation_x >= 256:
                    break
                # Diff pixels with binary XOR
                layers[0][y][x] ^= self.prev_layers[0][y - translation_y][x - translation_x]
                layers[1][y][x] ^= self.prev_layers[1][y - translation_y][x - translation_x]
    return layers

func get_frame_palette(index: int) -> Array:
    file.seek(offset_table[index])
    var header: = file.get_8()
    var paper_color: = header & 0x1
    var pen: = [
        null,
        BLACK if paper_color == 1 else WHITE,
        RED,
        BLUE,
    ]
    return [
        WHITE if paper_color == 1 else BLACK,
        pen[(header >> 1) & 0x3], # Layer 1 color
        pen[(header >> 3) & 0x3], # Layer 2 color
    ]

func get_frame_pixels(index: int) -> Array:
    var layers = read_frame(index)
    var pixels: = []
    for _y in 192:
        var row: = []
        for _x in 256:
            row.append(0)
        pixels.append(row)
        
    for y in 192:
        for x in 256:
            if layers[0][y][x] > 0:
                pixels[y][x] = 1
            elif layers[1][y][x] > 0:
                pixels[y][x] = 2
    return pixels

func _read_string(length: int) -> String:
    var output: String
    for i in length:
        output += char(file.get_8())
    return output

func _clean_layers() -> Array:
    var output: = []
    for _i in 2:
        var layer: = []
        for _y in 192:
            var row: = []
            for _x in 256:
                row.append(0)
            layer.append(row)
        output.append(layer)
    return output

I don’t understand what’s wrong, as the FileAccess class should work similarly, if not identically, to Python byte streams, but the frames come out heavily distorted. The only thing I’ve been able to figure out is that the problem seems to be right in the read_frame function. Is there anything I’m missing here? Does someone understand what did I do wrong here?

I used a flipnote from an online archive as a comparison:
You can see the frame as it should be parsed on the left and how it is actually parsed by my code on the right.

Thanks in advance.

I would check if you’re using float vs integer division.

You call this lambda function providing an offset with large numbers, and you don’t use it is that intentional?

At the end of read_metadata the file head is at 144 bytes
And you jump to 160 to read the thumbnail…

Bleh, just get a known small test file an put a debugger on this. Or better yet make a unit test!

Oh yeah, that was something I forgot to delete from an old version of the code.
The thumbnail is parsed properly (Thank god) and the offsets come from a documented reverse engineering of Flipnote files, so that’s why I jump to that specific offset.

Doesn’t seem to be the issue, thanks