#!/bin/bash
# Stolen from https://github.com/raamsri/automount-usb/blob/master/usb-mount.sh

# This work is licensed under the Unlicense

# This script is based on https://serverfault.com/a/767079 posted
# by Mike Blackwell, modified to our needs. Credits to the author.

# This script is called from a systemd unit file with the kernel name of a
# USB partition (e.g. sdb1). It mounts the partition, copies the local data
# folder to it if the partition carries the marker file
# "planktoscope.backup" at its root, and unmounts it again.

PATH="$PATH:/usr/bin:/usr/local/bin:/usr/sbin:/usr/local/sbin:/bin:/sbin"
log="logger -t usb-backup.sh -s "

# Print the expected invocation through the logger and abort.
usage()
{
    ${log} "Usage: $0 device_name (e.g. sdb1)"
    exit 1
}

if [[ $# -ne 1 ]]; then
    usage
fi

DEVBASE=$1
DEVICE="/dev/${DEVBASE}"
SOURCE="/home/pi/data/" # source of files

# See if this drive is already mounted, and if so where.
# The device column is compared for equality so that /dev/sdb1 does not
# also match /dev/sdb10, as a plain substring grep would.
MOUNT_POINT=$(mount | awk -v dev="${DEVICE}" '$1 == dev { print $3 }')

DEV_LABEL=""

# Mount ${DEVICE} under /media/<label or device name> and set MOUNT_POINT.
# Exits the script with status 1 if the device is already mounted or if the
# mount fails.
do_mount()
{
    if [[ -n ${MOUNT_POINT} ]]; then
        ${log} "Warning: ${DEVICE} is already mounted at ${MOUNT_POINT}"
        exit 1
    fi

    # Get info for this drive: $ID_FS_LABEL and $ID_FS_TYPE
    # The command substitution must be expanded BEFORE eval parses the text
    # (double quotes). With single quotes the expanded words are executed as
    # a command name instead of being treated as variable assignments, and
    # neither variable is ever set.
    eval "$(blkid -o udev "${DEVICE}" | grep -i -e "ID_FS_LABEL" -e "ID_FS_TYPE")"

    # Figure out a mount point to use
    LABEL=${ID_FS_LABEL}
    if grep -q " /media/${LABEL} " /etc/mtab; then
        # Already in use, make a unique one
        LABEL+="-${DEVBASE}"
    fi
    DEV_LABEL="${LABEL}"

    # Use the device name in case the drive doesn't have label
    if [ -z "${DEV_LABEL}" ]; then
        DEV_LABEL="${DEVBASE}"
    fi

    MOUNT_POINT="/media/${DEV_LABEL}"

    ${log} "Mount point: ${MOUNT_POINT}"

    mkdir -p "${MOUNT_POINT}"

    # Global mount options
    OPTS="rw,relatime"

    # File system type specific mount options
    if [[ ${ID_FS_TYPE} == "vfat" ]]; then
        OPTS+=",users,gid=100,umask=000,shortname=mixed,utf8=1,flush"
    fi

    # Capture the exit status right away: inside "if ! mount ...; then" the
    # value of $? is the negated status, which would always be logged as 0.
    mount -o "${OPTS}" "${DEVICE}" "${MOUNT_POINT}"
    local status=$?
    if [[ ${status} -ne 0 ]]; then
        ${log} "Error mounting ${DEVICE} (status = ${status})"
        rmdir "${MOUNT_POINT}"
        exit 1
    fi

    # Track the mounted drives
    echo "${MOUNT_POINT}:${DEVBASE}" >> "/var/log/usb-mount.track"

    ${log} "Mounted ${DEVICE} at ${MOUNT_POINT}"
}

# Lazily unmount ${DEVICE}, remove its mount point directory and drop its
# entry from the tracking file. Only logs a warning if nothing is mounted.
do_unmount()
{
    if [[ -z ${MOUNT_POINT} ]]; then
        ${log} "Warning: ${DEVICE} is not mounted"
    else
        umount -l "${DEVICE}"
        ${log} "Unmounted ${DEVICE} from ${MOUNT_POINT}"
        /bin/rmdir "${MOUNT_POINT}"
        sed -i.bak "\@${MOUNT_POINT}@d" /var/log/usb-mount.track
    fi
}

# Mount the drive, rsync ${SOURCE} into
# <mount point>/planktoscope_data/<machine name> when the marker file is
# present, then unmount the drive.
do_backup()
{
    do_mount
    if [[ -z ${MOUNT_POINT} ]]; then
        ${log} "Warning: ${DEVICE} is not mounted"
    else
        if [[ -f "${MOUNT_POINT}/planktoscope.backup" ]]; then
            ${log} "Starting to backup local files"
            MACHINE=$(python3 -c "import planktoscope.uuidName as uuidName; print(uuidName.machineName(machine=uuidName.getSerial()).replace(' ','_'))")
            BACKUP_FOLDER="${MOUNT_POINT}/planktoscope_data/${MACHINE}"
            ${log} "Machine name is ${MACHINE}, backup folder is ${BACKUP_FOLDER}"
            mkdir -p "$BACKUP_FOLDER"
            rsync -rtD --modify-window=1 --update --progress "$SOURCE" "$BACKUP_FOLDER"
            local status=$?
            if [[ ${status} -ne 0 ]]; then
                ${log} "Warning: rsync to ${BACKUP_FOLDER} exited with status ${status}"
            fi
            # Flush pending writes before the lazy unmount below, which
            # returns without waiting for the filesystem to become idle.
            sync
            # Ideally here, we should check for the integrity of files
        else
            ${log} "Warning: ${DEVICE} does not contain the special file planktoscope.backup at its root"
        fi
        do_unmount
    fi
}


do_backup
"fixedfps" self.__base_path = "/home/pi/data/img" + # Let's make sure the base path exists + if not os.path.exists(self.__base_path): + os.makedirs(self.__base_path) + self.__export_path = "" self.__global_metadata = None @@ -455,6 +462,7 @@ class ImagerProcess(multiprocessing.Process): str(self.__global_metadata["sample_id"]), str(self.__global_metadata["acq_id"]), ) + if not os.path.exists(self.__export_path): # create the path! os.makedirs(self.__export_path) @@ -468,6 +476,14 @@ class ImagerProcess(multiprocessing.Process): f"Metadata dumped in {metadata_file} are {self.__global_metadata}" ) + # Create the integrity file in this export path + try: + planktoscope.integrity.create_integrity_file(self.__export_path) + except FileExistsError as e: + logger.info( + f"The integrity file already exists in this export path {self.__export_path}" + ) + # Sleep a duration before to start acquisition time.sleep(self.__sleep_before) @@ -510,10 +526,30 @@ class ImagerProcess(multiprocessing.Process): self.__camera.capture(filename_path) except TimeoutError as e: logger.error("A timeout happened while waiting for a capture to happen") + # Publish the name of the image to via MQTT to Node-RED + self.imager_client.client.publish( + "status/imager", + f'{{"status":"Image {self.__img_done + 1}/{self.__img_goal} WAS NOT CAPTURED! STOPPING THE PROCESS!"}}', + ) + # Reset the counter to 0 + self.__img_done = 0 + # Change state towards stop + self.__imager.change(planktoscope.imager_state_machine.Stop) + # Set the LEDs as Green + planktoscope.light.setRGB(0, 255, 255) + return # Set the LEDs as Green planktoscope.light.setRGB(0, 255, 0) + # Add the checksum of the captured image to the integrity file + try: + planktoscope.integrity.append_to_integrity_file(filename_path) + except FileNotFoundError as e: + logger.error( + f"{filename_path} was not found, the camera may not have worked properly!" 
# This module calculates the checksum of created files and adds them to a file
# called integrity.check.
# The file is composed as follows:
# First, a header made of these two lines:
#     # planktoscope integrity file, see https://www.planktoscope.org
#     # filename,size,sha1
# The second line defines the order in which the information is saved; for
# now, it's all locked down.
# The following lines exist one per file, with the name of the file, its size
# in bytes and its sha1 checksum.


import hashlib
import os

try:
    # Logger library compatible with multiprocessing
    from loguru import logger
except ImportError:
    # Fall back to the standard library so this module can still be imported
    # where loguru is not installed.
    import logging

    logger = logging.getLogger(__name__)


# Name of the integrity file created inside each data folder
_INTEGRITY_FILENAME = "integrity.check"

# Two-line header written at the top of every integrity file
_INTEGRITY_HEADER = (
    "# planktoscope integrity file, see https://www.planktoscope.org\n"
    "# filename,size,sha1\n"
)

# Read size used while hashing. The digest does not depend on how the data is
# split, so a large buffer only reduces the number of read calls.
_CHUNK_SIZE = 64 * 1024


def _update_with_file_content(digest, filename):
    """Feed the whole content of a file into a hash object

    Args:
        digest: hashlib object to update in place
        filename (string): path of the file to read

    Raises:
        FileNotFoundError: Raised if the file does not exist.
    """
    try:
        f = open(filename, "rb")
    except FileNotFoundError:
        # The file does not exists!
        logger.error(f"The file {filename} does not exists!")
        raise

    with f:
        for chunk in iter(lambda: f.read(_CHUNK_SIZE), b""):
            digest.update(chunk)


def get_checksum(filename):
    """returns the sha1 checksum of the file

    Args:
        filename (string): file name of the file to calculate the checksum of

    Returns:
        string: sha1 checksum of the file

    Raises:
        FileNotFoundError: Raised if the file does not exist.
    """
    logger.debug(f"Calculating the integrity of {filename}'s content")

    # since we are just doing integrity verification, we can use an "insecure"
    # hashing algorithm. If it's good for git, it's good for us.
    sha1 = hashlib.sha1()  # nosec
    _update_with_file_content(sha1, filename)
    return sha1.hexdigest()


def get_filename_checksum(filename):
    """returns the sha1 checksum of the filename, a null character and the data

    Only the last component of the path is hashed, not its folder.

    Args:
        filename (string): file name of the file to calculate the checksum of

    Returns:
        string: sha1 checksum of the filename and its content

    Raises:
        FileNotFoundError: Raised if the file does not exist.
    """
    logger.debug(f"Calculating the integrity of {filename}'s content and its filename")

    # since we are just doing integrity verification, we can use an "insecure"
    # hashing algorithm. If it's good for git, it's good for us.
    sha1 = hashlib.sha1()  # nosec
    sha1.update(os.path.basename(filename).encode("utf-8"))
    sha1.update(b"\x00")
    _update_with_file_content(sha1, filename)
    return sha1.hexdigest()


def create_integrity_file(path):
    """Create an integrity file in the designated path

    The folder is created first if it does not exist yet.

    Args:
        path (string): path where to create the integrity file

    Raises:
        FileExistsError: Raised if the integrity file already exists there.
    """
    logger.debug(f"Create the integrity file in the folder {path}")
    if not os.path.exists(path):
        # Create the folder itself (not only its parent), otherwise the file
        # below cannot be opened.
        os.makedirs(path, exist_ok=True)

    integrity_file_path = os.path.join(path, _INTEGRITY_FILENAME)
    try:
        # Mode "x" fails if the file is already there, so the existence check
        # and the creation are a single step.
        with open(integrity_file_path, "x", encoding="utf-8") as file:
            file.write(_INTEGRITY_HEADER)
    except FileExistsError:
        # The file already exists!
        logger.error(f"The integrity file already exists in the folder {path}")
        raise


def append_to_integrity_file(filepath):
    """Append the information about a filename to the integrity file in its folder

    The integrity file is created with its header if the folder has none yet.

    Args:
        filepath (string): path of the file to add to the integrity file

    Raises:
        FileNotFoundError: Raised if filepath does not exist.
    """
    if not os.path.exists(filepath):
        logger.error(f"The file {filepath} does not exists!")
        raise FileNotFoundError(f"The file {filepath} does not exist")

    # A bare file name has an empty dirname: use the current directory then
    folder = os.path.dirname(filepath) or os.curdir
    integrity_file_path = os.path.join(folder, _INTEGRITY_FILENAME)

    # Check that the integrity files exists
    if not os.path.exists(integrity_file_path):
        logger.debug(f"The integrity file does not exists in the folder of {filepath}")
        create_integrity_file(folder)

    # Build the whole line before opening the integrity file, so a failure
    # while hashing cannot leave a partial entry behind.
    entry = (
        f"{os.path.basename(filepath)},"
        f"{os.path.getsize(filepath)},"
        f"{get_filename_checksum(filepath)}\n"
    )
    with open(integrity_file_path, "a", encoding="utf-8") as file:
        file.write(entry)


def scan_path_to_integrity(path):
    """Not implemented yet, does nothing and returns None"""
    # TODO implement the method that add all files in the folder to the integrity file
    pass


def check_path_integrity(path):
    """Not implemented yet, does nothing and returns None"""
    # TODO implement the method that recursively reads the integrity file of a repository and checks everything down to the file
    pass


if __name__ == "__main__":
    # TODO add here a way to check a folder integrity easily with a simple command line
    # something like python3 scripts/planktoscope/integrity.py -c path/to/folder/to/check
    pass