first release of the integrity check mechanism

This commit is contained in:
Romain Bazile 2020-11-20 15:09:00 +01:00
parent ae29f8597e
commit 6009e07cd6
3 changed files with 297 additions and 0 deletions

scripts/bash/usb_backup.sh Executable file

@@ -0,0 +1,122 @@
#!/bin/bash
# Stolen from https://github.com/raamsri/automount-usb/blob/master/usb-mount.sh
# This work is licensed under the Unlicense
# This script is based on https://serverfault.com/a/767079 posted
# by Mike Blackwell, modified to our needs. Credits to the author.
# This script is called from a systemd unit file to mount a USB drive,
# back up the local data to it, and unmount the drive afterwards.
PATH="$PATH:/usr/bin:/usr/local/bin:/usr/sbin:/usr/local/sbin:/bin:/sbin"
log="logger -t usb-backup.sh -s "
usage()
{
    ${log} "Usage: $0 device_name (e.g. sdb1)"
    exit 1
}

if [[ $# -ne 1 ]]; then
    usage
fi
DEVBASE=$1
DEVICE="/dev/${DEVBASE}"
SOURCE="/home/pi/data/" # source of files
# See if this drive is already mounted, and if so where
MOUNT_POINT=$(mount | grep "${DEVICE}" | awk '{ print $3 }')
DEV_LABEL=""
do_mount()
{
    if [[ -n ${MOUNT_POINT} ]]; then
        ${log} "Warning: ${DEVICE} is already mounted at ${MOUNT_POINT}"
        exit 1
    fi

    # Get info for this drive: $ID_FS_LABEL and $ID_FS_TYPE
    eval "$(blkid -o udev "${DEVICE}" | grep -i -e "ID_FS_LABEL" -e "ID_FS_TYPE")"

    # Figure out a mount point to use
    LABEL=${ID_FS_LABEL}
    if grep -q " /media/${LABEL} " /etc/mtab; then
        # Already in use, make a unique one
        LABEL+="-${DEVBASE}"
    fi
    DEV_LABEL="${LABEL}"

    # Use the device name in case the drive doesn't have a label
    if [ -z "${DEV_LABEL}" ]; then
        DEV_LABEL="${DEVBASE}"
    fi

    MOUNT_POINT="/media/${DEV_LABEL}"
    ${log} "Mount point: ${MOUNT_POINT}"
    mkdir -p "${MOUNT_POINT}"

    # Global mount options
    OPTS="rw,relatime"

    # File system type specific mount options
    if [[ ${ID_FS_TYPE} == "vfat" ]]; then
        OPTS+=",users,gid=100,umask=000,shortname=mixed,utf8=1,flush"
    fi

    if ! mount -o "${OPTS}" "${DEVICE}" "${MOUNT_POINT}"; then
        ${log} "Error mounting ${DEVICE} (status = $?)"
        rmdir "${MOUNT_POINT}"
        exit 1
    else
        # Track the mounted drives
        echo "${MOUNT_POINT}:${DEVBASE}" >> /var/log/usb-mount.track
    fi

    ${log} "Mounted ${DEVICE} at ${MOUNT_POINT}"
}
do_unmount()
{
    if [[ -z ${MOUNT_POINT} ]]; then
        ${log} "Warning: ${DEVICE} is not mounted"
    else
        umount -l "${DEVICE}"
        ${log} "Unmounted ${DEVICE} from ${MOUNT_POINT}"
        /bin/rmdir "${MOUNT_POINT}"
        sed -i.bak "\@${MOUNT_POINT}@d" /var/log/usb-mount.track
    fi
}
do_backup()
{
    do_mount

    if [[ -z ${MOUNT_POINT} ]]; then
        ${log} "Warning: ${DEVICE} is not mounted"
    else
        if [[ -f "${MOUNT_POINT}/planktoscope.backup" ]]; then
            ${log} "Starting to back up local files"
            MACHINE=$(python3 -c "import planktoscope.uuidName as uuidName; print(uuidName.machineName(machine=uuidName.getSerial()).replace(' ','_'))")
            BACKUP_FOLDER="${MOUNT_POINT}/planktoscope_data/${MACHINE}"
            ${log} "Machine name is ${MACHINE}, backup folder is ${BACKUP_FOLDER}"
            mkdir -p "$BACKUP_FOLDER"
            rsync -rtD --modify-window=1 --update --progress "$SOURCE" "$BACKUP_FOLDER"
            # Ideally, the integrity of the copied files should be checked here
            # (see the sketch after this script)
        else
            ${log} "Warning: ${DEVICE} does not contain the special file planktoscope.backup at its root"
        fi
        do_unmount
    fi
}
do_backup
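
The comment in do_backup notes that the copied files should ideally be verified after the rsync. A minimal sketch of such a check, reusing get_filename_checksum from the planktoscope.integrity module introduced below and the integrity.check format it defines; the function name verify_backup_folder and the walking logic are illustrative only, and the script would have to invoke something like it through python3:

import os

import planktoscope.integrity as integrity


def verify_backup_folder(folder):
    """Return the list of files whose recorded size or checksum does not match."""
    mismatches = []
    for root, _dirs, files in os.walk(folder):
        if "integrity.check" not in files:
            continue
        with open(os.path.join(root, "integrity.check"), "r") as check_file:
            for line in check_file:
                if line.startswith("#"):
                    # Skip the two header lines
                    continue
                name, size, sha1 = line.strip().split(",")
                filepath = os.path.join(root, name)
                if (
                    not os.path.exists(filepath)
                    or os.path.getsize(filepath) != int(size)
                    or integrity.get_filename_checksum(filepath) != sha1
                ):
                    mismatches.append(filepath)
    return mismatches

A non-empty result could then be reported through the same logger call the script already uses.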


@@ -30,6 +30,9 @@ import planktoscope.imager_state_machine

# import raspimjpeg module
import planktoscope.raspimjpeg

# Integrity verification module
import planktoscope.integrity

################################################################################
# Streaming PiCamera over server
@@ -186,6 +189,10 @@ class ImagerProcess(multiprocessing.Process):
        self.__shutter_speed = shutter_speed
        self.__exposure_mode = "fixedfps"
        self.__base_path = "/home/pi/data/img"
        # Let's make sure the base path exists
        if not os.path.exists(self.__base_path):
            os.makedirs(self.__base_path)

        self.__export_path = ""
        self.__global_metadata = None
@@ -455,6 +462,7 @@ class ImagerProcess(multiprocessing.Process):
            str(self.__global_metadata["sample_id"]),
            str(self.__global_metadata["acq_id"]),
        )

        if not os.path.exists(self.__export_path):
            # create the path!
            os.makedirs(self.__export_path)
@@ -468,6 +476,14 @@ class ImagerProcess(multiprocessing.Process):
            f"Metadata dumped in {metadata_file} are {self.__global_metadata}"
        )

        # Create the integrity file in this export path
        try:
            planktoscope.integrity.create_integrity_file(self.__export_path)
        except FileExistsError as e:
            logger.info(
                f"The integrity file already exists in this export path {self.__export_path}"
            )

        # Sleep for a while before starting the acquisition
        time.sleep(self.__sleep_before)
@@ -510,10 +526,30 @@ class ImagerProcess(multiprocessing.Process):
            self.__camera.capture(filename_path)
        except TimeoutError as e:
            logger.error("A timeout happened while waiting for a capture to happen")
            # Publish the failure status via MQTT to Node-RED
            self.imager_client.client.publish(
                "status/imager",
                f'{{"status":"Image {self.__img_done + 1}/{self.__img_goal} WAS NOT CAPTURED! STOPPING THE PROCESS!"}}',
            )
            # Reset the counter to 0
            self.__img_done = 0
            # Change state towards stop
            self.__imager.change(planktoscope.imager_state_machine.Stop)
            # Set the LEDs as Cyan
            planktoscope.light.setRGB(0, 255, 255)
            return

        # Set the LEDs as Green
        planktoscope.light.setRGB(0, 255, 0)

        # Add the checksum of the captured image to the integrity file
        try:
            planktoscope.integrity.append_to_integrity_file(filename_path)
        except FileNotFoundError as e:
            logger.error(
                f"{filename_path} was not found, the camera may not have worked properly!"
            )

        # Publish the name of the image via MQTT to Node-RED
        self.imager_client.client.publish(
            "status/imager",
@@ -535,6 +571,7 @@ class ImagerProcess(multiprocessing.Process):
            self.__imager.change(planktoscope.imager_state_machine.Stop)
            # Set the LEDs as Cyan
            planktoscope.light.setRGB(0, 255, 255)
            return
        else:
            # We have not reached the final stage, let's keep imaging
            # Set the LEDs as Blue


@@ -0,0 +1,138 @@
# This module calculates the checksum of created files and adds them to a file called integrity.check
# The file is composed as follows:
# First, a two-line header:
# # planktoscope integrity file, see https://www.planktoscope.org
# # filename,size,sha1
# The second header line defines the order in which the information is saved; for now, this order is fixed
# Then there is one line per file, with the name of the file, its size in bytes and its sha1 checksum
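# For illustration, a populated integrity file could look like this (the file names,
# sizes and digests below are hypothetical placeholders):
# # planktoscope integrity file, see https://www.planktoscope.org
# # filename,size,sha1
# image_0001.jpg,1234567,<40 hexadecimal sha1 characters>
# image_0002.jpg,1234892,<40 hexadecimal sha1 characters>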
import os
import hashlib
# Logger library compatible with multiprocessing
from loguru import logger
def get_checksum(filename):
    """Returns the sha1 checksum of the file

    Args:
        filename (string): name of the file to calculate the checksum of

    Returns:
        string: sha1 checksum of the file
    """
    logger.debug(f"Calculating the integrity of {filename}'s content")

    if not os.path.exists(filename):
        # The file does not exist!
        logger.error(f"The file {filename} does not exist!")
        raise FileNotFoundError

    # Since we are only doing integrity verification, we can use an "insecure" hashing
    # algorithm. If it's good enough for git, it's good enough for us.
    sha1 = hashlib.sha1()  # nosec

    with open(filename, "rb") as f:
        while True:
            # Read chunks of the algorithm's block size
            chunk = f.read(sha1.block_size)
            if not chunk:
                break
            sha1.update(chunk)

    return sha1.hexdigest()
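# Note: get_checksum hashes only the file contents, so its digest matches the one
# reported by the standard sha1sum command line tool for the same file.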
def get_filename_checksum(filename):
    """Returns the sha1 checksum of the filename, a null character and the file content

    Args:
        filename (string): name of the file to calculate the checksum of

    Returns:
        string: sha1 checksum of the filename and its content
    """
    logger.debug(f"Calculating the integrity of {filename}'s content and its filename")

    if not os.path.exists(filename):
        # The file does not exist!
        logger.error(f"The file {filename} does not exist!")
        raise FileNotFoundError

    # Since we are only doing integrity verification, we can use an "insecure" hashing
    # algorithm. If it's good enough for git, it's good enough for us.
    sha1 = hashlib.sha1()  # nosec

    sha1.update(os.path.split(filename)[1].encode())
    sha1.update("\00".encode())

    with open(filename, "rb") as f:
        while True:
            # Read chunks of the algorithm's block size
            chunk = f.read(sha1.block_size)
            if not chunk:
                break
            sha1.update(chunk)

    return sha1.hexdigest()
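# Note: unlike get_checksum, this digest covers the file name, a null byte and then the
# contents, so renaming a file invalidates its recorded checksum in integrity.check.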
def create_integrity_file(path):
    """Create an integrity file in the designated path

    Args:
        path (string): path where to create the integrity file

    Raises:
        FileExistsError: Raised if the integrity file already exists there.
    """
    logger.debug(f"Create the integrity file in the folder {path}")

    # Make sure the folder that will hold the integrity file exists
    if not os.path.exists(path):
        os.makedirs(path, exist_ok=True)

    integrity_file_path = os.path.join(path, "integrity.check")

    if os.path.exists(integrity_file_path):
        # The file already exists!
        logger.error(f"The integrity file already exists in the folder {path}")
        raise FileExistsError

    # Create the file with its two header lines
    with open(integrity_file_path, "w") as file:
        file.write("# planktoscope integrity file, see https://www.planktoscope.org\n")
        file.write("# filename,size,sha1\n")
def append_to_integrity_file(filepath):
    """Append the information about a file to the integrity file in its folder

    Args:
        filepath (string): path of the file to add to the integrity file
    """
    if not os.path.exists(filepath):
        logger.error(f"The file {filepath} does not exist!")
        raise FileNotFoundError

    integrity_file_path = os.path.join(os.path.dirname(filepath), "integrity.check")

    # Check that the integrity file exists, and create it otherwise
    if not os.path.exists(integrity_file_path):
        logger.debug(f"The integrity file does not exist in the folder of {filepath}")
        create_integrity_file(os.path.dirname(filepath))

    # Append the file's name, size and checksum to the integrity file
    with open(integrity_file_path, "a") as file:
        file.write(
            f"{os.path.split(filepath)[1]},{os.path.getsize(filepath)},{get_filename_checksum(filepath)}\n"
        )
def scan_path_to_integrity(path):
    # TODO implement the method that adds all the files of the folder to the integrity file
    pass


def check_path_integrity(path):
    # TODO implement the method that recursively reads the integrity file of a directory and checks everything down to the file level
    pass
if __name__ == "__main__":
    # TODO add here a way to check a folder's integrity easily with a simple command line
    # something like python3 scripts/planktoscope/integrity.py -c path/to/folder/to/check
    pass
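
One possible shape for the scan_path_to_integrity stub, given only as a sketch of the TODO above: it assumes every regular file in the folder except integrity.check itself should be recorded, and calls the module's own helpers (shown here as a standalone function importing the module):

import os

import planktoscope.integrity as integrity


def scan_path_to_integrity(path):
    """Add all the regular files of the folder to its integrity file."""
    try:
        integrity.create_integrity_file(path)
    except FileExistsError:
        # The integrity file is already there, we simply append to it
        pass
    for entry in sorted(os.listdir(path)):
        filepath = os.path.join(path, entry)
        if entry == "integrity.check" or not os.path.isfile(filepath):
            continue
        integrity.append_to_integrity_file(filepath)

Files that are already listed would be appended a second time; deduplication is left out of this sketch.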