integrity: this and udev rule closes #15
This commit is contained in:
parent
6009e07cd6
commit
3bb27b9e15
|
@ -107,8 +107,11 @@ do_backup()
|
||||||
BACKUP_FOLDER="${MOUNT_POINT}/planktoscope_data/${MACHINE}"
|
BACKUP_FOLDER="${MOUNT_POINT}/planktoscope_data/${MACHINE}"
|
||||||
${log} "Machine name is ${MACHINE}, backup folder is ${BACKUP_FOLDER}"
|
${log} "Machine name is ${MACHINE}, backup folder is ${BACKUP_FOLDER}"
|
||||||
mkdir -p "$BACKUP_FOLDER"
|
mkdir -p "$BACKUP_FOLDER"
|
||||||
rsync -rtD --modify-window=1 --update --progress "$SOURCE" "$BACKUP_FOLDER"
|
rsync -rtD --modify-window=1 --update "$SOURCE" "$BACKUP_FOLDER"
|
||||||
# Ideally here, we should check for the integrity of files
|
if ! python3 -m planktoscope.integrity -c "$BACKUP_FOLDER"; then
|
||||||
|
${log} "ERROR: The files were corrupted during the copy!"
|
||||||
|
else
|
||||||
|
${log} "All files copied successfully!"
|
||||||
else
|
else
|
||||||
${log} "Warning: ${DEVICE} does not contain the special file planktoscope.backup at its root"
|
${log} "Warning: ${DEVICE} does not contain the special file planktoscope.backup at its root"
|
||||||
fi
|
fi
|
||||||
|
|
|
@ -13,25 +13,27 @@ import hashlib
|
||||||
# Logger library compatible with multiprocessing
|
# Logger library compatible with multiprocessing
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
|
integrity_file_name = "integrity.check"
|
||||||
|
|
||||||
def get_checksum(filename):
|
|
||||||
|
def get_checksum(filepath):
|
||||||
"""returns the sha1 checksum of the file
|
"""returns the sha1 checksum of the file
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
filename (string): file name of the file to calculate the checksum of
|
filepath (string): file name of the file to calculate the checksum of
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
string: sha1 checksum of the file
|
string: sha1 checksum of the file
|
||||||
"""
|
"""
|
||||||
logger.debug(f"Calculating the integrity of {filename}'s content")
|
logger.debug(f"Calculating the integrity of {filepath}'s content")
|
||||||
if not os.path.exists(filename):
|
if not os.path.exists(filepath):
|
||||||
# The file does not exists!
|
# The file does not exists!
|
||||||
logger.error(f"The file {filename} does not exists!")
|
logger.error(f"The file {filepath} does not exists!")
|
||||||
raise FileNotFoundError
|
raise FileNotFoundError
|
||||||
|
|
||||||
# since we are just doing integrity verification, we can use an "insecure" hashing algorithm. If it's good for git, it's good for us.
|
# since we are just doing integrity verification, we can use an "insecure" hashing algorithm. If it's good for git, it's good for us.
|
||||||
sha1 = hashlib.sha1() # nosec
|
sha1 = hashlib.sha1() # nosec
|
||||||
with open(filename, "rb") as f:
|
with open(filepath, "rb") as f:
|
||||||
while True:
|
while True:
|
||||||
# Let's read chunks in the algorithm block size we use
|
# Let's read chunks in the algorithm block size we use
|
||||||
chunk = f.read(sha1.block_size)
|
chunk = f.read(sha1.block_size)
|
||||||
|
@ -42,26 +44,26 @@ def get_checksum(filename):
|
||||||
return sha1.hexdigest()
|
return sha1.hexdigest()
|
||||||
|
|
||||||
|
|
||||||
def get_filename_checksum(filename):
|
def get_filename_checksum(filepath):
|
||||||
"""returns the sha1 checksum of the filename, a null character and the data
|
"""returns the sha1 checksum of the filename, a null character and the data
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
filename (string): file name of the file to calculate the checksum of
|
filepath (string): file name of the file to calculate the checksum of
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
string: sha1 checksum of the filename and its content
|
string: sha1 checksum of the filename and its content
|
||||||
"""
|
"""
|
||||||
logger.debug(f"Calculating the integrity of {filename}'s content and its filename")
|
logger.debug(f"Calculating the integrity of {filepath}'s content and its filename")
|
||||||
if not os.path.exists(filename):
|
if not os.path.exists(filepath):
|
||||||
# The file does not exists!
|
# The file does not exists!
|
||||||
logger.error(f"The file {filename} does not exists!")
|
logger.error(f"The file {filepath} does not exists!")
|
||||||
raise FileNotFoundError
|
raise FileNotFoundError
|
||||||
|
|
||||||
# since we are just doing integrity verification, we can use an "insecure" hashing algorithm. If it's good for git, it's good for us.
|
# since we are just doing integrity verification, we can use an "insecure" hashing algorithm. If it's good for git, it's good for us.
|
||||||
sha1 = hashlib.sha1() # nosec
|
sha1 = hashlib.sha1() # nosec
|
||||||
sha1.update(os.path.split(filename)[1].encode())
|
sha1.update(os.path.split(filepath)[1].encode())
|
||||||
sha1.update("\00".encode())
|
sha1.update("\00".encode())
|
||||||
with open(filename, "rb") as f:
|
with open(filepath, "rb") as f:
|
||||||
while True:
|
while True:
|
||||||
# Let's read chunks in the algorithm block size we use
|
# Let's read chunks in the algorithm block size we use
|
||||||
chunk = f.read(sha1.block_size)
|
chunk = f.read(sha1.block_size)
|
||||||
|
@ -87,7 +89,7 @@ def create_integrity_file(path):
|
||||||
# make sure the directory exists
|
# make sure the directory exists
|
||||||
os.makedirs(os.path.dirname(path), exist_ok=True)
|
os.makedirs(os.path.dirname(path), exist_ok=True)
|
||||||
|
|
||||||
integrity_file_path = os.path.join(path, "integrity.check")
|
integrity_file_path = os.path.join(path, integrity_file_name)
|
||||||
if os.path.exists(integrity_file_path):
|
if os.path.exists(integrity_file_path):
|
||||||
logger.error(f"The integrity file already exists in the folder {path}")
|
logger.error(f"The integrity file already exists in the folder {path}")
|
||||||
# The file already exists!
|
# The file already exists!
|
||||||
|
@ -110,7 +112,7 @@ def append_to_integrity_file(filepath):
|
||||||
logger.error(f"The file {filename} does not exists!")
|
logger.error(f"The file {filename} does not exists!")
|
||||||
raise FileNotFoundError
|
raise FileNotFoundError
|
||||||
|
|
||||||
integrity_file_path = os.path.join(os.path.dirname(filepath), "integrity.check")
|
integrity_file_path = os.path.join(os.path.dirname(filepath), integrity_file_name)
|
||||||
# Check that the integrity files exists
|
# Check that the integrity files exists
|
||||||
if not os.path.exists(integrity_file_path):
|
if not os.path.exists(integrity_file_path):
|
||||||
logger.debug(f"The integrity file does not exists in the folder of {filepath}")
|
logger.debug(f"The integrity file does not exists in the folder of {filepath}")
|
||||||
|
@ -127,12 +129,77 @@ def scan_path_to_integrity(path):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
def check_integrity(path):
    """Check the files of one folder against the integrity file it contains.

    The integrity file (named by the module-level ``integrity_file_name``) must
    start with two header lines beginning with ``#``. Every following line is
    read as a ``filename,size,checksum`` entry, where the checksum is compared
    with the result of ``get_filename_checksum`` and the size with the size of
    the file on disk.

    Args:
        path (string): folder holding the integrity file and the files it lists

    Returns:
        tuple: ``(valid, not_valid)``, two lists of entries. ``not_valid`` holds
            the file names that are missing, unreadable or do not match their
            entry, the raw text of entries that could not be parsed, and the
            integrity file name itself when its header is not valid.

    Raises:
        OSError: if the integrity file itself cannot be opened
    """
    valid = []
    not_valid = []
    integrity_file_path = os.path.join(path, integrity_file_name)

    with open(integrity_file_path, "r") as integrity_file:
        header = (integrity_file.readline(), integrity_file.readline())
        if not all(header_line.startswith("#") for header_line in header):
            print(f"The integrity file at {integrity_file_path} is not valid")
            # Report the integrity file itself: returning two empty lists would be
            # indistinguishable from "nothing to complain about" for the caller.
            not_valid.append(integrity_file_name)
            return (valid, not_valid)

        for line in integrity_file:
            entry = line.rstrip()
            if not entry:
                # A blank line carries no entry, there is nothing to verify
                continue

            try:
                # Split from the right so a comma inside the file name is kept.
                # NOTE(review): assumes size and checksum never contain a comma,
                # confirm against the code writing the integrity file.
                filename, size, checksum = entry.rsplit(",", 2)
                expected_size = int(size)
            except ValueError:
                print(f"The entry {entry!r} in {integrity_file_path} is not valid")
                not_valid.append(entry)
                continue

            filepath = os.path.join(path, filename)
            try:
                actual_checksum = get_filename_checksum(filepath)
                actual_size = os.path.getsize(filepath)
            except OSError as error:
                # A listed file that is missing or unreadable is an integrity
                # failure to report, not a reason to abort the whole check.
                print(f"{filename} could not be read ({error}) and is not valid")
                not_valid.append(filename)
                continue

            if actual_checksum == checksum and actual_size == expected_size:
                valid.append(filename)
            else:
                print(
                    f"{filename} with checksum {actual_checksum} vs {checksum} and size {actual_size} vs {size} is not valid"
                )
                not_valid.append(filename)

    return (valid, not_valid)
|
||||||
|
|
||||||
|
|
||||||
def check_path_integrity(path):
    """Check every folder below a path that contains an integrity file.

    The tree rooted at ``path`` is walked with ``os.walk`` and
    ``check_integrity`` is run on each folder in which a file named
    ``integrity_file_name`` is found.

    Args:
        path (string): root folder of the tree to check

    Returns:
        int: 1 if at least one file was reported as not valid, 0 otherwise
    """
    # Collect the folders first, then check them one by one
    folders = [
        entry[0] for entry in os.walk(path) if integrity_file_name in entry[2]
    ]

    inconsistent = []
    for folder in folders:
        _, folder_not_valid = check_integrity(folder)
        inconsistent.extend(folder_not_valid)

    if not inconsistent:
        return 0

    print(
        f"{len(inconsistent)} inconsistent file(s) have been found, please check them manually"
    )
    print(inconsistent)
    return 1
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
    # Command line entry point, used as:
    #   python3 -m planktoscope.integrity -c /path/to/folder
    # Exit status:
    #   0 when check_path_integrity() reports no inconsistent file
    #   1 when the path does not exist or inconsistent files were found
    #   2 on a usage error (missing arguments or unknown flag)
    import sys

    # Drop the loguru handlers so only the print() output below is shown
    logger.remove()

    # A usage error must not exit with 0: callers relying on the exit status
    # would otherwise take "nothing was checked" for "everything is valid".
    if len(sys.argv) <= 2 or sys.argv[1] != "-c":
        print(
            "To check the integrity of files in a given folder, please use python3 -m planktoscope.integrity -c /path/to/folder"
        )
        sys.exit(2)

    path_to_check = sys.argv[2]
    # let's check if the path exists
    if not os.path.exists(path_to_check):
        print("The path to check doesn't exists")
        sys.exit(1)

    # the path exists, let's check it!
    error = check_path_integrity(path_to_check)
    if error:
        sys.exit(error)
    print("All the files are valid")

    sys.exit(0)
|
Loading…
Reference in a new issue