seekia/internal/genetics/myGenomes/myGenomes.go

448 lines
15 KiB
Go

// Package myGenomes provides functions to store and manage a user's raw genome files.
// These files are exported from sequencing companies such as 23andMe and AncestryDNA.
package myGenomes
import "seekia/internal/cryptography/blake3"
import "seekia/internal/encoding"
import "seekia/internal/genetics/prepareRawGenomes"
import "seekia/internal/genetics/readRawGenomes"
import "seekia/internal/helpers"
import "seekia/internal/localFilesystem"
import "seekia/internal/myDatastores/myMapList"
import "path/filepath"
import "time"
import "sync"
import "errors"
import "strings"
//TODO: Delete unused raw genome files

var (
	// updatingMyGenomesMutex is held while genome entries are being added, deleted, or rewritten,
	// and while the datastore itself is being (re)initialized.
	updatingMyGenomesMutex sync.Mutex

	// myGenomesMapListDatastore holds the metadata entries for all imported raw genomes.
	// It is assigned by InitializeMyGenomeDatastore.
	myGenomesMapListDatastore *myMapList.MyMapList
)
// CreateUserGenomesFolder builds the path of the "MyGenomes" folder inside the
// app user's folder and passes it to localFilesystem.CreateFolder.
// This function must be called whenever an app user signs in
func CreateUserGenomesFolder() error {

	appUserFolderPath, err := localFilesystem.GetAppUserFolderPath()
	if err != nil {
		return err
	}

	genomesFolderPath := filepath.Join(appUserFolderPath, "MyGenomes")

	// The first return value of CreateFolder is not needed here.
	if _, err := localFilesystem.CreateFolder(genomesFolderPath); err != nil {
		return err
	}

	return nil
}
// InitializeMyGenomeDatastore creates the "MyGenomes" map list and stores it in
// the package-level myGenomesMapListDatastore variable.
// This function must be called whenever an app user signs in
func InitializeMyGenomeDatastore() error {

	// Hold the update lock so the datastore is not swapped while genomes are being added/deleted.
	updatingMyGenomesMutex.Lock()
	defer updatingMyGenomesMutex.Unlock()

	datastore, err := myMapList.CreateNewMapList("MyGenomes")
	if err != nil {
		return err
	}

	myGenomesMapListDatastore = datastore

	return nil
}
// GetMyRawGenomesMetadataMapList returns the metadata entries of all imported raw genomes.
//Outputs:
//	-[]map[string]string
//		-PersonIdentifier -> Identifier of Genome Person
//		-GenomeIdentifier -> Genome identifier (this is the name of the .txt file stored on disk)
//		-TimeExported -> Time the genome file was exported from company
//		-TimeImported -> Time the genome was imported into Seekia
//		-IsPhased -> "Yes"/"No"
//		-SNPCount -> Number of readable SNPs in file
//		-CompanyName -> Company name ("23andMe", "AncestryDNA")
//		-ImportVersion -> Import version for the company from which the metadata was retrieved
//		-FileHash -> 256 bits Blake3 hash of the genome file, encoded in Hex
//	-error
func GetMyRawGenomesMetadataMapList() ([]map[string]string, error) {

	genomesMetadataMapList, err := myGenomesMapListDatastore.GetMapList()
	if err != nil {
		return nil, err
	}

	return genomesMetadataMapList, nil
}
// AddRawGenome imports a raw genome file for the provided person.
// The file is hashed, checked against existing imports for the same person, parsed,
// and then stored as <GenomeIdentifier>.txt in the user's MyGenomes folder alongside
// a metadata entry in the genomes datastore.
//
// A file which cannot be parsed, or which contains no useful locations, is reported
// as invalid (false, false, nil) rather than as an error.
//Outputs:
//	-bool: File is valid
//	-bool: File already exists
//	-error
func AddRawGenome(personIdentifier string, rawGenomeString string) (bool, bool, error) {

	isValid := helpers.VerifyHexString(15, personIdentifier)
	if !isValid {
		return false, false, errors.New("AddRawGenome called with invalid personIdentifier: " + personIdentifier)
	}

	updatingMyGenomesMutex.Lock()
	defer updatingMyGenomesMutex.Unlock()

	// Convert once; the bytes are used for both hashing and writing the file.
	rawGenomeBytes := []byte(rawGenomeString)

	currentFileHash, err := blake3.GetBlake3HashAsHexString(32, rawGenomeBytes)
	if err != nil {
		return false, false, err
	}

	// We check to see if this file has already been imported for this Person
	lookupMap := map[string]string{
		"PersonIdentifier": personIdentifier,
		"FileHash":         currentFileHash,
	}

	anyItemFound, _, err := myGenomesMapListDatastore.GetMapListItems(lookupMap)
	if err != nil {
		return false, false, err
	}
	if anyItemFound {
		// Genome already exists
		return true, true, nil
	}

	// Genome is new. We will copy the file to Seekia local storage and add it to the map list

	rawGenomeReader := strings.NewReader(rawGenomeString)

	companyName, importVersion, timeFileWasGenerated, snpCount, genomeIsPhased, rawGenomeMap, err := readRawGenomes.ReadRawGenomeFile(rawGenomeReader)
	if err != nil {
		// A read failure means the file is not a valid raw genome file. This is not an error.
		return false, false, nil
	}

	genomeHasUsefulLocations, _, err := prepareRawGenomes.ConvertRawGenomeToGenomeMap(rawGenomeMap, genomeIsPhased)
	if err != nil {
		return false, false, err
	}
	if !genomeHasUsefulLocations {
		//TODO: Explain this to the user rather than just telling the user that the file is invalid
		return false, false, nil
	}

	genomeIdentifier, err := helpers.GetNewRandomHexString(16)
	if err != nil {
		return false, false, err
	}

	importVersionString := helpers.ConvertIntToString(importVersion)
	timeExported := helpers.ConvertInt64ToString(timeFileWasGenerated)

	timeImported := time.Now().Unix()
	timeImportedString := helpers.ConvertInt64ToString(timeImported)

	isPhasedString := helpers.ConvertBoolToYesOrNoString(genomeIsPhased)
	snpCountString := helpers.ConvertInt64ToString(snpCount)

	newGenomeMap := map[string]string{
		"PersonIdentifier": personIdentifier,
		"GenomeIdentifier": genomeIdentifier,
		"TimeExported":     timeExported,
		"TimeImported":     timeImportedString,
		"IsPhased":         isPhasedString,
		"SNPCount":         snpCountString,
		"CompanyName":      companyName,
		"ImportVersion":    importVersionString,
		"FileHash":         currentFileHash,
	}

	userDirectory, err := localFilesystem.GetAppUserFolderPath()
	if err != nil {
		return false, false, err
	}

	myGenomesFolderPath := filepath.Join(userDirectory, "MyGenomes")
	genomeFileName := genomeIdentifier + ".txt"

	// The file is written before the metadata entry is added.
	// If the write fails, no entry will exist which refers to a missing file.
	err = localFilesystem.CreateOrOverwriteFile(rawGenomeBytes, myGenomesFolderPath, genomeFileName)
	if err != nil {
		return false, false, err
	}

	err = myGenomesMapListDatastore.AddMapListItem(newGenomeMap)
	if err != nil {
		// Best-effort cleanup so an unreferenced genome file is not left on disk.
		// The cleanup result is ignored deliberately: the datastore error is the one worth reporting.
		genomeFilePath := filepath.Join(myGenomesFolderPath, genomeFileName)
		_, _ = localFilesystem.DeleteFileOrFolder(genomeFilePath)

		return false, false, err
	}

	return true, false, nil
}
// DeleteMyRawGenome removes the datastore entries matching the provided genome identifier
// and then deletes the genome's <GenomeIdentifier>.txt file from the user's MyGenomes folder.
func DeleteMyRawGenome(genomeIdentifier [16]byte) error {

	updatingMyGenomesMutex.Lock()
	defer updatingMyGenomesMutex.Unlock()

	identifierHex := encoding.EncodeBytesToHexString(genomeIdentifier[:])

	deletionLookupMap := map[string]string{
		"GenomeIdentifier": identifierHex,
	}

	if err := myGenomesMapListDatastore.DeleteMapListItems(deletionLookupMap); err != nil {
		return err
	}

	appUserFolderPath, err := localFilesystem.GetAppUserFolderPath()
	if err != nil {
		return err
	}

	genomeFilePath := filepath.Join(appUserFolderPath, "MyGenomes", identifierHex+".txt")

	// The first return value of DeleteFileOrFolder is not needed here.
	if _, err := localFilesystem.DeleteFileOrFolder(genomeFilePath); err != nil {
		return err
	}

	return nil
}
// GetMyRawGenomeMetadata looks up the stored metadata entry for a genome identifier
// and returns its fields converted to their native types.
// The two example-report genome identifiers return fixed placeholder metadata
// without consulting the datastore.
//Outputs:
//	-bool: Genome found
//	-string: Person identifier
//	-int64: Time Genome was exported from company
//	-int64: Time genome was imported into Seekia
//	-bool: Is Phased
//	-int64: SNP Count
//	-string: CompanyName
//	-int: Import version
//	-string: FileHash
//	-error
func GetMyRawGenomeMetadata(genomeIdentifier [16]byte) (bool, string, int64, int64, bool, int64, string, int, string, error) {

	// emptyResult returns the zero-valued outputs together with the provided error (which may be nil).
	emptyResult := func(err error) (bool, string, int64, int64, bool, int64, string, int, string, error) {
		return false, "", 0, 0, false, 0, "", 0, "", err
	}

	genomeIdentifierHex := encoding.EncodeBytesToHexString(genomeIdentifier[:])

	if genomeIdentifierHex == "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" || genomeIdentifierHex == "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb" {

		// These are the genome identifiers we use for example reports
		// These are used to show the user what a genetic analysis would look like

		personIdentifier := "222222222222222222222222222222"
		if genomeIdentifierHex == "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" {
			personIdentifier = "111111111111111111111111111111"
		}

		return true, personIdentifier, 0, 0, false, 676720, "AncestryDNA", 1, "", nil
	}

	lookupMap := map[string]string{
		"GenomeIdentifier": genomeIdentifierHex,
	}

	anyItemFound, foundItemsMapList, err := myGenomesMapListDatastore.GetMapListItems(lookupMap)
	if err != nil {
		// A datastore failure must not be reported as "genome not found".
		return emptyResult(err)
	}
	if !anyItemFound {
		return emptyResult(nil)
	}
	if len(foundItemsMapList) != 1 {
		return emptyResult(errors.New("Malformed myGenomesMapList: Contains multiple entries for same GenomeIdentifier"))
	}

	genomeMap := foundItemsMapList[0]

	// getField returns the value stored under fieldName, or an error if the entry lacks that field.
	getField := func(fieldName string) (string, error) {
		fieldValue, exists := genomeMap[fieldName]
		if !exists {
			return "", errors.New("Malformed myGenomesMapList: Item missing " + fieldName)
		}
		return fieldValue, nil
	}

	personIdentifier, err := getField("PersonIdentifier")
	if err != nil {
		return emptyResult(err)
	}
	timeExported, err := getField("TimeExported")
	if err != nil {
		return emptyResult(err)
	}
	timeImported, err := getField("TimeImported")
	if err != nil {
		return emptyResult(err)
	}
	isPhased, err := getField("IsPhased")
	if err != nil {
		return emptyResult(err)
	}
	snpCount, err := getField("SNPCount")
	if err != nil {
		return emptyResult(err)
	}
	companyName, err := getField("CompanyName")
	if err != nil {
		return emptyResult(err)
	}
	importVersion, err := getField("ImportVersion")
	if err != nil {
		return emptyResult(err)
	}
	fileHash, err := getField("FileHash")
	if err != nil {
		return emptyResult(err)
	}

	// All fields exist. Now we convert the non-string fields to their native types.

	timeExportedInt64, err := helpers.ConvertStringToInt64(timeExported)
	if err != nil {
		return emptyResult(errors.New("Malformed myGenomesMapList: Item contains invalid TimeExported: " + timeExported))
	}

	timeImportedInt64, err := helpers.ConvertStringToInt64(timeImported)
	if err != nil {
		return emptyResult(errors.New("Malformed myGenomesMapList: Item contains invalid TimeImported: " + timeImported))
	}

	isPhasedBool, err := helpers.ConvertYesOrNoStringToBool(isPhased)
	if err != nil {
		return emptyResult(errors.New("Malformed myGenomesMapList: Item contains invalid isPhased: " + isPhased))
	}

	snpCountInt64, err := helpers.ConvertStringToInt64(snpCount)
	if err != nil {
		return emptyResult(errors.New("Malformed myGenomesMapList: Item contains invalid snpCount: " + snpCount))
	}

	importVersionInt, err := helpers.ConvertStringToInt(importVersion)
	if err != nil {
		return emptyResult(errors.New("Malformed myGenomesMapList: Item contains invalid ImportVersion: " + importVersion))
	}

	return true, personIdentifier, timeExportedInt64, timeImportedInt64, isPhasedBool, snpCountInt64, companyName, importVersionInt, fileHash, nil
}
// RefreshRawGenomeMetadata re-reads the stored raw genome file for the provided genome identifier
// and overwrites the TimeExported, IsPhased, SNPCount, CompanyName and ImportVersion fields
// of its datastore entry with the freshly read values.
// This function is used to refresh the genome metadata when a new import version is available
func RefreshRawGenomeMetadata(genomeIdentifier [16]byte) error {

	updatingMyGenomesMutex.Lock()
	defer updatingMyGenomesMutex.Unlock()

	targetIdentifierHex := encoding.EncodeBytesToHexString(genomeIdentifier[:])

	allGenomesMapList, err := myGenomesMapListDatastore.GetMapList()
	if err != nil {
		return err
	}

	genomeWasFound := false

	// The loop does not stop at the first match, so that duplicate and malformed entries are detected.
	for _, entryMap := range allGenomesMapList {

		entryIdentifier, exists := entryMap["GenomeIdentifier"]

		switch {
		case !exists:
			return errors.New("myGenomesMapList item is malformed: item missing GenomeIdentifier.")
		case entryIdentifier != targetIdentifierHex:
			continue
		case genomeWasFound:
			return errors.New("myGenomesMapList is malformed: Multiple entries for the same GenomeIdentifier exist.")
		}

		genomeWasFound = true

		rawGenomeString, err := GetGenomeRawDataString(genomeIdentifier)
		if err != nil {
			return err
		}

		companyName, importVersion, timeFileWasGenerated, snpCount, genomeIsPhased, _, err := readRawGenomes.ReadRawGenomeFile(strings.NewReader(rawGenomeString))
		if err != nil {
			// Could be that file was importable via old import version, but new import version rejects it.
			// That would still be undesirable behavior.
			return errors.New("Unable to import raw genome during RefreshRawGenomeMetadata: " + err.Error())
		}

		// PersonIdentifier, GenomeIdentifier, TimeImported and FileHash are left untouched.
		entryMap["TimeExported"] = helpers.ConvertInt64ToString(timeFileWasGenerated)
		entryMap["IsPhased"] = helpers.ConvertBoolToYesOrNoString(genomeIsPhased)
		entryMap["SNPCount"] = helpers.ConvertInt64ToString(snpCount)
		entryMap["CompanyName"] = companyName
		entryMap["ImportVersion"] = helpers.ConvertIntToString(importVersion)
	}

	if !genomeWasFound {
		return errors.New("Genome not found during RefreshRawGenomeMetadata")
	}

	if err := myGenomesMapListDatastore.OverwriteMapList(allGenomesMapList); err != nil {
		return err
	}

	return nil
}
// GetGenomeRawDataString reads the <GenomeIdentifier>.txt file from the user's MyGenomes folder
// and returns its contents. A missing file is reported as an error.
//Outputs:
//	-string: Genome raw data string
//	-error
func GetGenomeRawDataString(genomeIdentifier [16]byte) (string, error) {

	identifierHex := encoding.EncodeBytesToHexString(genomeIdentifier[:])

	appUserFolderPath, err := localFilesystem.GetAppUserFolderPath()
	if err != nil {
		return "", err
	}

	genomeFilePath := filepath.Join(appUserFolderPath, "MyGenomes", identifierHex+".txt")

	fileExists, fileBytes, err := localFilesystem.GetFileContents(genomeFilePath)
	switch {
	case err != nil:
		return "", err
	case !fileExists:
		return "", errors.New("GetGenomeRawDataString called with genome whose file we cannot find.")
	}

	return string(fileBytes), nil
}
// GetAllPersonGenomesMapList returns the metadata entries of all genomes for a person.
// If the person has no genomes, an empty (non-nil) list is returned.
func GetAllPersonGenomesMapList(personIdentifier string) ([]map[string]string, error) {

	personLookupMap := map[string]string{
		"PersonIdentifier": personIdentifier,
	}

	anyItemsFound, personGenomesMapList, err := myGenomesMapListDatastore.GetMapListItems(personLookupMap)
	if err != nil {
		return nil, err
	}

	if anyItemsFound {
		return personGenomesMapList, nil
	}

	return []map[string]string{}, nil
}
// GetAllPersonRawGenomeIdentifiersList returns the decoded genome identifiers of all raw genomes for a person.
// This will not include any calculated genome identifiers, which only exist within analyses
func GetAllPersonRawGenomeIdentifiersList(personIdentifier string) ([][16]byte, error) {

	personGenomesMapList, err := GetAllPersonGenomesMapList(personIdentifier)
	if err != nil {
		return nil, err
	}

	// Each entry yields exactly one identifier, so the result can be sized up front.
	identifiersList := make([][16]byte, len(personGenomesMapList))

	for index, genomeMap := range personGenomesMapList {

		identifierHex, exists := genomeMap["GenomeIdentifier"]
		if !exists {
			return nil, errors.New("Malformed myGenomesMapList: Item missing GenomeIdentifier")
		}

		decodedIdentifier, decodeErr := encoding.DecodeHexStringTo16ByteArray(identifierHex)
		if decodeErr != nil {
			return nil, errors.New("Malformed myGenomesMapList: Item contains invalid GenomeIdentifier: " + identifierHex)
		}

		identifiersList[index] = decodedIdentifier
	}

	return identifiersList, nil
}
// DeleteAllPersonGenomes removes every genome belonging to the provided person.
// The person's metadata entries are deleted from the datastore, and then each
// genome's <GenomeIdentifier>.txt file is deleted from the user's MyGenomes folder,
// matching what DeleteMyRawGenome does for a single genome.
func DeleteAllPersonGenomes(personIdentifier string) error {

	updatingMyGenomesMutex.Lock()
	defer updatingMyGenomesMutex.Unlock()

	personLookupMap := map[string]string{
		"PersonIdentifier": personIdentifier,
	}

	// We retrieve the person's entries before deleting them, so we know which files to delete.
	_, personGenomesMapList, err := myGenomesMapListDatastore.GetMapListItems(personLookupMap)
	if err != nil {
		return err
	}

	err = myGenomesMapListDatastore.DeleteMapListItems(personLookupMap)
	if err != nil {
		return err
	}

	if len(personGenomesMapList) == 0 {
		// Person has no genomes. There are no files to delete.
		return nil
	}

	userDirectory, err := localFilesystem.GetAppUserFolderPath()
	if err != nil {
		return err
	}

	for _, genomeMap := range personGenomesMapList {

		genomeIdentifierHex, exists := genomeMap["GenomeIdentifier"]
		if !exists {
			// Entry is malformed. We cannot know which file it refers to, so there is nothing to delete.
			continue
		}

		// We only build a file path from a well-formed identifier.
		// This prevents a corrupt entry from causing an unrelated path to be deleted.
		_, err := encoding.DecodeHexStringTo16ByteArray(genomeIdentifierHex)
		if err != nil {
			continue
		}

		genomeFilePath := filepath.Join(userDirectory, "MyGenomes", genomeIdentifierHex+".txt")

		_, err = localFilesystem.DeleteFileOrFolder(genomeFilePath)
		if err != nil {
			return err
		}
	}

	return nil
}