448 lines
15 KiB
Go
448 lines
15 KiB
Go
|
|
// Package myGenomes provides functions to store a user's raw genome files
|
|
// These are exported from sequencing companies like 23andMe and AncestryDNA
|
|
|
|
package myGenomes
|
|
|
|
import "seekia/internal/cryptography/blake3"
|
|
import "seekia/internal/encoding"
|
|
import "seekia/internal/genetics/prepareRawGenomes"
|
|
import "seekia/internal/genetics/readRawGenomes"
|
|
import "seekia/internal/helpers"
|
|
import "seekia/internal/localFilesystem"
|
|
import "seekia/internal/myDatastores/myMapList"
|
|
|
|
import "path/filepath"
|
|
import "time"
|
|
import "sync"
|
|
import "errors"
|
|
import "strings"
|
|
|
|
//TODO: Delete unused raw genome files
|
|
|
|
// This will be locked anytime Genomes are being added/deleted
|
|
// Held by InitializeMyGenomeDatastore, AddRawGenome, DeleteMyRawGenome, RefreshRawGenomeMetadata
// and DeleteAllPersonGenomes for their whole duration. The read-only getters visible in this file do not take it.
var updatingMyGenomesMutex sync.Mutex
|
|
|
|
// Map list datastore holding one metadata map per imported raw genome.
// It is assigned by InitializeMyGenomeDatastore and used by the other functions in this file.
var myGenomesMapListDatastore *myMapList.MyMapList
|
|
|
|
// This function must be called whenever an app user signs in
|
|
func CreateUserGenomesFolder() error{
|
|
|
|
userDirectory, err := localFilesystem.GetAppUserFolderPath()
|
|
if (err != nil) { return err }
|
|
|
|
myGenomesFolderPath := filepath.Join(userDirectory, "MyGenomes")
|
|
|
|
_, err = localFilesystem.CreateFolder(myGenomesFolderPath)
|
|
if (err != nil) { return err }
|
|
|
|
return nil
|
|
}
|
|
|
|
// This function must be called whenever an app user signs in
|
|
func InitializeMyGenomeDatastore()error{
|
|
|
|
updatingMyGenomesMutex.Lock()
|
|
defer updatingMyGenomesMutex.Unlock()
|
|
|
|
newMyGenomesMapListDatastore, err := myMapList.CreateNewMapList("MyGenomes")
|
|
if (err != nil) { return err }
|
|
|
|
myGenomesMapListDatastore = newMyGenomesMapListDatastore
|
|
|
|
return nil
|
|
}
|
|
|
|
//Outputs:
|
|
// -[]map[string]string
|
|
// -PersonIdentifier -> Identifier of Genome Person
|
|
// -GenomeIdentifier -> Genome identifier (this is the name of the .txt file stored on disk)
|
|
// -TimeExported -> Time the genome file was exported from company
|
|
// -TimeImported -> Time the genome was imported into Seekia
|
|
// -IsPhased -> "Yes"/"No"
|
|
// -SNPCount -> Number of readable SNPs in file
|
|
// -CompanyName -> Company name ("23andMe", "AncestryDNA")
|
|
// -ImportVersion -> Import version for the company from which the metadata was retrieved
|
|
// -FileHash -> 256 bits Blake3 hash of the genome file, encoded in Hex
|
|
// -error
|
|
func GetMyRawGenomesMetadataMapList()([]map[string]string, error){
|
|
|
|
myRawGenomesMapList, err := myGenomesMapListDatastore.GetMapList()
|
|
if (err != nil) { return nil, err }
|
|
|
|
return myRawGenomesMapList, nil
|
|
}
|
|
|
|
//Outputs:
|
|
// -bool: File is valid
|
|
// -bool: File already exists
|
|
// -error
|
|
func AddRawGenome(personIdentifier string, rawGenomeString string)(bool, bool, error){
|
|
|
|
isValid := helpers.VerifyHexString(15, personIdentifier)
|
|
if (isValid == false) {
|
|
return false, false, errors.New("AddRawGenome called with invalid personIdentifier: " + personIdentifier)
|
|
}
|
|
|
|
updatingMyGenomesMutex.Lock()
|
|
defer updatingMyGenomesMutex.Unlock()
|
|
|
|
currentFileHash, err := blake3.GetBlake3HashAsHexString(32, []byte(rawGenomeString))
|
|
if (err != nil) { return false, false, err }
|
|
|
|
// We check to see if this file has already been imported for this Person
|
|
lookupMap := map[string]string{
|
|
"PersonIdentifier": personIdentifier,
|
|
"FileHash": currentFileHash,
|
|
}
|
|
|
|
anyItemFound, _, err := myGenomesMapListDatastore.GetMapListItems(lookupMap)
|
|
if (err != nil) { return false, false, err }
|
|
if (anyItemFound == true){
|
|
// Genome already exists
|
|
return true, true, nil
|
|
}
|
|
|
|
// Genome is new. We will add it to the map list and copy the file to Seekia local storage
|
|
|
|
rawGenomeReader := strings.NewReader(rawGenomeString)
|
|
|
|
companyName, importVersion, timeFileWasGenerated, snpCount, genomeIsPhased, rawGenomeMap, err := readRawGenomes.ReadRawGenomeFile(rawGenomeReader)
|
|
if (err != nil){
|
|
return false, false, nil
|
|
}
|
|
|
|
genomeHasUsefulLocations, _, err := prepareRawGenomes.ConvertRawGenomeToGenomeMap(rawGenomeMap, genomeIsPhased)
|
|
if (err != nil) { return false, false, err }
|
|
if (genomeHasUsefulLocations == false){
|
|
//TODO: Explain this to the user rather than just telling the user that the file is invalid
|
|
return false, false, nil
|
|
}
|
|
|
|
genomeIdentifier, err := helpers.GetNewRandomHexString(16)
|
|
if (err != nil) { return false, false, err }
|
|
|
|
importVersionString := helpers.ConvertIntToString(importVersion)
|
|
|
|
timeExported := helpers.ConvertInt64ToString(timeFileWasGenerated)
|
|
|
|
timeImported := time.Now().Unix()
|
|
timeImportedString := helpers.ConvertInt64ToString(timeImported)
|
|
|
|
isPhasedString := helpers.ConvertBoolToYesOrNoString(genomeIsPhased)
|
|
|
|
snpCountString := helpers.ConvertInt64ToString(snpCount)
|
|
|
|
newGenomeMap := map[string]string{
|
|
"PersonIdentifier": personIdentifier,
|
|
"GenomeIdentifier": genomeIdentifier,
|
|
"TimeExported": timeExported,
|
|
"TimeImported": timeImportedString,
|
|
"IsPhased": isPhasedString,
|
|
"SNPCount": snpCountString,
|
|
"CompanyName": companyName,
|
|
"ImportVersion": importVersionString,
|
|
"FileHash": currentFileHash,
|
|
}
|
|
|
|
err = myGenomesMapListDatastore.AddMapListItem(newGenomeMap)
|
|
if (err != nil) { return false, false, err }
|
|
|
|
userDirectory, err := localFilesystem.GetAppUserFolderPath()
|
|
if (err != nil) { return false, false, err }
|
|
|
|
myGenomesFolderPath := filepath.Join(userDirectory, "MyGenomes")
|
|
|
|
genomeFileName := genomeIdentifier + ".txt"
|
|
|
|
err = localFilesystem.CreateOrOverwriteFile([]byte(rawGenomeString), myGenomesFolderPath, genomeFileName)
|
|
if (err != nil) { return false, false, err }
|
|
|
|
return true, false, nil
|
|
}
|
|
|
|
func DeleteMyRawGenome(genomeIdentifier [16]byte)error{
|
|
|
|
updatingMyGenomesMutex.Lock()
|
|
defer updatingMyGenomesMutex.Unlock()
|
|
|
|
genomeIdentifierHex := encoding.EncodeBytesToHexString(genomeIdentifier[:])
|
|
|
|
mapToDelete := map[string]string{
|
|
"GenomeIdentifier": genomeIdentifierHex,
|
|
}
|
|
|
|
err := myGenomesMapListDatastore.DeleteMapListItems(mapToDelete)
|
|
if (err != nil) { return err }
|
|
|
|
userDirectory, err := localFilesystem.GetAppUserFolderPath()
|
|
if (err != nil) { return err }
|
|
|
|
genomeFileName := genomeIdentifierHex + ".txt"
|
|
|
|
genomeFilePath := filepath.Join(userDirectory, "MyGenomes", genomeFileName)
|
|
|
|
_, err = localFilesystem.DeleteFileOrFolder(genomeFilePath)
|
|
if (err != nil) { return err }
|
|
|
|
return nil
|
|
}
|
|
|
|
//Outputs:
|
|
// -bool: Genome found
|
|
// -string: Person identifier
|
|
// -int64: Time Genome was exported from company
|
|
// -int64: Time genome was imported into Seekia
|
|
// -bool: Is Phased
|
|
// -int64: SNP Count
|
|
// -string: CompanyName
|
|
// -int: Import version
|
|
// -string: FileHash
|
|
// -error
|
|
func GetMyRawGenomeMetadata(genomeIdentifier [16]byte)(bool, string, int64, int64, bool, int64, string, int, string, error){
|
|
|
|
genomeIdentifierHex := encoding.EncodeBytesToHexString(genomeIdentifier[:])
|
|
|
|
if (genomeIdentifierHex == "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" || genomeIdentifierHex == "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"){
|
|
// These are the genome identifiers we use for example reports
|
|
// These are used to show the user what a genetic analysis would look like
|
|
|
|
getPersonIdentifier := func()string{
|
|
|
|
if (genomeIdentifierHex == "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"){
|
|
return "111111111111111111111111111111"
|
|
}
|
|
|
|
return "222222222222222222222222222222"
|
|
}
|
|
|
|
personIdentifier := getPersonIdentifier()
|
|
|
|
return true, personIdentifier, 0, 0, false, 676720, "AncestryDNA", 1, "", nil
|
|
}
|
|
|
|
lookupMap := map[string]string{
|
|
"GenomeIdentifier": genomeIdentifierHex,
|
|
}
|
|
|
|
anyItemFound, foundItemsMapList, err := myGenomesMapListDatastore.GetMapListItems(lookupMap)
|
|
if (anyItemFound == false){
|
|
return false, "", 0, 0, false, 0, "", 0, "", nil
|
|
}
|
|
if (len(foundItemsMapList) != 1){
|
|
return false, "", 0, 0, false, 0, "", 0, "", errors.New("Malformed myGenomesMapList: Contains multiple entries for same GenomeIdentifier")
|
|
}
|
|
genomeMap := foundItemsMapList[0]
|
|
|
|
personIdentifier, exists := genomeMap["PersonIdentifier"]
|
|
if (exists == false){
|
|
return false, "", 0, 0, false, 0, "", 0, "", errors.New("Malformed myGenomesMapList: Item missing PersonIdentifier")
|
|
}
|
|
|
|
timeExported, exists := genomeMap["TimeExported"]
|
|
if (exists == false){
|
|
return false, "", 0, 0, false, 0, "", 0, "", errors.New("Malformed myGenomesMapList: Item missing TimeExported")
|
|
}
|
|
|
|
timeImported, exists := genomeMap["TimeImported"]
|
|
if (exists == false){
|
|
return false, "", 0, 0, false, 0, "", 0, "", errors.New("Malformed myGenomesMapList: Item missing TimeImported")
|
|
}
|
|
|
|
isPhased, exists := genomeMap["IsPhased"]
|
|
if (exists == false){
|
|
return false, "", 0, 0, false, 0, "", 0, "", errors.New("Malformed myGenomesMapList: Item missing IsPhased")
|
|
}
|
|
snpCount, exists := genomeMap["SNPCount"]
|
|
if (exists == false){
|
|
return false, "", 0, 0, false, 0, "", 0, "", errors.New("Malformed myGenomesMapList: Item missing SNPCount")
|
|
}
|
|
companyName, exists := genomeMap["CompanyName"]
|
|
if (exists == false){
|
|
return false, "", 0, 0, false, 0, "", 0, "", errors.New("Malformed myGenomesMapList: Item missing CompanyName")
|
|
}
|
|
importVersion, exists := genomeMap["ImportVersion"]
|
|
if (exists == false){
|
|
return false, "", 0, 0, false, 0, "", 0, "", errors.New("Malformed myGenomesMapList: Item missing ImportVersion")
|
|
}
|
|
fileHash, exists := genomeMap["FileHash"]
|
|
if (exists == false){
|
|
return false, "", 0, 0, false, 0, "", 0, "", errors.New("Malformed myGenomesMapList: Item missing FileHash")
|
|
}
|
|
|
|
timeExportedInt64, err := helpers.ConvertStringToInt64(timeExported)
|
|
if (err != nil){
|
|
return false, "", 0, 0, false, 0, "", 0, "", errors.New("Malformed myGenomesMapList: Item contains invalid TimeExported: " + timeExported)
|
|
}
|
|
timeImportedInt64, err := helpers.ConvertStringToInt64(timeImported)
|
|
if (err != nil){
|
|
return false, "", 0, 0, false, 0, "", 0, "", errors.New("Malformed myGenomesMapList: Item contains invalid TimeImported: " + timeImported)
|
|
}
|
|
|
|
isPhasedBool, err := helpers.ConvertYesOrNoStringToBool(isPhased)
|
|
if (err != nil) {
|
|
return false, "", 0, 0, false, 0, "", 0, "", errors.New("Malformed myGenomesMapList: Item contains invalid isPhased: " + isPhased)
|
|
}
|
|
|
|
snpCountInt64, err := helpers.ConvertStringToInt64(snpCount)
|
|
if (err != nil){
|
|
return false, "", 0, 0, false, 0, "", 0, "", errors.New("Malformed myGenomesMapList: Item contains invalid snpCount: " + snpCount)
|
|
}
|
|
|
|
importVersionInt, err := helpers.ConvertStringToInt(importVersion)
|
|
if (err != nil){
|
|
return false, "", 0, 0, false, 0, "", 0, "", errors.New("Malformed myGenomesMapList: Item contains invalid ImportVersion: " + importVersion)
|
|
}
|
|
|
|
return true, personIdentifier, timeExportedInt64, timeImportedInt64, isPhasedBool, snpCountInt64, companyName, importVersionInt, fileHash, nil
|
|
}
|
|
|
|
// This function is used to refresh the genome metadata when a new import version is available
|
|
func RefreshRawGenomeMetadata(genomeIdentifier [16]byte)error{
|
|
|
|
updatingMyGenomesMutex.Lock()
|
|
defer updatingMyGenomesMutex.Unlock()
|
|
|
|
genomeIdentifierHex := encoding.EncodeBytesToHexString(genomeIdentifier[:])
|
|
|
|
myGenomesMapList, err := myGenomesMapListDatastore.GetMapList()
|
|
if (err != nil) { return err }
|
|
|
|
foundGenome := false
|
|
|
|
for _, genomeMap := range myGenomesMapList{
|
|
|
|
currentGenomeIdentifier, exists := genomeMap["GenomeIdentifier"]
|
|
if (exists == false){
|
|
return errors.New("myGenomesMapList item is malformed: item missing GenomeIdentifier.")
|
|
}
|
|
|
|
if (currentGenomeIdentifier != genomeIdentifierHex){
|
|
continue
|
|
}
|
|
if (foundGenome == true){
|
|
return errors.New("myGenomesMapList is malformed: Multiple entries for the same GenomeIdentifier exist.")
|
|
}
|
|
foundGenome = true
|
|
|
|
rawGenomeString, err := GetGenomeRawDataString(genomeIdentifier)
|
|
if (err != nil){ return err }
|
|
|
|
rawGenomeReader := strings.NewReader(rawGenomeString)
|
|
|
|
companyName, importVersion, timeFileWasGenerated, snpCount, genomeIsPhased, _, err := readRawGenomes.ReadRawGenomeFile(rawGenomeReader)
|
|
if (err != nil){
|
|
// Could be that file was importable via old import version, but new import version rejects it.
|
|
// That would still be bad undesireable behavior.
|
|
return errors.New("Unable to import raw genome during RefreshRawGenomeMetadata: " + err.Error())
|
|
}
|
|
|
|
importVersionString := helpers.ConvertIntToString(importVersion)
|
|
|
|
timeExported := helpers.ConvertInt64ToString(timeFileWasGenerated)
|
|
|
|
isPhasedString := helpers.ConvertBoolToYesOrNoString(genomeIsPhased)
|
|
|
|
snpCountString := helpers.ConvertInt64ToString(snpCount)
|
|
|
|
genomeMap["TimeExported"] = timeExported
|
|
genomeMap["IsPhased"] = isPhasedString
|
|
genomeMap["SNPCount"] = snpCountString
|
|
genomeMap["CompanyName"] = companyName
|
|
genomeMap["ImportVersion"] = importVersionString
|
|
}
|
|
|
|
if (foundGenome == false){
|
|
return errors.New("Genome not found during RefreshRawGenomeMetadata")
|
|
}
|
|
|
|
err = myGenomesMapListDatastore.OverwriteMapList(myGenomesMapList)
|
|
if (err != nil) { return err }
|
|
|
|
return nil
|
|
}
|
|
|
|
//Outputs:
|
|
// -string: Genome raw data string
|
|
// -error
|
|
func GetGenomeRawDataString(genomeIdentifier [16]byte)(string, error){
|
|
|
|
genomeIdentifierHex := encoding.EncodeBytesToHexString(genomeIdentifier[:])
|
|
|
|
userDirectory, err := localFilesystem.GetAppUserFolderPath()
|
|
if (err != nil) { return "", err }
|
|
|
|
genomeFileName := genomeIdentifierHex + ".txt"
|
|
|
|
genomeFilePath := filepath.Join(userDirectory, "MyGenomes", genomeFileName)
|
|
|
|
fileExists, fileBytes, err := localFilesystem.GetFileContents(genomeFilePath)
|
|
if (err != nil) { return "", err }
|
|
if (fileExists == false){
|
|
return "", errors.New("GetGenomeRawDataString called with genome whose file we cannot find.")
|
|
}
|
|
fileString := string(fileBytes)
|
|
|
|
return fileString, nil
|
|
}
|
|
|
|
// Returns all genomes for a person
|
|
func GetAllPersonGenomesMapList(personIdentifier string)([]map[string]string, error){
|
|
|
|
lookupMap := map[string]string{
|
|
"PersonIdentifier": personIdentifier,
|
|
}
|
|
|
|
anyItemsFound, matchingItemsMapList, err := myGenomesMapListDatastore.GetMapListItems(lookupMap)
|
|
if (err != nil) { return nil, err }
|
|
if (anyItemsFound == false){
|
|
emptyMapList := make([]map[string]string, 0)
|
|
return emptyMapList, nil
|
|
}
|
|
|
|
return matchingItemsMapList, nil
|
|
}
|
|
|
|
// This will not include any calculated genome identifiers, which only exist within analyses
|
|
func GetAllPersonRawGenomeIdentifiersList(personIdentifier string)([][16]byte, error){
|
|
|
|
allPersonGenomesMapList, err := GetAllPersonGenomesMapList(personIdentifier)
|
|
if (err != nil) { return nil, err }
|
|
|
|
personGenomeIdentifiersList := make([][16]byte, 0, len(allPersonGenomesMapList))
|
|
|
|
for _, genomeMap := range allPersonGenomesMapList{
|
|
|
|
genomeIdentifierHex, exists := genomeMap["GenomeIdentifier"]
|
|
if (exists == false){
|
|
return nil, errors.New("Malformed myGenomesMapList: Item missing GenomeIdentifier")
|
|
}
|
|
|
|
genomeIdentifier, err := encoding.DecodeHexStringTo16ByteArray(genomeIdentifierHex)
|
|
if (err != nil){
|
|
return nil, errors.New("Malformed myGenomesMapList: Item contains invalid GenomeIdentifier: " + genomeIdentifierHex)
|
|
}
|
|
|
|
personGenomeIdentifiersList = append(personGenomeIdentifiersList, genomeIdentifier)
|
|
}
|
|
|
|
return personGenomeIdentifiersList, nil
|
|
}
|
|
|
|
|
|
func DeleteAllPersonGenomes(personIdentifier string)error{
|
|
|
|
updatingMyGenomesMutex.Lock()
|
|
defer updatingMyGenomesMutex.Unlock()
|
|
|
|
mapToDelete := map[string]string{
|
|
"PersonIdentifier": personIdentifier,
|
|
}
|
|
|
|
err := myGenomesMapListDatastore.DeleteMapListItems(mapToDelete)
|
|
if (err != nil) { return err }
|
|
|
|
return nil
|
|
}
|
|
|
|
|