2024-08-05 09:11:10 +02:00
|
|
|
|
|
|
|
// modifyLocusMetadata provides functions to modify the locus metadata file
|
|
|
|
// This is a gob encoded file which contains information about genome loci
|
|
|
|
// Examples of this information are chromosome, position, and which gene the locus belongs to.
|
|
|
|
// We have to use Go code to edit this file; it cannot be edited manually.
|
|
|
|
// To run these functions, see:
|
|
|
|
// /utilities/addLocusMetadata/addLocusMetadata.go
|
|
|
|
// /utilities/importLocusMetadata/importLocusMetadata.go
|
|
|
|
|
|
|
|
package modifyLocusMetadata
|
|
|
|
|
|
|
|
import "seekia/resources/geneticReferences/locusMetadata"
|
|
|
|
|
|
|
|
import "seekia/internal/helpers"
|
|
|
|
|
|
|
|
import "encoding/gob"
|
|
|
|
|
|
|
|
import "bytes"
|
|
|
|
import "reflect"
|
|
|
|
import "errors"
|
|
|
|
|
|
|
|
|
|
|
|
//Outputs:
|
|
|
|
// -int: Quantity of added loci (this also includes loci which already existed but had new info to merge)
|
|
|
|
// -[]byte: File bytes of the new locus metadata file
|
|
|
|
// -error
|
|
|
|
func AddLocusMetadata(inputLociToAddList []locusMetadata.LocusMetadata)(int, []byte, error){
|
|
|
|
|
|
|
|
err := locusMetadata.InitializeLocusMetadataVariables()
|
|
|
|
if (err != nil){ return 0, nil, err }
|
|
|
|
|
|
|
|
lociToAddList := make([]locusMetadata.LocusMetadata, 0)
|
|
|
|
lociToDeleteList := make([]locusMetadata.LocusMetadata, 0)
|
|
|
|
|
|
|
|
// We use this map to make sure that each LocusMetadata object to add has unique rsIDs
|
|
|
|
newLocusMetadataRSIDsMap := make(map[int64]struct{})
|
|
|
|
|
|
|
|
for _, newLocusMetadataObject := range inputLociToAddList{
|
|
|
|
|
|
|
|
newLocusRSIDsList := newLocusMetadataObject.RSIDsList
|
|
|
|
|
|
|
|
for _, rsID := range newLocusRSIDsList{
|
|
|
|
|
|
|
|
_, exists := newLocusMetadataRSIDsMap[rsID]
|
|
|
|
if (exists == true){
|
2024-08-13 15:25:47 +02:00
|
|
|
rsIDString := helpers.ConvertInt64ToString(rsID)
|
|
|
|
return 0, nil, errors.New("inputLociToAddList contains multiple locus metadatas with a duplicate rsID: " + rsIDString)
|
2024-08-05 09:11:10 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
newLocusMetadataRSIDsMap[rsID] = struct{}{}
|
|
|
|
}
|
|
|
|
|
|
|
|
// First we check to see if locus metadata already exists
|
|
|
|
|
|
|
|
// Outputs:
|
|
|
|
// -bool: Locus metadata already exists for this locus
|
|
|
|
// -locusMetadata.LocusMetadata
|
|
|
|
// -error
|
|
|
|
getExistingLocusMetadata := func()(bool, locusMetadata.LocusMetadata, error){
|
|
|
|
|
|
|
|
for _, rsID := range newLocusRSIDsList{
|
|
|
|
|
|
|
|
exists, existingLocusMetadata, err := locusMetadata.GetLocusMetadata(rsID)
|
|
|
|
if (err != nil){ return false, locusMetadata.LocusMetadata{}, err }
|
|
|
|
if (exists == true){
|
|
|
|
return true, existingLocusMetadata, nil
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return false, locusMetadata.LocusMetadata{}, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
locusMetadataExists, existingLocusMetadata, err := getExistingLocusMetadata()
|
|
|
|
if (err != nil) { return 0, nil, err }
|
|
|
|
if (locusMetadataExists == false){
|
|
|
|
|
|
|
|
lociToAddList = append(lociToAddList, newLocusMetadataObject)
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
// We check to see if the existing locus metadata contains identical chromosome/position
|
|
|
|
|
|
|
|
newChromosome := newLocusMetadataObject.Chromosome
|
|
|
|
existingChromosome := existingLocusMetadata.Chromosome
|
|
|
|
|
|
|
|
if (existingChromosome != newChromosome){
|
|
|
|
// New locus metadata conflicts with existing locus metadata
|
|
|
|
return 0, nil, errors.New("Trying to add a locus metadata with a conflicting chromosome.")
|
|
|
|
}
|
|
|
|
|
|
|
|
newPosition := newLocusMetadataObject.Position
|
|
|
|
existingPosition := existingLocusMetadata.Position
|
|
|
|
|
|
|
|
if (existingPosition != newPosition){
|
|
|
|
// New locus metadata conflicts with existing locus metadata
|
|
|
|
return 0, nil, errors.New("Trying to add a locus metadata with a conflicting position.")
|
|
|
|
}
|
|
|
|
|
|
|
|
newRSIDsList := newLocusMetadataObject.RSIDsList
|
|
|
|
existingRSIDsList := existingLocusMetadata.RSIDsList
|
|
|
|
|
|
|
|
combinedRSIDsList := helpers.CombineTwoListsAndAvoidDuplicates(newRSIDsList, existingRSIDsList)
|
|
|
|
|
|
|
|
newLocusMetadataObject.RSIDsList = combinedRSIDsList
|
|
|
|
|
|
|
|
newGeneInfoIsKnown := newLocusMetadataObject.GeneInfoIsKnown
|
|
|
|
existingGeneInfoIsKnown := existingLocusMetadata.GeneInfoIsKnown
|
|
|
|
|
|
|
|
if (newGeneInfoIsKnown == false && existingGeneInfoIsKnown == true){
|
|
|
|
|
|
|
|
// We add existing gene info to new locus metadata object
|
|
|
|
|
|
|
|
existingGeneExists := existingLocusMetadata.GeneExists
|
|
|
|
existingGeneNamesList := existingLocusMetadata.GeneNamesList
|
|
|
|
|
|
|
|
newLocusMetadataObject.GeneInfoIsKnown = true
|
|
|
|
newLocusMetadataObject.GeneExists = existingGeneExists
|
|
|
|
newLocusMetadataObject.GeneNamesList = existingGeneNamesList
|
|
|
|
|
|
|
|
} else if (newGeneInfoIsKnown == true && existingGeneInfoIsKnown == true){
|
|
|
|
|
|
|
|
// We check for conflicts
|
|
|
|
|
|
|
|
existingGeneExists := existingLocusMetadata.GeneExists
|
|
|
|
newGeneExists := newLocusMetadataObject.GeneExists
|
|
|
|
|
|
|
|
if (existingGeneExists != newGeneExists){
|
|
|
|
// New locus metadata conflicts with existing locus metadata
|
|
|
|
return 0, nil, errors.New("Trying to add a locus metadata with a GeneExists.")
|
|
|
|
}
|
|
|
|
|
|
|
|
if (existingGeneExists == true){
|
|
|
|
|
|
|
|
existingGeneNamesList := existingLocusMetadata.GeneNamesList
|
|
|
|
newGeneNamesList := newLocusMetadataObject.GeneNamesList
|
|
|
|
|
|
|
|
if (existingGeneNamesList == nil){
|
|
|
|
return 0, nil, errors.New("Locus Metadata contains item with known gene name(s) but with a nil GeneNamesList")
|
|
|
|
}
|
|
|
|
if (newGeneNamesList == nil){
|
|
|
|
return 0, nil, errors.New("New locus metadata item to add contains known gene name(s) but with a nil GeneNamesList")
|
|
|
|
}
|
|
|
|
|
|
|
|
combinedGeneNamesList := helpers.CombineTwoListsAndAvoidDuplicates(existingGeneNamesList, newGeneNamesList)
|
|
|
|
|
|
|
|
newLocusMetadataObject.GeneNamesList = combinedGeneNamesList
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
existingReferencesMap := existingLocusMetadata.References
|
|
|
|
newReferencesMap := newLocusMetadataObject.References
|
|
|
|
|
|
|
|
// We merge the references maps
|
|
|
|
|
|
|
|
for key, existingValue := range existingReferencesMap{
|
|
|
|
|
|
|
|
newValue, exists := newReferencesMap[key]
|
|
|
|
if (exists == false){
|
|
|
|
newReferencesMap[key] = existingValue
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
if (existingValue != newValue){
|
|
|
|
return 0, nil, errors.New("Existing locus metadata references map contains different value for same key")
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
newLocusMetadataObject.References = newReferencesMap
|
|
|
|
|
|
|
|
newCompanyAliasesMap := newLocusMetadataObject.CompanyAliases
|
|
|
|
existingCompanyAliasesMap := existingLocusMetadata.CompanyAliases
|
|
|
|
|
|
|
|
for key, existingValue := range existingCompanyAliasesMap{
|
|
|
|
|
|
|
|
newValue, exists := newCompanyAliasesMap[key]
|
|
|
|
if (exists == false){
|
|
|
|
newCompanyAliasesMap[key] = existingValue
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
// We combine the company alias lists
|
|
|
|
|
|
|
|
combinedCompanyAliasesList := helpers.CombineTwoListsAndAvoidDuplicates(existingValue, newValue)
|
|
|
|
|
|
|
|
newCompanyAliasesMap[key] = combinedCompanyAliasesList
|
|
|
|
}
|
|
|
|
|
|
|
|
newLocusMetadataObject.CompanyAliases = newCompanyAliasesMap
|
|
|
|
|
|
|
|
lociToAddList = append(lociToAddList, newLocusMetadataObject)
|
|
|
|
lociToDeleteList = append(lociToDeleteList, existingLocusMetadata)
|
|
|
|
}
|
|
|
|
|
|
|
|
existingLocusMetadataObjectsList, err := locusMetadata.GetLocusMetadataObjectsList()
|
|
|
|
if (err != nil) { return 0, nil, err }
|
|
|
|
|
|
|
|
newLocusMetadataObjectsList := make([]locusMetadata.LocusMetadata, 0)
|
|
|
|
|
|
|
|
for _, locusMetadataObject := range existingLocusMetadataObjectsList{
|
|
|
|
|
|
|
|
// We check to see if we should delete this item
|
|
|
|
|
|
|
|
checkIfLocusIsDeleted := func()bool{
|
|
|
|
|
|
|
|
for _, locusToDelete := range lociToDeleteList{
|
|
|
|
|
|
|
|
areEqual := reflect.DeepEqual(locusToDelete, locusMetadataObject)
|
|
|
|
if (areEqual == true){
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
|
|
|
locusIsDeleted := checkIfLocusIsDeleted()
|
|
|
|
if (locusIsDeleted == false){
|
|
|
|
newLocusMetadataObjectsList = append(newLocusMetadataObjectsList, locusMetadataObject)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
newLocusMetadataObjectsList = append(newLocusMetadataObjectsList, lociToAddList...)
|
|
|
|
|
|
|
|
quantityOfAddedLoci := len(lociToAddList)
|
|
|
|
|
|
|
|
buffer := new(bytes.Buffer)
|
|
|
|
|
|
|
|
encoder := gob.NewEncoder(buffer)
|
|
|
|
|
|
|
|
err = encoder.Encode(newLocusMetadataObjectsList)
|
|
|
|
if (err != nil) { return 0, nil, err }
|
|
|
|
|
|
|
|
newLocusMetadataFileBytes := buffer.Bytes()
|
|
|
|
|
|
|
|
return quantityOfAddedLoci, newLocusMetadataFileBytes, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// PruneLocusMetadata is currently a placeholder which performs no work.
// It always returns nil file bytes and a nil error.
//
//Outputs:
//	-[]byte: Always nil until the function is implemented
//	-error: Always nil until the function is implemented
func PruneLocusMetadata()([]byte, error){

	//TODO: Implement this function
	// The plan is to collect every rsID referenced by any trait or disease, and then
	// remove all loci from the locus metadata files whose rsIDs are not in that collection

	var prunedFileBytes []byte

	return prunedFileBytes, nil
}
|
|
|
|
|
|
|
|
|