seekia/resources/geneticReferences/modifyLocusMetadata/modifyLocusMetadata.go

249 lines
7.7 KiB
Go
Raw Permalink Normal View History

// Package modifyLocusMetadata provides functions to modify the locus metadata file.
// This is a gob-encoded file which contains information about genome loci.
// Examples of this information are the chromosome, the position, and which gene the locus belongs to.
// We have to use Go code to edit this file; it cannot be edited manually.
// To run these functions, see:
// /utilities/addLocusMetadata/addLocusMetadata.go
// /utilities/importLocusMetadata/importLocusMetadata.go
package modifyLocusMetadata
import "seekia/resources/geneticReferences/locusMetadata"
import "seekia/internal/helpers"
import "encoding/gob"
import "bytes"
import "reflect"
import "errors"
// AddLocusMetadata merges the provided locus metadata objects into the existing locus metadata
// and returns the gob encoded bytes of the resulting list. It does not write any file itself.
//
// An input locus is matched to an existing locus if any of its rsIDs already has metadata.
// When a match is found, the two are merged into one object which replaces the existing one:
//  -Chromosome and Position must be identical, otherwise an error is returned
//  -RSIDsList, GeneNamesList and the CompanyAliases lists are combined
//  -Gene info is copied from the existing locus if the input locus does not know it
//  -References are merged; a differing value for the same key is an error
//
// An error is also returned if the same rsID appears more than once across the input loci.
//
// Merging writes missing entries into the References and CompanyAliases maps of the input objects,
// so the caller's maps may be modified.
//
//Outputs:
//  -int: Quantity of added loci (every input locus is counted, whether it is new or was merged into an existing locus)
//  -[]byte: File bytes of the new locus metadata file
//  -error
func AddLocusMetadata(inputLociToAddList []locusMetadata.LocusMetadata)(int, []byte, error){

	err := locusMetadata.InitializeLocusMetadataVariables()
	if (err != nil){ return 0, nil, err }

	// New and merged loci, which will be appended to the output list
	lociToAddList := make([]locusMetadata.LocusMetadata, 0)

	// Existing loci which are superseded by a merged locus in lociToAddList
	lociToDeleteList := make([]locusMetadata.LocusMetadata, 0)

	// We use this map to make sure that each LocusMetadata object to add has unique rsIDs
	newLocusMetadataRSIDsMap := make(map[int64]struct{})

	for _, newLocusMetadataObject := range inputLociToAddList{

		newLocusRSIDsList := newLocusMetadataObject.RSIDsList

		for _, rsID := range newLocusRSIDsList{

			_, exists := newLocusMetadataRSIDsMap[rsID]
			if (exists == true){
				rsIDString := helpers.ConvertInt64ToString(rsID)
				return 0, nil, errors.New("inputLociToAddList contains multiple locus metadatas with a duplicate rsID: " + rsIDString)
			}

			newLocusMetadataRSIDsMap[rsID] = struct{}{}
		}

		// First we check to see if locus metadata already exists
		// NOTE(review): Only the first existing locus found is returned. If the rsIDs of the new locus
		// belong to different existing loci, the others are neither merged nor deleted - confirm this cannot happen.
		// Outputs:
		//  -bool: Locus metadata already exists for this locus
		//  -locusMetadata.LocusMetadata
		//  -error
		getExistingLocusMetadata := func()(bool, locusMetadata.LocusMetadata, error){

			for _, rsID := range newLocusRSIDsList{

				exists, existingLocusMetadata, err := locusMetadata.GetLocusMetadata(rsID)
				if (err != nil){ return false, locusMetadata.LocusMetadata{}, err }
				if (exists == true){
					return true, existingLocusMetadata, nil
				}
			}

			return false, locusMetadata.LocusMetadata{}, nil
		}

		locusMetadataExists, existingLocusMetadata, err := getExistingLocusMetadata()
		if (err != nil) { return 0, nil, err }
		if (locusMetadataExists == false){
			// Nothing to merge with. We add the locus as-is.
			lociToAddList = append(lociToAddList, newLocusMetadataObject)
			continue
		}

		// We check to see if the existing locus metadata contains identical chromosome/position

		newChromosome := newLocusMetadataObject.Chromosome
		existingChromosome := existingLocusMetadata.Chromosome
		if (existingChromosome != newChromosome){
			// New locus metadata conflicts with existing locus metadata
			return 0, nil, errors.New("Trying to add a locus metadata with a conflicting chromosome.")
		}

		newPosition := newLocusMetadataObject.Position
		existingPosition := existingLocusMetadata.Position
		if (existingPosition != newPosition){
			// New locus metadata conflicts with existing locus metadata
			return 0, nil, errors.New("Trying to add a locus metadata with a conflicting position.")
		}

		// We combine the rsIDs lists

		existingRSIDsList := existingLocusMetadata.RSIDsList

		combinedRSIDsList := helpers.CombineTwoListsAndAvoidDuplicates(newLocusRSIDsList, existingRSIDsList)

		newLocusMetadataObject.RSIDsList = combinedRSIDsList

		// We merge the gene info
		// If only the new locus knows the gene info, or neither does, there is nothing to do.

		newGeneInfoIsKnown := newLocusMetadataObject.GeneInfoIsKnown
		existingGeneInfoIsKnown := existingLocusMetadata.GeneInfoIsKnown

		if (newGeneInfoIsKnown == false && existingGeneInfoIsKnown == true){

			// We add existing gene info to new locus metadata object

			newLocusMetadataObject.GeneInfoIsKnown = true
			newLocusMetadataObject.GeneExists = existingLocusMetadata.GeneExists
			newLocusMetadataObject.GeneNamesList = existingLocusMetadata.GeneNamesList

		} else if (newGeneInfoIsKnown == true && existingGeneInfoIsKnown == true){

			// We check for conflicts

			existingGeneExists := existingLocusMetadata.GeneExists
			newGeneExists := newLocusMetadataObject.GeneExists

			if (existingGeneExists != newGeneExists){
				// New locus metadata conflicts with existing locus metadata
				return 0, nil, errors.New("Trying to add a locus metadata with a conflicting GeneExists.")
			}

			if (existingGeneExists == true){

				existingGeneNamesList := existingLocusMetadata.GeneNamesList
				newGeneNamesList := newLocusMetadataObject.GeneNamesList

				if (existingGeneNamesList == nil){
					return 0, nil, errors.New("Locus Metadata contains item with known gene name(s) but with a nil GeneNamesList")
				}
				if (newGeneNamesList == nil){
					return 0, nil, errors.New("New locus metadata item to add contains known gene name(s) but with a nil GeneNamesList")
				}

				combinedGeneNamesList := helpers.CombineTwoListsAndAvoidDuplicates(existingGeneNamesList, newGeneNamesList)

				newLocusMetadataObject.GeneNamesList = combinedGeneNamesList
			}
		}

		// We merge the references maps

		existingReferencesMap := existingLocusMetadata.References
		newReferencesMap := newLocusMetadataObject.References

		if (newReferencesMap == nil && len(existingReferencesMap) != 0){

			// Writing to a nil map panics.
			// The new locus has no references, so there is nothing to conflict with.
			// We carry over the existing references unchanged.
			newReferencesMap = existingReferencesMap

		} else {

			for key, existingValue := range existingReferencesMap{

				newValue, exists := newReferencesMap[key]
				if (exists == false){
					newReferencesMap[key] = existingValue
					continue
				}
				if (existingValue != newValue){
					return 0, nil, errors.New("Existing locus metadata references map contains different value for same key")
				}
			}
		}

		newLocusMetadataObject.References = newReferencesMap

		// We merge the company aliases maps

		newCompanyAliasesMap := newLocusMetadataObject.CompanyAliases
		existingCompanyAliasesMap := existingLocusMetadata.CompanyAliases

		if (newCompanyAliasesMap == nil && len(existingCompanyAliasesMap) != 0){

			// Writing to a nil map panics.
			// The new locus has no company aliases, so we carry over the existing ones unchanged.
			newCompanyAliasesMap = existingCompanyAliasesMap

		} else {

			for key, existingValue := range existingCompanyAliasesMap{

				newValue, exists := newCompanyAliasesMap[key]
				if (exists == false){
					newCompanyAliasesMap[key] = existingValue
					continue
				}

				// We combine the company alias lists

				combinedCompanyAliasesList := helpers.CombineTwoListsAndAvoidDuplicates(existingValue, newValue)

				newCompanyAliasesMap[key] = combinedCompanyAliasesList
			}
		}

		newLocusMetadataObject.CompanyAliases = newCompanyAliasesMap

		// The merged object replaces the existing object
		lociToAddList = append(lociToAddList, newLocusMetadataObject)
		lociToDeleteList = append(lociToDeleteList, existingLocusMetadata)
	}

	existingLocusMetadataObjectsList, err := locusMetadata.GetLocusMetadataObjectsList()
	if (err != nil) { return 0, nil, err }

	// We build the new list: All existing loci which were not superseded, followed by the new and merged loci

	newLocusMetadataObjectsList := make([]locusMetadata.LocusMetadata, 0, len(existingLocusMetadataObjectsList) + len(lociToAddList))

	for _, locusMetadataObject := range existingLocusMetadataObjectsList{

		// We check to see if we should delete this item
		// A locus is deleted if it is deeply equal to a locus which was merged into a new object

		checkIfLocusIsDeleted := func()bool{

			for _, locusToDelete := range lociToDeleteList{

				areEqual := reflect.DeepEqual(locusToDelete, locusMetadataObject)
				if (areEqual == true){
					return true
				}
			}

			return false
		}

		locusIsDeleted := checkIfLocusIsDeleted()
		if (locusIsDeleted == false){
			newLocusMetadataObjectsList = append(newLocusMetadataObjectsList, locusMetadataObject)
		}
	}

	newLocusMetadataObjectsList = append(newLocusMetadataObjectsList, lociToAddList...)

	quantityOfAddedLoci := len(lociToAddList)

	// We gob encode the complete list into the new file bytes

	buffer := new(bytes.Buffer)

	encoder := gob.NewEncoder(buffer)

	err = encoder.Encode(newLocusMetadataObjectsList)
	if (err != nil) { return 0, nil, err }

	newLocusMetadataFileBytes := buffer.Bytes()

	return quantityOfAddedLoci, newLocusMetadataFileBytes, nil
}
// PruneLocusMetadata is a placeholder which has not been implemented yet.
// It currently performs no work and always returns nil file bytes and a nil error.
//
//TODO: Create package
// This function will build a list of every rsID used in every trait and disease, and
// prune the locus metadata files of loci which do not exist in that list
func PruneLocusMetadata()([]byte, error){

	// Nothing is pruned yet, so there are no file bytes to return.
	var prunedLocusMetadataFileBytes []byte

	return prunedLocusMetadataFileBytes, nil
}