seekia/resources/geneticReferences/locusMetadata/locusMetadata.go

408 lines
11 KiB
Go

// Package locusMetadata provides information about gene locations.
package locusMetadata
// Locus position information should correspond to Human genome reference build 38.
import "seekia/internal/helpers"
import _ "embed"
import "encoding/json"
import "errors"
// Embedded locus metadata files, one JSON file per chromosome.
// The bytes of each file are decoded into a []LocusMetadata by GetLocusMetadataObjectsListByChromosome.
// Each //go:embed directive applies to the variable declared on the line that follows it.
// Note: No file is embedded for chromosome 18.

//go:embed LocusMetadata_Chromosome1.json
var LocusMetadataFile_Chromosome1 []byte
//go:embed LocusMetadata_Chromosome2.json
var LocusMetadataFile_Chromosome2 []byte
//go:embed LocusMetadata_Chromosome3.json
var LocusMetadataFile_Chromosome3 []byte
//go:embed LocusMetadata_Chromosome4.json
var LocusMetadataFile_Chromosome4 []byte
//go:embed LocusMetadata_Chromosome5.json
var LocusMetadataFile_Chromosome5 []byte
//go:embed LocusMetadata_Chromosome6.json
var LocusMetadataFile_Chromosome6 []byte
//go:embed LocusMetadata_Chromosome7.json
var LocusMetadataFile_Chromosome7 []byte
//go:embed LocusMetadata_Chromosome8.json
var LocusMetadataFile_Chromosome8 []byte
//go:embed LocusMetadata_Chromosome9.json
var LocusMetadataFile_Chromosome9 []byte
//go:embed LocusMetadata_Chromosome10.json
var LocusMetadataFile_Chromosome10 []byte
//go:embed LocusMetadata_Chromosome11.json
var LocusMetadataFile_Chromosome11 []byte
//go:embed LocusMetadata_Chromosome12.json
var LocusMetadataFile_Chromosome12 []byte
//go:embed LocusMetadata_Chromosome13.json
var LocusMetadataFile_Chromosome13 []byte
//go:embed LocusMetadata_Chromosome14.json
var LocusMetadataFile_Chromosome14 []byte
//go:embed LocusMetadata_Chromosome15.json
var LocusMetadataFile_Chromosome15 []byte
//go:embed LocusMetadata_Chromosome16.json
var LocusMetadataFile_Chromosome16 []byte
//go:embed LocusMetadata_Chromosome17.json
var LocusMetadataFile_Chromosome17 []byte

// There is no embedded file for chromosome 18, so the sequence skips from 17 to 19.

//go:embed LocusMetadata_Chromosome19.json
var LocusMetadataFile_Chromosome19 []byte
//go:embed LocusMetadata_Chromosome20.json
var LocusMetadataFile_Chromosome20 []byte
//go:embed LocusMetadata_Chromosome21.json
var LocusMetadataFile_Chromosome21 []byte
//go:embed LocusMetadata_Chromosome22.json
var LocusMetadataFile_Chromosome22 []byte
// LocusMetadata describes a single locus (a location on a chromosome).
// Objects of this type are decoded from the embedded per-chromosome JSON files,
// so the field names must match the keys used in those files.
type LocusMetadata struct {

	// RSIDsList holds every rsID that refers to this locus.
	// All of the rsIDs are equivalent: they identify the same location.
	// rsID stands for Reference SNP cluster ID, written as "rs" followed by a number.
	// Only the number after the "rs" is stored, as an int64.
	RSIDsList []int64

	// Chromosome is the chromosome on which this locus exists.
	Chromosome int

	// Position is a number describing where the locus is located on its chromosome.
	Position int

	// GeneNamesList holds the names of the gene which this locus belongs to.
	// Every name in the list refers to the same gene.
	// The list contains "MISSING" if the gene name has not been added yet.
	// The list is empty if no gene exists.
	GeneNamesList []string

	// CompanyAliases holds, for each company, the alternate names it uses for this locus.
	// Raw genome files exported from companies sometimes use these names instead of rsIDs.
	// Example: TwentyThreeAndMe -> []string{"i5010839", "i5006049", "i4000295", "i5010838", "i5010837"}
	CompanyAliases map[GeneticsCompany][]string

	// References maps a reference name to its reference link.
	References map[string]string
}
// GeneticsCompany identifies a genetics company.
// A single byte is used, rather than a string, to save space.
type GeneticsCompany byte

// The companies which can appear as keys of LocusMetadata.CompanyAliases.
const (
	TwentyThreeAndMe GeneticsCompany = 1
	FamilyTreeDNA    GeneticsCompany = 2
	MyHeritage       GeneticsCompany = 3
)
// Lookup maps used by this package.
// All of them are nil until InitializeLocusMetadataVariables creates and fills them.
var (
	// Map Structure: RSID -> LocusMetadata object
	lociMetadataMap map[int64]LocusMetadata

	// Holds the aliases of each rsID which has aliases.
	// An alias is a different rsID which represents the same locus.
	// Map Structure: RSID -> List of alias RSIDs
	rsidAliasesMap map[int64][]int64

	// These maps hold the names which companies use for loci in place of rsIDs.
	// There is one map per company.
	// Map Structure: Alias -> Primary rsID (the primary rsID may itself have aliases)
	// Example: "i5010839" -> 78655421
	companyAliasesMap_23andMe       map[string]int64
	companyAliasesMap_FamilyTreeDNA map[string]int64
	companyAliasesMap_MyHeritage    map[string]int64
)
func InitializeLocusMetadataVariables()error{
lociMetadataMap = make(map[int64]LocusMetadata)
rsidAliasesMap = make(map[int64][]int64)
companyAliasesMap_23andMe = make(map[string]int64)
companyAliasesMap_FamilyTreeDNA = make(map[string]int64)
companyAliasesMap_MyHeritage = make(map[string]int64)
locusObjectsList, err := GetLocusMetadataObjectsList()
if (err != nil) { return err }
for _, locusObject := range locusObjectsList{
rsidsList := locusObject.RSIDsList
for _, rsid := range rsidsList{
_, exists := lociMetadataMap[rsid]
if (exists == true){
return errors.New("lociMetadataMap contains duplicate rsid.")
}
lociMetadataMap[rsid] = locusObject
}
if (len(rsidsList) > 1){
// We add rsid aliases to map
for _, rsid := range rsidsList{
rsidAliasesList := make([]int64, 0)
for _, rsidInner := range rsidsList{
if (rsid != rsidInner){
rsidAliasesList = append(rsidAliasesList, rsidInner)
}
}
rsidAliasesMap[rsid] = rsidAliasesList
}
}
companyAliasesMap := locusObject.CompanyAliases
if (len(companyAliasesMap) > 0){
// Now we add company aliases to maps
primaryRSID := rsidsList[0]
for companyObject, companyAliasesList := range companyAliasesMap{
if (companyObject == TwentyThreeAndMe){
for _, locusAlias := range companyAliasesList{
companyAliasesMap_23andMe[locusAlias] = primaryRSID
}
} else if (companyObject == FamilyTreeDNA){
for _, locusAlias := range companyAliasesList{
companyAliasesMap_FamilyTreeDNA[locusAlias] = primaryRSID
}
} else if (companyObject == MyHeritage){
for _, locusAlias := range companyAliasesList{
companyAliasesMap_MyHeritage[locusAlias] = primaryRSID
}
} else {
companyByteString := helpers.ConvertIntToString(int(companyObject))
return errors.New("Locus Object company aliases map contains invalid company object: " + companyByteString)
}
}
}
}
return nil
}
// GetLocusMetadata looks up the locus metadata for the provided rsID.
// An rsID which is not in lociMetadataMap is not an error: false is returned instead.
//Outputs:
//	-bool: Locus metadata exists
//	-LocusMetadata: The metadata (empty object if none exists)
//	-error: Returned if lociMetadataMap is nil (not initialized)
func GetLocusMetadata(inputRSID int64) (bool, LocusMetadata, error) {

	if lociMetadataMap == nil {
		return false, LocusMetadata{}, errors.New("GetLocusMetadata called when lociMetadataMap is not initialized.")
	}

	if metadata, found := lociMetadataMap[inputRSID]; found {
		return true, metadata, nil
	}

	return false, LocusMetadata{}, nil
}
// GetRSIDAliases returns the list of rsIDs which refer to the same location as the input rsID.
// The returned list does not include the input rsID itself.
// An rsID which has no entry in rsidAliasesMap is not an error: false is returned instead.
//Outputs:
//	-bool: Any aliases exist
//	-[]int64: List of alias rsIDs (nil if none exist)
//	-error: Returned if rsidAliasesMap is nil (not initialized)
func GetRSIDAliases(inputRSID int64) (bool, []int64, error) {

	if rsidAliasesMap == nil {
		return false, nil, errors.New("GetRSIDAliases called when rsidAliasesMap is not initialized.")
	}

	aliasesList, exists := rsidAliasesMap[inputRSID]
	if !exists {
		return false, nil, nil
	}

	return true, aliasesList, nil
}
// GetCompanyAliasRSID converts a company-specific locus alias into the primary rsID for that locus.
// companyName must be one of: "23andMe", "FamilyTreeDNA", "MyHeritage"
// An alias which is unknown for the company is not an error: false is returned instead.
//Outputs:
//	-bool: Alias found
//	-int64: Primary rsID alias to use to represent this locus
//	-error: Returned if companyName is invalid, or if the company's aliases map is nil (not initialized)
func GetCompanyAliasRSID(companyName string, locusAlias string) (bool, int64, error) {

	var companyAliasesMap map[string]int64

	switch companyName {
	case "23andMe":
		companyAliasesMap = companyAliasesMap_23andMe
	case "FamilyTreeDNA":
		companyAliasesMap = companyAliasesMap_FamilyTreeDNA
	case "MyHeritage":
		companyAliasesMap = companyAliasesMap_MyHeritage
	default:
		return false, 0, errors.New("GetCompanyAliasRSID called with invalid companyName: " + companyName)
	}

	if companyAliasesMap == nil {
		// A lookup in a nil map would silently report that the alias does not exist.
		// We return an error instead, as GetLocusMetadata and GetRSIDAliases do.
		return false, 0, errors.New("GetCompanyAliasRSID called when company aliases maps are not initialized.")
	}

	locusRSID, exists := companyAliasesMap[locusAlias]
	if !exists {
		return false, 0, nil
	}

	return true, locusRSID, nil
}
// GetLocusMetadataObjectsList returns the locus metadata objects of chromosomes 1 through 22.
// The objects are ordered by chromosome: all of chromosome 1's objects come first, then chromosome 2's, and so on.
// If any chromosome fails to load, that error is returned and the list is nil.
// This function is only public for use in testing
func GetLocusMetadataObjectsList() ([]LocusMetadata, error) {

	const firstChromosome = 1
	const lastChromosome = 22

	allLociList := make([]LocusMetadata, 0, lastChromosome-firstChromosome+1)

	for chromosome := firstChromosome; chromosome <= lastChromosome; chromosome++ {

		chromosomeLociList, err := GetLocusMetadataObjectsListByChromosome(chromosome)
		if err != nil {
			return nil, err
		}

		allLociList = append(allLociList, chromosomeLociList...)
	}

	return allLociList, nil
}
// GetLocusMetadataObjectsListByChromosome returns the locus metadata objects of a single chromosome.
// The objects are decoded from the JSON file which is embedded for that chromosome.
// No file is embedded for chromosome 18, so an empty list is returned for it.
//Outputs:
//	-[]LocusMetadata: The chromosome's locus metadata objects
//	-error: Returned if chromosome is not between 1 and 22, or if the embedded file cannot be decoded
func GetLocusMetadataObjectsListByChromosome(chromosome int) ([]LocusMetadata, error) {

	if chromosome < 1 || chromosome > 22 {
		chromosomeString := helpers.ConvertIntToString(chromosome)
		return nil, errors.New("GetLocusMetadataObjectsListByChromosome called with invalid chromosome: " + chromosomeString)
	}

	// We select the embedded file for this chromosome

	var fileBytes []byte

	switch chromosome {
	case 1:
		fileBytes = LocusMetadataFile_Chromosome1
	case 2:
		fileBytes = LocusMetadataFile_Chromosome2
	case 3:
		fileBytes = LocusMetadataFile_Chromosome3
	case 4:
		fileBytes = LocusMetadataFile_Chromosome4
	case 5:
		fileBytes = LocusMetadataFile_Chromosome5
	case 6:
		fileBytes = LocusMetadataFile_Chromosome6
	case 7:
		fileBytes = LocusMetadataFile_Chromosome7
	case 8:
		fileBytes = LocusMetadataFile_Chromosome8
	case 9:
		fileBytes = LocusMetadataFile_Chromosome9
	case 10:
		fileBytes = LocusMetadataFile_Chromosome10
	case 11:
		fileBytes = LocusMetadataFile_Chromosome11
	case 12:
		fileBytes = LocusMetadataFile_Chromosome12
	case 13:
		fileBytes = LocusMetadataFile_Chromosome13
	case 14:
		fileBytes = LocusMetadataFile_Chromosome14
	case 15:
		fileBytes = LocusMetadataFile_Chromosome15
	case 16:
		fileBytes = LocusMetadataFile_Chromosome16
	case 17:
		fileBytes = LocusMetadataFile_Chromosome17
	case 19:
		fileBytes = LocusMetadataFile_Chromosome19
	case 20:
		fileBytes = LocusMetadataFile_Chromosome20
	case 21:
		fileBytes = LocusMetadataFile_Chromosome21
	case 22:
		fileBytes = LocusMetadataFile_Chromosome22
	default:
		// Only chromosome 18 reaches this case, because the range was checked above.
		// No file exists for it (there is no LocusMetadataFile_Chromosome18), so no loci exist for this chromosome.
		return make([]LocusMetadata, 0), nil
	}

	var chromosomeLociList []LocusMetadata

	if err := json.Unmarshal(fileBytes, &chromosomeLociList); err != nil {
		return nil, err
	}

	return chromosomeLociList, nil
}