// Package locusMetadata provides information about gene locations.
package locusMetadata
// Locus position information should correspond to the human genome reference build 38.
import "seekia/internal/helpers"
|
|
|
|
import _ "embed"
|
|
|
|
import "encoding/json"
|
|
import "errors"
|
|
|
|
|
|
//go:embed LocusMetadata_Chromosome1.json
|
|
var LocusMetadataFile_Chromosome1 []byte
|
|
|
|
//go:embed LocusMetadata_Chromosome2.json
|
|
var LocusMetadataFile_Chromosome2 []byte
|
|
|
|
//go:embed LocusMetadata_Chromosome3.json
|
|
var LocusMetadataFile_Chromosome3 []byte
|
|
|
|
//go:embed LocusMetadata_Chromosome4.json
|
|
var LocusMetadataFile_Chromosome4 []byte
|
|
|
|
//go:embed LocusMetadata_Chromosome5.json
|
|
var LocusMetadataFile_Chromosome5 []byte
|
|
|
|
//go:embed LocusMetadata_Chromosome6.json
|
|
var LocusMetadataFile_Chromosome6 []byte
|
|
|
|
//go:embed LocusMetadata_Chromosome7.json
|
|
var LocusMetadataFile_Chromosome7 []byte
|
|
|
|
//go:embed LocusMetadata_Chromosome8.json
|
|
var LocusMetadataFile_Chromosome8 []byte
|
|
|
|
//go:embed LocusMetadata_Chromosome9.json
|
|
var LocusMetadataFile_Chromosome9 []byte
|
|
|
|
//go:embed LocusMetadata_Chromosome10.json
|
|
var LocusMetadataFile_Chromosome10 []byte
|
|
|
|
//go:embed LocusMetadata_Chromosome11.json
|
|
var LocusMetadataFile_Chromosome11 []byte
|
|
|
|
//go:embed LocusMetadata_Chromosome12.json
|
|
var LocusMetadataFile_Chromosome12 []byte
|
|
|
|
//go:embed LocusMetadata_Chromosome13.json
|
|
var LocusMetadataFile_Chromosome13 []byte
|
|
|
|
//go:embed LocusMetadata_Chromosome14.json
|
|
var LocusMetadataFile_Chromosome14 []byte
|
|
|
|
//go:embed LocusMetadata_Chromosome15.json
|
|
var LocusMetadataFile_Chromosome15 []byte
|
|
|
|
//go:embed LocusMetadata_Chromosome16.json
|
|
var LocusMetadataFile_Chromosome16 []byte
|
|
|
|
//go:embed LocusMetadata_Chromosome17.json
|
|
var LocusMetadataFile_Chromosome17 []byte
|
|
|
|
//go:embed LocusMetadata_Chromosome19.json
|
|
var LocusMetadataFile_Chromosome19 []byte
|
|
|
|
//go:embed LocusMetadata_Chromosome20.json
|
|
var LocusMetadataFile_Chromosome20 []byte
|
|
|
|
//go:embed LocusMetadata_Chromosome21.json
|
|
var LocusMetadataFile_Chromosome21 []byte
|
|
|
|
//go:embed LocusMetadata_Chromosome22.json
|
|
var LocusMetadataFile_Chromosome22 []byte
|
|
|
|
|
|
type LocusMetadata struct{
|
|
|
|
// A list of RSIDs that refer to this location
|
|
// Each RSID is equivalent and refers to the same location
|
|
// rsID stands for Reference SNP cluster ID.
|
|
// Each rsID is an "rs" followed by a number.
|
|
// We store the number after the rs as an int64.
|
|
RSIDsList []int64
|
|
|
|
// The chromosome which this location exists on
|
|
Chromosome int
|
|
|
|
// The position of this locus
|
|
// This is a number describing its location on the chromosome it exists on.
|
|
Position int
|
|
|
|
// A list of gene names which refer to the gene which this locus belongs to.
|
|
// Each gene name refers to the same gene.
|
|
// Will be a list containing "MISSING" if the gene name has not been added yet
|
|
// Will be an empty list if no gene exists
|
|
GeneNamesList []string
|
|
|
|
// A list of alternate names for the rsid used by companies
|
|
// These are the names that the raw genome files exported from companies sometimes use instead of rsIDs
|
|
// Example: TwentyThreeAndMe -> []string{"i5010839", "i5006049", "i4000295", "i5010838", "i5010837"}
|
|
CompanyAliases map[GeneticsCompany][]string
|
|
|
|
// Reference name -> Reference link
|
|
References map[string]string
|
|
}
|
|
|
|
// GeneticsCompany identifies a genetics company in LocusMetadata.CompanyAliases.
// We use this data structure to save space, rather than using a string.
type GeneticsCompany byte

// These values are the keys of the LocusMetadata.CompanyAliases map.
// InitializeLocusMetadataVariables rejects any other value.
const (
	TwentyThreeAndMe GeneticsCompany = 1
	FamilyTreeDNA    GeneticsCompany = 2
	MyHeritage       GeneticsCompany = 3
)
|
|
|
|
// Map Structure: RSID -> LocusMetadata object
|
|
var lociMetadataMap map[int64]LocusMetadata
|
|
|
|
// This map stores a list of aliases for rsids which have aliases
|
|
// An alias is a different rsid which represents the same locus
|
|
var rsidAliasesMap map[int64][]int64
|
|
|
|
// We use these maps to store the locus aliases for rsIDs used by companies
|
|
// Map structure: Alias -> Primary rsID (there may be aliases)
|
|
// Example: "i5010839" -> 78655421
|
|
var companyAliasesMap_23andMe map[string]int64
|
|
var companyAliasesMap_FamilyTreeDNA map[string]int64
|
|
var companyAliasesMap_MyHeritage map[string]int64
|
|
|
|
|
|
func InitializeLocusMetadataVariables()error{
|
|
|
|
lociMetadataMap = make(map[int64]LocusMetadata)
|
|
rsidAliasesMap = make(map[int64][]int64)
|
|
|
|
companyAliasesMap_23andMe = make(map[string]int64)
|
|
companyAliasesMap_FamilyTreeDNA = make(map[string]int64)
|
|
companyAliasesMap_MyHeritage = make(map[string]int64)
|
|
|
|
locusObjectsList, err := GetLocusMetadataObjectsList()
|
|
if (err != nil) { return err }
|
|
|
|
for _, locusObject := range locusObjectsList{
|
|
|
|
rsidsList := locusObject.RSIDsList
|
|
|
|
for _, rsid := range rsidsList{
|
|
|
|
_, exists := lociMetadataMap[rsid]
|
|
if (exists == true){
|
|
return errors.New("lociMetadataMap contains duplicate rsid.")
|
|
}
|
|
|
|
lociMetadataMap[rsid] = locusObject
|
|
}
|
|
|
|
if (len(rsidsList) > 1){
|
|
|
|
// We add rsid aliases to map
|
|
|
|
for _, rsid := range rsidsList{
|
|
|
|
rsidAliasesList := make([]int64, 0)
|
|
|
|
for _, rsidInner := range rsidsList{
|
|
|
|
if (rsid != rsidInner){
|
|
rsidAliasesList = append(rsidAliasesList, rsidInner)
|
|
}
|
|
}
|
|
|
|
rsidAliasesMap[rsid] = rsidAliasesList
|
|
}
|
|
}
|
|
|
|
companyAliasesMap := locusObject.CompanyAliases
|
|
|
|
if (len(companyAliasesMap) > 0){
|
|
|
|
// Now we add company aliases to maps
|
|
|
|
primaryRSID := rsidsList[0]
|
|
|
|
for companyObject, companyAliasesList := range companyAliasesMap{
|
|
|
|
if (companyObject == TwentyThreeAndMe){
|
|
|
|
for _, locusAlias := range companyAliasesList{
|
|
companyAliasesMap_23andMe[locusAlias] = primaryRSID
|
|
}
|
|
|
|
} else if (companyObject == FamilyTreeDNA){
|
|
|
|
for _, locusAlias := range companyAliasesList{
|
|
companyAliasesMap_FamilyTreeDNA[locusAlias] = primaryRSID
|
|
}
|
|
|
|
} else if (companyObject == MyHeritage){
|
|
|
|
for _, locusAlias := range companyAliasesList{
|
|
companyAliasesMap_MyHeritage[locusAlias] = primaryRSID
|
|
}
|
|
|
|
} else {
|
|
companyByteString := helpers.ConvertIntToString(int(companyObject))
|
|
return errors.New("Locus Object company aliases map contains invalid company object: " + companyByteString)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
//Outputs:
|
|
// -bool: Locus metadata exists
|
|
// -LocusMetadata
|
|
// -error
|
|
func GetLocusMetadata(inputRSID int64)(bool, LocusMetadata, error){
|
|
|
|
if (lociMetadataMap == nil){
|
|
return false, LocusMetadata{}, errors.New("GetLocusMetadata called when lociMetadataMap is not initialized.")
|
|
}
|
|
|
|
locusMetadataObject, exists := lociMetadataMap[inputRSID]
|
|
if (exists == false){
|
|
return false, LocusMetadata{}, nil
|
|
}
|
|
|
|
return true, locusMetadataObject, nil
|
|
}
|
|
|
|
// This function will return a list of RSIDs which refer to the same location as the input RSID
|
|
// -bool: Any Aliases exist
|
|
// -[]int64: List of alias RSIDs
|
|
// -error (if RSID is unknown)
|
|
func GetRSIDAliases(inputRSID int64)(bool, []int64, error){
|
|
|
|
if (rsidAliasesMap == nil){
|
|
return false, nil, errors.New("rsidAliasesMap called when rsidAliasesMap is not initialized.")
|
|
}
|
|
|
|
aliasesList, exists := rsidAliasesMap[inputRSID]
|
|
if (exists == false){
|
|
return false, nil, nil
|
|
}
|
|
|
|
return true, aliasesList, nil
|
|
}
|
|
|
|
|
|
//Outputs:
|
|
// -bool: Alias found
|
|
// -int64: Primary rsID alias to use to represent this locus
|
|
// -error
|
|
func GetCompanyAliasRSID(companyName string, locusAlias string)(bool, int64, error){
|
|
|
|
if (companyName == "23andMe"){
|
|
|
|
locusRSID, exists := companyAliasesMap_23andMe[locusAlias]
|
|
if (exists == false){
|
|
return false, 0, nil
|
|
}
|
|
|
|
return true, locusRSID, nil
|
|
|
|
} else if (companyName == "FamilyTreeDNA"){
|
|
|
|
locusRSID, exists := companyAliasesMap_FamilyTreeDNA[locusAlias]
|
|
if (exists == false){
|
|
return false, 0, nil
|
|
}
|
|
|
|
return true, locusRSID, nil
|
|
|
|
} else if (companyName == "MyHeritage"){
|
|
|
|
locusRSID, exists := companyAliasesMap_MyHeritage[locusAlias]
|
|
if (exists == false){
|
|
return false, 0, nil
|
|
}
|
|
|
|
return true, locusRSID, nil
|
|
}
|
|
|
|
return false, 0, errors.New("GetCompanyAliasRSID called with invalid companyName: " + companyName)
|
|
}
|
|
|
|
|
|
// This function is only public for use in testing
|
|
func GetLocusMetadataObjectsList()([]LocusMetadata, error){
|
|
|
|
chromosomesList := []int{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22}
|
|
|
|
locusMetadataObjectsList := make([]LocusMetadata, 0, len(chromosomesList))
|
|
|
|
for _, chromosomesInt := range chromosomesList{
|
|
|
|
chromosomeLocusMetadataObjectsList, err := GetLocusMetadataObjectsListByChromosome(chromosomesInt)
|
|
if (err != nil){ return nil, err }
|
|
|
|
locusMetadataObjectsList = append(locusMetadataObjectsList, chromosomeLocusMetadataObjectsList...)
|
|
}
|
|
|
|
return locusMetadataObjectsList, nil
|
|
}
|
|
|
|
|
|
func GetLocusMetadataObjectsListByChromosome(chromosome int)([]LocusMetadata, error){
|
|
|
|
if (chromosome < 1 || chromosome > 22){
|
|
chromosomeString := helpers.ConvertIntToString(chromosome)
|
|
return nil, errors.New("GetLocusMetadataObjectsListByChromosome called with invalid chromosome: " + chromosomeString)
|
|
}
|
|
|
|
// Outputs:
|
|
// -bool: File exists
|
|
// -[]byte: File bytes
|
|
getFileBytes := func()(bool, []byte){
|
|
|
|
if (chromosome == 1){
|
|
return true, LocusMetadataFile_Chromosome1
|
|
}
|
|
if (chromosome == 2){
|
|
return true, LocusMetadataFile_Chromosome2
|
|
}
|
|
if (chromosome == 3){
|
|
return true, LocusMetadataFile_Chromosome3
|
|
}
|
|
if (chromosome == 4){
|
|
return true, LocusMetadataFile_Chromosome4
|
|
}
|
|
if (chromosome == 5){
|
|
return true, LocusMetadataFile_Chromosome5
|
|
}
|
|
if (chromosome == 6){
|
|
return true, LocusMetadataFile_Chromosome6
|
|
}
|
|
if (chromosome == 7){
|
|
return true, LocusMetadataFile_Chromosome7
|
|
}
|
|
if (chromosome == 8){
|
|
return true, LocusMetadataFile_Chromosome8
|
|
}
|
|
if (chromosome == 9){
|
|
return true, LocusMetadataFile_Chromosome9
|
|
}
|
|
if (chromosome == 10){
|
|
return true, LocusMetadataFile_Chromosome10
|
|
}
|
|
if (chromosome == 11){
|
|
return true, LocusMetadataFile_Chromosome11
|
|
}
|
|
if (chromosome == 12){
|
|
return true, LocusMetadataFile_Chromosome12
|
|
}
|
|
if (chromosome == 13){
|
|
return true, LocusMetadataFile_Chromosome13
|
|
}
|
|
if (chromosome == 14){
|
|
return true, LocusMetadataFile_Chromosome14
|
|
}
|
|
if (chromosome == 15){
|
|
return true, LocusMetadataFile_Chromosome15
|
|
}
|
|
if (chromosome == 16){
|
|
return true, LocusMetadataFile_Chromosome16
|
|
}
|
|
if (chromosome == 17){
|
|
return true, LocusMetadataFile_Chromosome17
|
|
}
|
|
//if (chromosome == 18){
|
|
// return true, LocusMetadataFile_Chromosome18
|
|
//}
|
|
if (chromosome == 19){
|
|
return true, LocusMetadataFile_Chromosome19
|
|
}
|
|
if (chromosome == 20){
|
|
return true, LocusMetadataFile_Chromosome20
|
|
}
|
|
if (chromosome == 21){
|
|
return true, LocusMetadataFile_Chromosome21
|
|
}
|
|
if (chromosome == 22){
|
|
return true, LocusMetadataFile_Chromosome22
|
|
}
|
|
return false, nil
|
|
}
|
|
|
|
fileExists, fileBytes := getFileBytes()
|
|
if (fileExists == false){
|
|
// No loci exist for this chromosome
|
|
emptyList := make([]LocusMetadata, 0)
|
|
return emptyList, nil
|
|
}
|
|
|
|
var locusMetadataObjectsList []LocusMetadata
|
|
|
|
err := json.Unmarshal(fileBytes, &locusMetadataObjectsList)
|
|
if (err != nil) { return nil, err }
|
|
|
|
return locusMetadataObjectsList, nil
|
|
}
|
|
|
|
|
|
|