401 lines
7.6 KiB
Go
401 lines
7.6 KiB
Go
|
package readBiobankData
|
||
|
|
||
|
// File openSNP.go provides a data structure and a function for reading data from the OpenSNP.org biobank.
|
||
|
|
||
|
import "seekia/internal/helpers"
|
||
|
|
||
|
import "encoding/csv"
|
||
|
import "os"
|
||
|
import "io"
|
||
|
|
||
|
|
||
|
// PhenotypeData_OpenSNP holds the phenotype values read for a single OpenSNP user.
//
// Every trait is stored as a pair: an ...IsKnown flag and the value itself.
// The reader in this file leaves the value at its zero value whenever the
// corresponding flag is false, so callers should check the flag first.
type PhenotypeData_OpenSNP struct {

	// Numeric identifier of the user (first column of the phenotypes file).
	UserID int

	EyeColorIsKnown bool

	// Either "Green", "Blue", "Hazel", or "Brown"
	EyeColor string

	LactoseToleranceIsKnown bool

	// true == Is lactose Tolerant
	LactoseTolerance bool

	HairColorIsKnown bool

	// Hair color as a string. The set of allowed values is not defined in this file.
	HairColor string

	HeightIsKnown bool

	// Height is expressed in centimeters
	Height float64
}
|
||
|
|
||
|
// This function reads the phenotypes_202308230100.csv file in the openSNP biobank data.
|
||
|
//
|
||
|
//Outputs:
|
||
|
// -bool: Able to read file (file is well formed and not corrupt)
|
||
|
// -[]PhenotypeData_OpenSNP
|
||
|
func ReadOpenSNPPhenotypesFile(fileObject *os.File)(bool, []PhenotypeData_OpenSNP){
|
||
|
|
||
|
csvFileReader := csv.NewReader(fileObject)
|
||
|
csvFileReader.LazyQuotes = true
|
||
|
csvFileReader.Comma = ';'
|
||
|
|
||
|
// First we read the first line (header line)
|
||
|
|
||
|
_, err := csvFileReader.Read()
|
||
|
if (err != nil){
|
||
|
|
||
|
// File is corrupt
|
||
|
return false, nil
|
||
|
}
|
||
|
|
||
|
// Now we iterate through each user's phenotype data
|
||
|
|
||
|
//Map Structure: User ID -> Phenotype data object
|
||
|
userPhenotypeDataMap := make(map[int]PhenotypeData_OpenSNP)
|
||
|
|
||
|
for {
|
||
|
|
||
|
userDataLineSlice, err := csvFileReader.Read()
|
||
|
if (err != nil) {
|
||
|
|
||
|
if (err == io.EOF){
|
||
|
// We have reached the end of the file
|
||
|
break
|
||
|
}
|
||
|
// File is corrupt
|
||
|
return false, nil
|
||
|
}
|
||
|
|
||
|
userIDString := userDataLineSlice[0]
|
||
|
|
||
|
userID, err := helpers.ConvertStringToInt(userIDString)
|
||
|
if (err != nil){
|
||
|
// File is corrupt
|
||
|
return false, nil
|
||
|
}
|
||
|
|
||
|
_, exists := userPhenotypeDataMap[userID]
|
||
|
if (exists == true){
|
||
|
|
||
|
// This user has multiple entries
|
||
|
// Each entry is identical except for the raw genome filename
|
||
|
// We will continue
|
||
|
|
||
|
continue
|
||
|
}
|
||
|
|
||
|
//Outputs:
|
||
|
// -bool: User eye color is known
|
||
|
// -string: User eye color
|
||
|
getUserEyeColor := func()(bool, string){
|
||
|
|
||
|
userEyeColorRaw := userDataLineSlice[5]
|
||
|
|
||
|
switch userEyeColorRaw{
|
||
|
|
||
|
case "-":{
|
||
|
return false, ""
|
||
|
}
|
||
|
case "Brown",
|
||
|
"brown",
|
||
|
"Dark brown",
|
||
|
"Brown/black",
|
||
|
"Grey brown":{
|
||
|
|
||
|
return true, "Brown"
|
||
|
}
|
||
|
case "Hazel",
|
||
|
"hazel",
|
||
|
"Brown-green",
|
||
|
"brown-green",
|
||
|
"Hazel/Light Brown",
|
||
|
"Hazel (light brown, dark green, dark blue)",
|
||
|
"Brown-amber",
|
||
|
"Indeterminate brown-green with a subtle grey caste",
|
||
|
"Hazel (brown/green)",
|
||
|
"Green-hazel",
|
||
|
"Amber - (yellow/ocre brown)",
|
||
|
"Hazel/light brown",
|
||
|
"Green-brown",
|
||
|
"green-brown",
|
||
|
"Brown - brown and green in bright sunlight",
|
||
|
"Hazel/yellow",
|
||
|
"Brown-(green when external temperature rises)",
|
||
|
"Ambar-green",
|
||
|
"Olive-brown ringing burnt umber-brown",
|
||
|
"Green with brown freckles",
|
||
|
"Green-Hazel",
|
||
|
"Ambar-Green",
|
||
|
"Brown-Amber",
|
||
|
"Hazel/Yellow",
|
||
|
"Brown center starburst, amber and olive green, with dark gray outer ring":{
|
||
|
|
||
|
return true, "Hazel"
|
||
|
}
|
||
|
case "Blue",
|
||
|
"Blue-grey",
|
||
|
"Blue grey",
|
||
|
"Gray-blue",
|
||
|
"Blue-grey with central heterochromia",
|
||
|
"Dark blue",
|
||
|
"blue",
|
||
|
"Light blue-green",
|
||
|
"blue-grey",
|
||
|
"Dark Grayish-Blue Eyes (like a stone)":{
|
||
|
|
||
|
return true, "Blue"
|
||
|
}
|
||
|
case "Green",
|
||
|
"Green ",
|
||
|
"green",
|
||
|
"Green-gray",
|
||
|
"Blue-green ",
|
||
|
"Blue-green",
|
||
|
"blue-green ",
|
||
|
"Light-mixed green",
|
||
|
"blue-green",
|
||
|
"Blue with a yellow ring of flecks that make my eyes look green depending on the light or my mood",
|
||
|
"Light-mixed Green",
|
||
|
"Blue-green heterochromia",
|
||
|
"Blue-green-grey":{
|
||
|
|
||
|
return true, "Green"
|
||
|
}
|
||
|
|
||
|
//TODO: Add grey as its own seperate color?
|
||
|
}
|
||
|
|
||
|
return false, ""
|
||
|
}
|
||
|
|
||
|
userEyeColorIsKnown, userEyeColor := getUserEyeColor()
|
||
|
|
||
|
//Outputs:
|
||
|
// -bool: User lactose Tolerance is known
|
||
|
// -bool: User lactose Tolerance
|
||
|
getUserLactoseTolerance := func()(bool, bool){
|
||
|
|
||
|
userLactoseToleranceRaw := userDataLineSlice[6]
|
||
|
|
||
|
switch userLactoseToleranceRaw{
|
||
|
|
||
|
case "-":{
|
||
|
return false, false
|
||
|
}
|
||
|
case "Yes",
|
||
|
"Lactose-intolerant",
|
||
|
"Lactose intolerant",
|
||
|
"lactose-intolerant",
|
||
|
" allergic to all forms of dairy ",
|
||
|
" Allergic to all forms of dairy ",
|
||
|
"Severe gi pain ",
|
||
|
"severe GI pain ":{
|
||
|
|
||
|
return true, false
|
||
|
}
|
||
|
case "No",
|
||
|
"Lactose-tolerant",
|
||
|
"Lactose tolerant",
|
||
|
"lactose-tolerant",
|
||
|
"lactose tolerant",
|
||
|
"False":{
|
||
|
return true, true
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return false, false
|
||
|
}
|
||
|
|
||
|
userLactoseToleranceIsKnown, userLactoseTolerance := getUserLactoseTolerance()
|
||
|
|
||
|
//Outputs:
|
||
|
// -bool: User hair color is known
|
||
|
// -string: Hair Color
|
||
|
getUserHairColor := func()(bool, string){
|
||
|
|
||
|
//userHairColorRaw := userDataLineSlice[11]
|
||
|
|
||
|
//TODO
|
||
|
|
||
|
return false, ""
|
||
|
}
|
||
|
|
||
|
userHairColorIsKnown, userHairColor := getUserHairColor()
|
||
|
|
||
|
|
||
|
// Outputs:
|
||
|
// -bool: User height is known
|
||
|
// -int: User height (in centimeters)
|
||
|
getUserHeight := func()(bool, float64){
|
||
|
|
||
|
userHeightRaw := userDataLineSlice[13]
|
||
|
|
||
|
switch userHeightRaw{
|
||
|
case "-":{
|
||
|
return false, 0
|
||
|
}
|
||
|
case `4'0"`:{
|
||
|
return true, 121.92
|
||
|
}
|
||
|
case `4'1"`:{
|
||
|
return true, 124.46
|
||
|
}
|
||
|
case `4'2"`:{
|
||
|
return true, 127
|
||
|
}
|
||
|
case `4'3"`:{
|
||
|
return true, 129.54
|
||
|
}
|
||
|
case `4'4"`:{
|
||
|
return true, 132.08
|
||
|
}
|
||
|
case `4'5"`:{
|
||
|
return true, 134.62
|
||
|
}
|
||
|
case `4'6"`:{
|
||
|
return true, 137.16
|
||
|
}
|
||
|
case `4'7"`:{
|
||
|
return true, 139.7
|
||
|
}
|
||
|
case `4'8"`:{
|
||
|
return true, 142.24
|
||
|
}
|
||
|
case `4'9"`:{
|
||
|
return true, 144.78
|
||
|
}
|
||
|
case `4'10"`:{
|
||
|
return true, 147.32
|
||
|
}
|
||
|
case `4'11"`:{
|
||
|
return true, 149.86
|
||
|
}
|
||
|
case `5'`:{
|
||
|
return true, 152.4
|
||
|
}
|
||
|
case `5'1"`:{
|
||
|
return true, 154.94
|
||
|
}
|
||
|
case `5'2"`:{
|
||
|
return true, 157.48
|
||
|
}
|
||
|
case `5'3"`, `5'3''`, `160 cm`:{
|
||
|
return true, 160
|
||
|
}
|
||
|
case `5'4"`:{
|
||
|
return true, 162.56
|
||
|
}
|
||
|
case `5'5"`:{
|
||
|
return true, 165.1
|
||
|
}
|
||
|
case `5'6"`:{
|
||
|
return true, 167.64
|
||
|
}
|
||
|
case `168 cm`:{
|
||
|
return true, 168
|
||
|
}
|
||
|
case `5'7"`:{
|
||
|
return true, 170.18
|
||
|
}
|
||
|
case `5'8"`:{
|
||
|
return true, 172.72
|
||
|
}
|
||
|
case `5'9"`:{
|
||
|
return true, 175.26
|
||
|
}
|
||
|
case `5'10"`, `5'10''`:{
|
||
|
return true, 177.8
|
||
|
}
|
||
|
case `179 cm`:{
|
||
|
return true, 179
|
||
|
}
|
||
|
case `180cm`:{
|
||
|
return true, 180
|
||
|
}
|
||
|
case `5'11"`:{
|
||
|
return true, 180.34
|
||
|
}
|
||
|
case `6'`:{
|
||
|
return true, 182.88
|
||
|
}
|
||
|
case `183 cm`:{
|
||
|
return true, 183
|
||
|
}
|
||
|
case `6'1"`:{
|
||
|
return true, 185.42
|
||
|
}
|
||
|
case `6'2"`:{
|
||
|
return true, 187.96
|
||
|
}
|
||
|
case `6'3"`:{
|
||
|
return true, 190.5
|
||
|
}
|
||
|
case `6'4"`:{
|
||
|
return true, 193.04
|
||
|
}
|
||
|
case `6'5"`:{
|
||
|
return true, 195.58
|
||
|
}
|
||
|
case `6'6"`:{
|
||
|
return true, 198.12
|
||
|
}
|
||
|
case `6'7"`:{
|
||
|
return true, 200.66
|
||
|
}
|
||
|
case `6'8"`:{
|
||
|
return true, 203.2
|
||
|
}
|
||
|
case `6'9"`:{
|
||
|
return true, 205.74
|
||
|
}
|
||
|
case `6'10"`:{
|
||
|
return true, 208.28
|
||
|
}
|
||
|
case `6'11"`:{
|
||
|
return true, 210.82
|
||
|
}
|
||
|
case `7'`:{
|
||
|
return true, 213.36
|
||
|
}
|
||
|
|
||
|
//TODO: Add more responses
|
||
|
}
|
||
|
|
||
|
return false, 0
|
||
|
}
|
||
|
|
||
|
userHeightIsKnown, userHeight := getUserHeight()
|
||
|
|
||
|
userPhenotypeDataObject := PhenotypeData_OpenSNP{
|
||
|
UserID: userID,
|
||
|
EyeColorIsKnown: userEyeColorIsKnown,
|
||
|
EyeColor: userEyeColor,
|
||
|
LactoseToleranceIsKnown: userLactoseToleranceIsKnown,
|
||
|
LactoseTolerance: userLactoseTolerance,
|
||
|
HairColorIsKnown: userHairColorIsKnown,
|
||
|
HairColor: userHairColor,
|
||
|
HeightIsKnown: userHeightIsKnown,
|
||
|
Height: userHeight,
|
||
|
}
|
||
|
|
||
|
userPhenotypeDataMap[userID] = userPhenotypeDataObject
|
||
|
}
|
||
|
|
||
|
userPhenotypeDataList := helpers.GetListOfMapValues(userPhenotypeDataMap)
|
||
|
|
||
|
return true, userPhenotypeDataList
|
||
|
}
|
||
|
|
||
|
|
||
|
|
||
|
|