// seekia/utilities/createCitiesFile/createCitiesFile.go
//
// 242 lines
// 7.6 KiB
// Go

// This utility will create a .messagepack file containing the information for all cities from the Countries States Cities database
// We remove fields that we don't need and remove cities without latitude and longitude data.
// We do this to make the file smaller, and thus make Seekia a smaller download for users.
// Cities with duplicate coordinates are not removed: their latitude is shifted slightly so that every coordinate pair in the output is unique.
// The input file is 50.8 MB; the output file is 6.4 MB.
// The output file should be placed in /resources/worldLocations.
// You have to download the database file to generate the output worldCities.messagepack file.
// We are not including the input file in the source code so that the source code download is smaller
// The database repository: https://github.com/dr5hn/countries-states-cities-database/
//
// We are using version 2.2 of the database
// File to download: v2.2.tar.gz
// Download Link: https://github.com/dr5hn/countries-states-cities-database/archive/refs/tags/v2.2.tar.gz
// v2.2.tar.gz SHA256 checksum: fc9ed9642906d8629059c06094ed1ce53ad4fa510991a6485ff3422962cff1b9
//
// First, you must extract the downloaded .tar.gz file.
//
// We use the file cities.json as the input to the createCitiesFile utility.
// cities.json SHA256 checksum: b82a21f2c8402041c00787131f206eb06d931d12d062f4a8a0f2b07075263c99
//
// Output file: worldCities.messagepack
// worldCities.messagepack SHA256 checksum: 0c42a4c51db7f42dcc71035e375f010a5f80c41cd89d87f581a93f3226bf650d
package main
import "seekia/resources/worldLocations"
import "seekia/internal/encoding"
import "seekia/internal/helpers"
import "seekia/internal/localFilesystem"
import messagepack "github.com/vmihailenco/msgpack/v5"
import "errors"
import "encoding/json"
import "log"
// main generates ./worldCities.messagepack from ./cities.json and logs the outcome.
// Both paths are relative to the current working directory.
// If generation fails, the reason is logged and the process exits with status 1,
// so that scripts invoking this utility can detect the failure.
func main() {

	// Reads ./cities.json, converts each usable city, and writes ./worldCities.messagepack
	// Returns an error if the input file is missing or cannot be unmarshalled, if a city's country
	// has no known identifier, if a coordinate string cannot be converted, if a duplicate coordinate
	// cannot be resolved, or if encoding or writing the output fails.
	createCitiesFileFunction := func() error {

		// The fields we read from each city object in cities.json
		// encoding/json matches object keys to field names case-insensitively,
		// so these fields are filled from keys such as "state_name".
		type cityItem struct {

			// Name of the city
			Name string

			// Name of state city is in
			State_name string

			// Name of country city is in
			Country_name string

			// Coordinates, stored as strings in the input file
			Latitude  string
			Longitude string
		}

		fileExists, fileContents, err := localFilesystem.GetFileContents("./cities.json")
		if err != nil {
			return err
		}
		if !fileExists {
			return errors.New("cities.json file was not found.")
		}

		var citiesList []cityItem

		err = json.Unmarshal(fileContents, &citiesList)
		if err != nil {
			return err
		}

		// Some country names in the input file must be replaced before we look up their identifier
		//Map Structure: Name in input file -> Name we replace it with in our new file
		countryNamesToReplaceMap := map[string]string{
			"Bonaire, Sint Eustatius and Saba": "Bonaire",
			"Cote D'Ivoire (Ivory Coast)":      "Cote D'Ivoire",
			"Gambia The":                       "Gambia",
			"Virgin Islands (US)":              "United States",
			"Papua new Guinea":                 "Papua New Guinea",
			"Antigua and Barbuda":              "Antigua And Barbuda",
			"Hong Kong S.A.R.":                 "China",
			"Saint Kitts and Nevis":            "Saint Kitts And Nevis",
			"Saint Vincent and the Grenadines": "Saint Vincent And The Grenadines",
			"Trinidad and Tobago":              "Trinidad And Tobago",
		}

		//Outputs:
		//	-string: Country name to look up in countryIdentifiersMap
		getCountryName := func(inputCountryName string) string {

			newName, exists := countryNamesToReplaceMap[inputCountryName]
			if !exists {
				return inputCountryName
			}
			return newName
		}

		allCountryObjectsList, err := worldLocations.GetAllCountryObjectsList()
		if err != nil {
			return err
		}

		// Every name in a country's NamesList maps to that country's identifier
		// Map Structure: Country Name -> Country Identifier
		countryIdentifiersMap := make(map[string]int)

		for _, countryObject := range allCountryObjectsList {

			countryIdentifier := countryObject.Identifier

			for _, countryName := range countryObject.NamesList {
				countryIdentifiersMap[countryName] = countryIdentifier
			}
		}

		// We encode a messagepack file with the cities:
		// It is a slice of messagepack-encoded slices
		// Each slice represents a city object
		// []{Name (string), State (string), Country Identifier (int), Latitude (float64), Longitude (float64)}

		// The number of input cities is an upper bound on the number of output cities,
		// because cities are only ever skipped, never added.
		newLocationsSlice := make([]messagepack.RawMessage, 0, len(citiesList))

		type coordinatesObject struct {
			Latitude  float64
			Longitude float64
		}

		// We use this map to make sure no duplicate coordinate pairs are added
		// It contains the coordinates of every city added to newLocationsSlice so far
		existingCoordinatesMap := make(map[coordinatesObject]struct{}, len(citiesList))

		// Converts a city's coordinate strings and makes sure the result is not already in existingCoordinatesMap
		//Outputs:
		//	-bool: Coordinates exist (false if latitude and longitude are both 0)
		//	-float64: Latitude (shifted slightly if the original pair was a duplicate)
		//	-float64: Longitude
		//	-error
		getCityCoordinates := func(cityName string, cityLatitude string, cityLongitude string) (bool, float64, float64, error) {

			cityLatitudeFloat64, err := helpers.ConvertStringToFloat64(cityLatitude)
			if err != nil {
				return false, 0, 0, err
			}

			cityLongitudeFloat64, err := helpers.ConvertStringToFloat64(cityLongitude)
			if err != nil {
				return false, 0, 0, err
			}

			if cityLatitudeFloat64 == 0 && cityLongitudeFloat64 == 0 {
				// We treat a 0,0 coordinate pair as missing coordinate data
				return false, 0, 0, nil
			}

			cityCoordinatesObject := coordinatesObject{
				Latitude:  cityLatitudeFloat64,
				Longitude: cityLongitudeFloat64,
			}

			_, exists := existingCoordinatesMap[cityCoordinatesObject]
			if !exists {
				return true, cityLatitudeFloat64, cityLongitudeFloat64, nil
			}

			// The database has a duplicate.
			// We will slightly shift the coordinate until it is not a duplicate
			// Only the latitude is shifted, and we give up after 999 attempts

			for i := 1; i < 1000; i++ {

				newCityLatitude := cityLatitudeFloat64 + (.00000001 * float64(i))

				newCityCoordinatesObject := coordinatesObject{
					Latitude:  newCityLatitude,
					Longitude: cityLongitudeFloat64,
				}

				_, exists := existingCoordinatesMap[newCityCoordinatesObject]
				if !exists {
					return true, newCityLatitude, cityLongitudeFloat64, nil
				}
			}

			return false, 0, 0, errors.New("Too many coordinate collisions: " + cityName)
		}

		for _, city := range citiesList {

			cityName := city.Name
			cityState := city.State_name
			cityCountry := city.Country_name

			coordinatesExist, cityLatitude, cityLongitude, err := getCityCoordinates(cityName, city.Latitude, city.Longitude)
			if err != nil {
				return err
			}
			if !coordinatesExist {
				// This city has no coordinate info. Skip it.
				continue
			}

			countryName := getCountryName(cityCountry)

			countryIdentifier, exists := countryIdentifiersMap[countryName]
			if !exists {
				return errors.New("countryIdentifiersMap missing countryName: " + countryName)
			}

			// Each field is encoded on its own, then the 5 encoded fields are encoded together as one slice

			cityNameMessagepack, err := encoding.EncodeMessagePackBytes(cityName)
			if err != nil {
				return err
			}

			cityStateMessagepack, err := encoding.EncodeMessagePackBytes(cityState)
			if err != nil {
				return err
			}

			cityCountryIdentifierMessagepack, err := encoding.EncodeMessagePackBytes(countryIdentifier)
			if err != nil {
				return err
			}

			cityLatitudeMessagepack, err := encoding.EncodeMessagePackBytes(cityLatitude)
			if err != nil {
				return err
			}

			cityLongitudeMessagepack, err := encoding.EncodeMessagePackBytes(cityLongitude)
			if err != nil {
				return err
			}

			cityMessagepackSlice := []messagepack.RawMessage{cityNameMessagepack, cityStateMessagepack, cityCountryIdentifierMessagepack, cityLatitudeMessagepack, cityLongitudeMessagepack}

			cityEncodedMessagepack, err := encoding.EncodeMessagePackBytes(cityMessagepackSlice)
			if err != nil {
				return err
			}

			newLocationsSlice = append(newLocationsSlice, cityEncodedMessagepack)

			// We record the coordinates only after the city has been added,
			// so that later cities are compared against cities that are actually in the output.
			newCityCoordinatesObject := coordinatesObject{
				Latitude:  cityLatitude,
				Longitude: cityLongitude,
			}

			existingCoordinatesMap[newCityCoordinatesObject] = struct{}{}
		}

		newFileBytes, err := encoding.EncodeMessagePackBytes(newLocationsSlice)
		if err != nil {
			return err
		}

		err = localFilesystem.CreateOrOverwriteFile(newFileBytes, "./", "worldCities.messagepack")
		if err != nil {
			return err
		}

		return nil
	}

	err := createCitiesFileFunction()
	if err != nil {
		// log.Fatalln logs the message and then exits with status 1
		log.Fatalln("Failed to create cities file. Reason: " + err.Error())
	}

	log.Println("Successfully created cities file!")
}