Paleobiology DataBase Explorer

This shiny app allows you to explore the Paleobiology Database (PBDB) data for occurrences of a specific taxon within a geologic period. You can select two variables to compare and filter the data by their values. The app will display a bar plot with the count of occurrences of one variable by the other.

#| standalone: true
library(shiny)
library(ggplot2)
library(plotly)
library(jsonlite)
library(countrycode)
library(shinyWidgets)

# Diccionario de nombres descriptivos
variable_names <- c(
    oid = "Occurrence Number",
    eid = "Reidentification Number",
    cid = "Collection Number",
    idn = "Identified Name",
    iid = "Identified Number",
    tdf = "Difference",
    tna = "Accepted Name",
    rnk = "Accepted Rank",
    tid = "Accepted Number",
    oei = "Early Interval",
    eag = "Max Ma",
    lag = "Min Ma",
    aut = "Reference Author",
    pby = "Reference Publication Year",
    rid = "Reference Number",
    phl = "Phylum",
    cll = "Class",
    odl = "Order",
    fml = "Family",
    gnl = "Genus",
    abv = "Abundance Value",
    abu = "Abundance Unit",
    lng = "Longitude",
    lat = "Latitude",
    cnm = "Collection Name",
    aka = "Collection AKA",
    cc2 = "Country",
    stp = "State",
    cny = "County",
    prc = "Precision",
    gsc = "Geographic Scale",
    ggc = "Geographic Comments",
    pm1 = "Paleomodel",
    gpl = "Geoplate",
    ps1 = "Paleoage",
    pln = "Paleolongitude",
    pla = "Paleolatitude",
    sfm = "Formation",
    ssc = "Stratigraphic Scale",
    scm = "Stratigraphic Comments",
    ldc = "Lithology Description",
    lt1 = "Lithology 1",
    la1 = "Lithology Adjective 1",
    lm1 = "Minor Lithology 1",
    ff1 = "Fossils from Lithology 1",
    lt2 = "Lithology 2",
    la2 = "Lithology Adjective 2",
    env = "Environment",
    tec = "Tectonic Setting",
    cps = "Component Size Classes",
    cpb = "Associated Parts",
    tpm = "Preservation Modes",
    tpq = "Preservation Quality",
    tps = "Spatial Resolution",
    tpc = "Concentration",
    cct = "Collection Type",
    ccx = "Collection Methods",
    ccv = "Collection Coverage",
    ccs = "Collection Size",
    ccc = "Collectors",
    ccd = "Collection Dates",
    jev = "Taxon Environment",
    jmo = "Motility",
    jlh = "Life Habit",
    jdt = "Diet",
    jre = "Reproduction",
    jon = "Ontogeny",
    jco = "Composition",
    jsa = "Skeletal Architecture",
    jth = "Theropod",
    idr = "Identified Rank",
    oli = "Late Interval",
    ocm = "Occurrence Comments",
    sgr = "Stratigraphic Group",
    lm2 = "Minor Lithology 2",
    ff2 = "Fossils from Lithology 2",
    gcm = "Geology Comments",
    cpa = "Articulated Parts",
    tpt = "Temporal Resolution",
    ptd = "Protected Status",
    lf1 = "Lithification 1",
    lf2 = "Lithification 2",
    tpo = "Orientation",
    tpf = "Fragmentation",
    ccm = "Collection Comments",
    tcm = "Taxonomy Comments",
    tpb = "Bioerosion",
    pcm = "Preservation Comments",
    flg = "Flag",
    cpc = "Common Body Parts",
    smb = "Stratigraphic Member",
    sls = "Local Section",
    slb = "Local Bed",
    slu = "Local Bed Unit",
    slo = "Local Order",
    ccu = "Museum",
    tpe = "Encrustation",
    cpt = "Feeding/Predation Traces",
    tpl = "Lagerstätten",
    tpa = "Abundance in Sediment",
    tpr = "Sorting",
    acm = "Component Comments",
    cns = "Census",
    ccr = "Rock Censused",
    cpd = "Collection Period",
    szn = "Stratigraphic Zone",
    srs = "Regional Section",
    srb = "Regional Bed",
    sro = "Regional Order"
)

ui <- fluidPage(
    fluidRow(
        column(6,
            textInput("taxon", "Enter Taxon", value = "")
        ),
        column(6,
            selectInput("geologic_period", "Geologic Period", 
                        choices = c("Precambrian", "Cambrian", "Ordovician", "Silurian", 
                                    "Devonian", "Carboniferous", "Permian", "Triassic", 
                                    "Jurassic", "Cretaceous", "Paleogene", "Neogene", 
                                    "Quaternary"))
        )
    ),
    fluidRow(
        column(12,
            actionButton("search", "Search")
        )
    ),
    
    fluidRow(
        column(6,
            selectInput("x_variable", "Select Y Variable", choices = NULL),
            uiOutput("x_values_ui")
        ),
        column(6,
            selectInput("y_variable", "Select X Variable", choices = NULL),
            uiOutput("y_values_ui")
        )
    ),
    
    fluidRow(
        column(12,
            plotlyOutput("interactive_plot", width = "90vw", height = "90vh"),
            #verbatimTextOutput("data_head")
        )
    )
)

server <- function(input, output, session) {
    pbdb_data <- reactiveVal(NULL)
    
    observeEvent(input$search, {
        req(input$taxon, input$geologic_period)
        
        # Define the URL for the PBDB data in JSON format
        data_url <- paste0("https://paleobiodb.org/data1.2/occs/list.json?base_name=", 
                           input$taxon, "&interval=", input$geologic_period, "&show=full,loc")
        
        # Download and read the data directly
        data <- tryCatch({
            fromJSON(data_url)$records
        }, error = function(e) {
            cat("Error reading JSON data:", e$message, "\n")
            return(NULL)
        })
        
        if (is.null(data)) {
            cat("No data to display due to error.\n")
            return(NULL)
        }
        
        cat("Data loaded with", nrow(data), "rows\n")
        cat("Variables:", ncol(data), "\n", names(data), "\n")
        
        # Convertir códigos de país a nombres de país
        data$cc2 <- countrycode::countrycode(data$cc2, origin = "iso2c", destination = "country.name")
        
        # Corregir manualmente los nombres de país específicos
        data$cc2[is.na(data$cc2)] <- "Unknown"
        data$cc2[data$cc2 == "AA"] <- "Aruba"
        data$cc2[data$cc2 == "UK"] <- "United Kingdom"
        
        # Update the reactive value
        pbdb_data(data)
        
        # Update the selectInput choices with the variable names and their descriptions
        choices <- setNames(names(data), variable_names[names(data)])
        updateSelectInput(session, "x_variable", choices = sort(choices))
        updateSelectInput(session, "y_variable", choices = sort(choices))
        
        # Render the head of the data with only the first 4 columns
        output$data_head <- renderPrint({
            head(data[, 1:4])
        })
    })
    
    observeEvent(input$x_variable, {
        req(input$x_variable)
        data <- pbdb_data()
        req(data)
        
        # Get unique values for the selected x_variable
        unique_x_values <- unique(data[[input$x_variable]])
        
        # Create checkboxes for unique x_variable values
        output$x_values_ui <- renderUI({
            pickerInput("x_values", "Select X Values", choices = unique_x_values, selected = unique_x_values, multiple = TRUE, options = list(`actions-box` = TRUE, `live-search` = TRUE, `size` = 10))
        })
    })
    
    observeEvent(input$y_variable, {
        req(input$y_variable)
        data <- pbdb_data()
        req(data)
        
        # Get unique values for the selected y_variable
        unique_y_values <- unique(data[[input$y_variable]])
        
        # Create checkboxes for unique y_variable values
        output$y_values_ui <- renderUI({
            pickerInput("y_values", "Select Y Values", choices = unique_y_values, selected = unique_y_values, multiple = TRUE, options = list(`actions-box` = TRUE, `live-search` = TRUE, `size` = 10))
        })
    })
    
    observeEvent(c(input$x_variable, input$y_variable, input$x_values, input$y_values), {
        req(input$x_variable, input$y_variable, input$x_values, input$y_values)
        
        output$interactive_plot <- renderPlotly({
            data <- pbdb_data()
            req(data)
            
            # Ensure the selected variables exist in the dataset
            if(input$x_variable %in% colnames(data) && input$y_variable %in% colnames(data)){
                # Filtrar los datos para omitir los valores NA en las variables seleccionadas
                filtered_data <- data[!is.na(data[[input$x_variable]]) & !is.na(data[[input$y_variable]]), ]
                
                # Filtrar los datos en función de los valores seleccionados
                filtered_data <- filtered_data[filtered_data[[input$x_variable]] %in% input$x_values & filtered_data[[input$y_variable]] %in% input$y_values, ]
                
                gg <- ggplot(filtered_data, aes_string(y = input$x_variable, fill = input$y_variable)) +
                      geom_bar(position = "dodge") +
                      theme_minimal() +
                      ylab(variable_names[input$x_variable]) +
                      xlab(paste("Count of", variable_names[input$y_variable])) +
                      ggtitle(paste("Count of occurrences of", variable_names[input$y_variable], "by", variable_names[input$x_variable])) +
                      theme(axis.text.y = element_text(angle = 0, vjust = 0.5, hjust = 1))
                
                ggplotly(gg)
            } else {
                ggplotly(ggplot() + ggtitle("Selected variables not found in the data"))
            }
        })
    })
}

shinyApp(ui, server)

Data source: The data is retrieved from the Paleobiology Database (PBDB) using the API available at paleobiodb.org.