# Load required libraries
library(eurostat)
library(dplyr)
library(lubridate)
library(openxlsx) # For Excel export
library(readr) # For CSV export
# Function to safely download and examine eurostat data
download_energy_data <- function(dataset_id) {
cat("Attempting to download:", dataset_id, "\n")
tryCatch({
data <- get_eurostat(dataset_id, time_format = "date", cache = TRUE)
cat("✓ Successfully downloaded", nrow(data), "rows\n")
cat(" Columns:", paste(names(data), collapse = ", "), "\n")
# Check for Russia in partner column
if ("partner" %in% names(data)) {
russia_entries <- unique(data$partner)[grepl("RU", unique(data$partner))]
if (length(russia_entries) > 0) {
cat(" ✓ Found Russia codes:", paste(russia_entries, collapse = ", "), "\n")
}
}
return(data)
}, error = function(e) {
cat("✗ Failed to download", dataset_id, ":", e$message, "\n")
return(NULL)
})
}
# List of energy import datasets to try
energy_datasets <- c(
"nrg_ti_gas", # Natural gas imports/exports
"nrg_ti_oil", # Oil and petroleum products
"nrg_ti_sff", # Solid fossil fuels (coal, etc.)
"nrg_ti_eh", # Energy, heating value
"nrg_te" # Energy trade flows
)
cat("=== DOWNLOADING ENERGY DATASETS ===\n")
# Download all available energy datasets
all_energy_data <- list()
for (dataset_id in energy_datasets) {
data <- download_energy_data(dataset_id)
if (!is.null(data)) {
data$dataset_source <- dataset_id
all_energy_data[[dataset_id]] <- data
}
}
# If no specific energy datasets work, try external trade with energy filter
if (length(all_energy_data) == 0) {
cat("\n=== TRYING EXTERNAL TRADE DATASET ===\n")
trade_data <- download_energy_data("ext_lt_maineu")
if (!is.null(trade_data)) {
# Filter for energy products (HS Chapter 27 or SITC section 3)
if ("sitc06" %in% names(trade_data)) {
energy_trade <- trade_data %>% filter(grepl("^3", sitc06))
cat("Filtered to", nrow(energy_trade), "energy product rows using SITC\n")
} else if ("hs2" %in% names(trade_data)) {
energy_trade <- trade_data %>% filter(hs2 == "27")
cat("Filtered to", nrow(energy_trade), "energy product rows using HS\n")
} else {
energy_trade <- trade_data # Use all if no product classification found
}
energy_trade$dataset_source <- "ext_lt_maineu_energy"
all_energy_data[["trade_energy"]] <- energy_trade
}
}
cat(paste("\n=== PROCESSING", length(all_energy_data), "DATASETS ===\n"))
# Combine all datasets
if (length(all_energy_data) > 0) {
# Combine all data
combined_data <- bind_rows(all_energy_data, .id = "dataset_id")
cat("Combined dataset dimensions:", nrow(combined_data), "rows x", ncol(combined_data), "columns\n")
# Filter for Russian data
cat("\n=== FILTERING FOR RUSSIAN IMPORTS ===\n")
# Find Russia in partner column
if ("partner" %in% names(combined_data)) {
russia_codes <- unique(combined_data$partner)[grepl("RU", unique(combined_data$partner))]
cat("Russia partner codes found:", paste(russia_codes, collapse = ", "), "\n")
russian_data <- combined_data %>%
filter(
partner %in% russia_codes,
!is.na(values),
values != 0
)
} else {
cat("No 'partner' column found. Looking for alternative columns...\n")
# Look for other geography columns that might contain Russia
geo_cols <- names(combined_data)[grepl("geo|country|partner", names(combined_data), ignore.case = TRUE)]
cat("Possible geography columns:", paste(geo_cols, collapse = ", "), "\n")
if (length(geo_cols) > 0) {
# Use first geography column found
geo_col <- geo_cols[1]
russia_codes <- unique(combined_data[[geo_col]])[grepl("RU", unique(combined_data[[geo_col]]))]
russian_data <- combined_data %>%
filter(
!!sym(geo_col) %in% russia_codes,
!is.na(values),
values != 0
)
} else {
stop("Could not identify geography column containing Russia")
}
}
cat("Russian import data:", nrow(russian_data), "rows\n")
if (nrow(russian_data) > 0) {
# Add readable date columns
russian_data <- russian_data %>%
mutate(
year = year(TIME_PERIOD),
month = month(TIME_PERIOD),
date_readable = format(TIME_PERIOD, "%Y-%m")
) %>%
select(dataset_source, everything()) %>% # Move dataset_source to front
arrange(TIME_PERIOD, dataset_source)
# Print summary information
cat("\n=== DATA SUMMARY ===\n")
cat("Date range:", as.character(min(russian_data$TIME_PERIOD)), "to", as.character(max(russian_data$TIME_PERIOD)), "\n")
cat("Datasets included:\n")
print(table(russian_data$dataset_source))
if ("geo" %in% names(russian_data)) {
cat("EU countries/regions:\n")
geo_counts <- table(russian_data$geo)
print(head(sort(geo_counts, decreasing = TRUE), 15))
}
cat("Value statistics:\n")
print(summary(russian_data$values))
# Create clean dataset for export
export_data <- russian_data %>%
select(
dataset_source,
date = TIME_PERIOD,
year,
month,
date_readable,
everything(),
-dataset_id # Remove redundant column if it exists
)
# Export to CSV
csv_filename <- "eu_russian_energy_imports.csv"
write_csv(export_data, csv_filename)
cat(paste("\n✓ Exported", nrow(export_data), "rows to", csv_filename, "\n"))
# Export to Excel with multiple sheets
excel_filename <- "eu_russian_energy_imports.xlsx"
# Create workbook
wb <- createWorkbook()
# Add main data sheet
addWorksheet(wb, "All_Data")
writeData(wb, "All_Data", export_data)
# Add summary sheet by dataset
if ("dataset_source" %in% names(export_data)) {
summary_by_dataset <- export_data %>%
group_by(dataset_source, year) %>%
summarise(
total_value = sum(values, na.rm = TRUE),
observations = n(),
countries = n_distinct(geo, na.rm = TRUE),
.groups = "drop"
) %>%
arrange(dataset_source, year)
addWorksheet(wb, "Summary_by_Dataset")
writeData(wb, "Summary_by_Dataset", summary_by_dataset)
}
# Add summary sheet by country/region
if ("geo" %in% names(export_data)) {
summary_by_country <- export_data %>%
group_by(geo, year) %>%
summarise(
total_value = sum(values, na.rm = TRUE),
observations = n(),
datasets = n_distinct(dataset_source, na.rm = TRUE),
.groups = "drop"
) %>%
arrange(desc(total_value))
addWorksheet(wb, "Summary_by_Country")
writeData(wb, "Summary_by_Country", summary_by_country)
}
# Add metadata sheet
metadata <- data.frame(
Information = c(
"Export Date",
"Total Rows",
"Date Range",
"Datasets Included",
"Description"
),
Value = c(
as.character(Sys.Date()),
nrow(export_data),
paste(min(export_data$year), "-", max(export_data$year)),
paste(unique(export_data$dataset_source), collapse = ", "),
"EU imports of energy products from Russia, sourced from Eurostat"
)
)
addWorksheet(wb, "Metadata")
writeData(wb, "Metadata", metadata)
# Save workbook
saveWorkbook(wb, excel_filename, overwrite = TRUE)
cat(paste("✓ Exported", nrow(export_data), "rows to", excel_filename, "with multiple sheets\n"))
# Print first few rows as preview
cat("\n=== DATA PREVIEW ===\n")
print(head(export_data, 10))
cat("\n=== FILES CREATED ===\n")
cat("1.", csv_filename, "- Raw data in CSV format\n")
cat("2.", excel_filename, "- Excel file with multiple sheets:\n")
cat(" - All_Data: Complete dataset\n")
cat(" - Summary_by_Dataset: Aggregated by data source\n")
cat(" - Summary_by_Country: Aggregated by country/region\n")
cat(" - Metadata: Information about the export\n")
} else {
cat("No Russian import data found in the datasets.\n")
}
} else {
cat("No energy datasets could be downloaded successfully.\n")
cat("Please check your internet connection and Eurostat API access.\n")
}