## ----include = FALSE----------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) # Save current options and apply new ones for the vignette rendering old_options <- options(width = 75, datatable.print.width = 80) ## ----setup, message=FALSE, warning=FALSE--------------------------------- library(crystract) library(data.table) ## ----full-pipeline-with-comments, eval=TRUE------------------------------ # IMPORTANT: Update this path to point to your own downloaded CIF file. # 1. Try to find the file in the installed package cif_path <- system.file("extdata", "1590946.cif", package = "crystract") # 2. If that fails, look in the source directory if (cif_path == "") { cif_path <- "../inst/extdata/1590946.cif" } # 3. Final check to provide a clear error if both fail if (!file.exists(cif_path)) { stop("Could not find 1590946.cif in installed package or inst/extdata/") } # Run the pipeline on our single example file, extracting multiple bonding types at once analysis_results <- analyze_single_cif( cif_path, bonding_algorithms = c("minimum_distance", "brunner", "econ", "voronoi", "crystal_nn") ) # Let's inspect the structure of the output table. # It's a single row containing all our results in nested data.tables. str(analysis_results, max.level = 2) ## ----access-nested-data, eval=FALSE-------------------------------------- # # The result is a list-column, so we access the element with [[1]] # final_bonds <- analysis_results$bonds_minimum_distance[[1]] # # print(head(final_bonds)) ## ----load-data, eval=TRUE------------------------------------------------ # The path was defined in the previous section: cif_data_list <- read_cif_files(cif_path) # We'll work with the content of the first file cif_content <- cif_data_list[[1]] # Let's look at the first few lines of the raw data knitr::kable( head(cif_content), caption = "First 6 lines of the raw CIF data." ) ## ----extract-metadata, eval=TRUE----------------------------------------- database_code <- extract_database_code(cif_content) chemical_formula <- extract_chemical_formula(cif_content) space_group_name <- extract_space_group_name(cif_content) space_group_number <- extract_space_group_number(cif_content) cat("Database Code:", database_code, "\n") cat("Chemical Formula:", chemical_formula, "\n") cat("Space Group:", space_group_name, "(No.", space_group_number, ")\n") ## ----extract-metrics, eval=TRUE------------------------------------------ unit_cell_metrics <- extract_unit_cell_metrics(cif_content) print(unit_cell_metrics) ## ----extract-coords-symm, eval=TRUE-------------------------------------- # Extract the coordinates of the unique atoms in the asymmetric unit atomic_coordinates <- extract_atomic_coordinates(cif_content) print("Asymmetric Atomic Coordinates:") print(atomic_coordinates) # Extract the symmetry operations symmetry_operations <- extract_symmetry_operations(cif_content) print("Symmetry Operations (first 6 of 8):") print(head(symmetry_operations)) ## ----generate-structure, eval=TRUE--------------------------------------- # Apply symmetry to generate all atoms in the primary unit cell transformed_coords <- apply_symmetry_operations(atomic_coordinates, symmetry_operations, unit_cell_metrics) print("Unique atoms in full unit cell (first 6):") print(head(transformed_coords)) # Expand into a supercell for neighbor calculations expanded_coords <- expand_transformed_coords(transformed_coords) print("Atoms in supercell (first 6):") print(head(expanded_coords)) ## ----calc-distances, eval=TRUE------------------------------------------- distances <- calculate_distances(atomic_coordinates, expanded_coords, unit_cell_metrics) print("Calculated Distances (shortest 6):") print(head(distances[order(Distance)])) ## ----calc-bonding-neighbors, eval=TRUE----------------------------------- # 1. Minimum Distance bonds_min <- minimum_distance(distances, delta = 0.1) # 2. Brunner's Method bonds_brunner <- brunner_nn_reciprocal(distances) # 3. Hoppe's ECoN bonds_econ <- econ_nn(distances, atomic_coordinates) # 4. Voronoi Tessellation bonds_voronoi <- voronoi_nn(atomic_coordinates, expanded_coords, unit_cell_metrics) # 5. CrystalNN bonds_crystal_nn <- crystal_nn(distances, atomic_coordinates, expanded_coords, unit_cell_metrics) cat("Minimum Distance found", nrow(bonds_min), "bonds.\n") cat("CrystalNN found", nrow(bonds_crystal_nn), "bonds.\n") # Calculate integer neighbor counts based on the bonded pairs (e.g., using CrystalNN) neighbor_counts <- calculate_neighbor_counts(bonds_crystal_nn) print("CrystalNN Neighbor Counts:") print(neighbor_counts) ## ----calc-angles, eval=TRUE---------------------------------------------- bond_angles <- calculate_angles( bonds_min, atomic_coordinates, expanded_coords, unit_cell_metrics ) print("Calculated Bond Angles (first 6):") print(head(bond_angles)) ## ----propagate-errors, eval=TRUE----------------------------------------- # Propagate errors for interatomic distances bonded_pairs_with_error <- propagate_distance_error( bonds_min, atomic_coordinates, unit_cell_metrics ) print("Bonded Pairs with Distance Error (first 6):") print(head(bonded_pairs_with_error)) # Propagate errors for bond angles bond_angles_with_error <- propagate_angle_error( bond_angles, atomic_coordinates, expanded_coords, unit_cell_metrics ) print("Bond Angles with Angle Error (first 6):") print(head(bond_angles_with_error)) ## ----eval=FALSE---------------------------------------------------------- # # In an interactive R session, you would run this: # filtered_bonds <- filter_atoms_by_symbol( # data_table = bonded_pairs_with_error, # atom_col = "Atom1" # Filter based on the central atom # ) ## ----filter-by-wyckoff, eval=TRUE---------------------------------------- # 1. In our example, the asymmetric atoms do not have Wyckoff information from the CIF. # We will mock them as "4c" for this demonstration to show how the function works. atomic_coordinates[, WyckoffSymbol := c("c", "c", "c")] atomic_coordinates[, WyckoffMultiplicity := c(4, 4, 4)] print("Atomic coordinates showing Wyckoff information:") print(atomic_coordinates[, .(Label, WyckoffSymbol, WyckoffMultiplicity)]) cat("\n") # 2. Filter bonds where the central atom is on the "4c" Wyckoff site. bonds_from_4c_site <- filter_by_wyckoff_symbol( data_table = bonded_pairs_with_error, atomic_coordinates = atomic_coordinates, atom_col = "Atom1", wyckoff_symbols = "4c" ) print(paste("Number of rows in original bond table:", nrow(bonded_pairs_with_error))) print(paste("Number of rows after filtering for site '4c':", nrow(bonds_from_4c_site))) ## ----filter-ghost-distances, eval=TRUE----------------------------------- # A distance d is kept if: (r1+r2)*(1-margin) <= d <= (r1+r2)*(1+margin) filtered_result <- filter_ghost_distances( distances = distances, atomic_coordinates = atomic_coordinates, margin = 0.1 # Default margin is 10% ) kept_distances <- filtered_result$kept removed_distances <- filtered_result$removed cat("Total distances calculated:", nrow(distances), "\n") cat("Distances kept after filtering:", nrow(kept_distances), "\n") cat("Unlikely / non-bonded distances removed:", nrow(removed_distances), "\n\n") print("Subset of removed distances (showing physically impossible / too long connections):") print(head(removed_distances)) ## ----filter-by-elements, eval=TRUE--------------------------------------- # Let's filter our bond table to exclude any bonds involving Strontium ("Sr"). # Since all bonds in this structure are Si-Sr, the result should be an empty table. bonds_without_sr <- filter_by_elements( distances = bonded_pairs_with_error, atomic_coordinates = atomic_coordinates, elements_to_exclude = "Sr" ) cat("Number of bonds in original table:", nrow(bonded_pairs_with_error), "\n") cat("Number of bonds after excluding 'Sr':", nrow(bonds_without_sr), "\n") ## ----calculate-weighted-average-distance, eval=TRUE---------------------- # Calculate the weighted average BOND distance for the entire Sr2Si network. # First, identify the bonds in the structure. We use the `bonds_min` table # created in section 1.2.6. # Then, define the Wyckoff sites belonging to the network. Here, it's just "4c". # (We assigned dummy "4c" and "4a" Wyckoff positions to our coordinates in section 2.2). network_wyckoff_sites <- "4c" # Apply the function to the table of identified bonds. # For this simple, ordered structure, all occupancies are 1.0, but the function # correctly applies the full formula. weighted_avg_bond_dist <- calculate_weighted_average_network_distance( distances = bonds_min, # Use the bond table as input atomic_coordinates = atomic_coordinates, wyckoff_symbols = network_wyckoff_sites ) cat("Weighted average network bond distance for the '4c' sites:", weighted_avg_bond_dist, "Å\n") ## ----custom-radii-and-batch, eval=FALSE---------------------------------- # # --- 1. Define and set the custom radii dictionary --- # radii_dict <- c( # Ac=2.15, Ag=1.45, Al=1.21, Am=1.8, Ar=1.06, As=1.19, At=1.5, Au=1.36, B=0.84, # Ba=2.15, Be=0.96, Bi=1.48, Br=1.2, C=0.73, Ca=1.76, Cd=1.44, Ce=2.04, Cl=1.02, # Cm=1.69, Co=1.38, Cr=1.39, Cs=2.44, Cu=1.32, Dy=1.92, Er=1.89, Eu=1.98, F=0.57, # Fe=1.42, Fr=2.6, Ga=1.22, Gd=1.96, Ge=1.2, H=0.31, He=0.28, Hf=1.75, Hg=1.32, # Ho=1.92, I=1.39, In=1.42, Ir=1.41, K=2.03, Kr=1.16, La=2.07, Li=1.28, Lu=1.87, # Mg=1.41, Mn=1.5, Mo=1.54, N=0.71, Na=1.66, Nb=1.64, Nd=2.01, Ne=0.58, Ni=1.24, # Np=1.9, O=0.66, Os=1.44, P=1.07, Pa=2, Pb=1.46, Pd=1.39, Pm=1.99, Po=1.4, Pr=2.03, # Pt=1.36, Pu=1.87, Ra=2.21, Rb=2.2, Re=1.51, Rh=1.42, Rn=1.5, Ru=1.46, S=1.05, # Sb=1.39, Sc=1.7, Se=1.2, Si=1.11, Sm=1.98, Sn=1.39, Sr=1.95, Ta=1.7, Tb=1.94, # Tc=1.47, Te=1.38, Th=2.06, Ti=1.6, Tl=1.45, Tm=1.9, U=1.96, V=1.53, W=1.62, # Xe=1.4, Y=1.9, Yb=1.87, Zn=1.22, Zr=1.75 # ) # # custom_radii_table <- data.table( # Symbol = names(radii_dict), # Radius = as.numeric(radii_dict), # Type = "covalent" # ) # # # Inject the custom table into the current crystract session # set_radii_data(custom_radii_table) # # # --- 2. Run the batch analysis --- # cat("Starting batch analysis on CIF files...\n") # results <- analyze_cif_files( # file_paths = "path/to/cif_directory", # tolerance = 1e-4, # bonding_algorithms = c("minimum_distance", "brunner", "econ", "voronoi", "crystal_nn"), # calculate_bond_angles = FALSE, # Skip angles to speed up extraction # perform_error_propagation = FALSE, # Skip uncertainties # workers = 4 # Adjust based on your available CPU cores # ) # # # --- 3. Extract and Save Coordination Numbers to CSV --- # # Create a dedicated folder for the outputs to avoid clutter # output_dir <- "path/to/output_directory" # if (!dir.exists(output_dir)) dir.create(output_dir) # # # Identify all coordination number columns generated by the pipeline # cn_columns <- grep("^cn_", names(results), value = TRUE) # # cat("\nExtracting coordination numbers and saving to CSV...\n") # # for (col in cn_columns) { # # # Extract the list of data.tables for this specific algorithm # list_of_dts <- results[[col]] # # # Associate each table with its CIF file name # names(list_of_dts) <- results$file_name # # # Unnest and bind all tables together into one giant table # combined_dt <- rbindlist(list_of_dts, idcol = "file_name", fill = TRUE) # # if (nrow(combined_dt) > 0) { # # Format the file path (e.g., "cn_crystal_nn_crystract.csv") # file_path <- file.path(output_dir, paste0(col, "_crystract.csv")) # # # Save to CSV # fwrite(combined_dt, file_path) # # cat(sprintf("Saved %d rows across %d files to: %s\n", # nrow(combined_dt), # uniqueN(combined_dt$file_name), # basename(file_path))) # } else { # cat(sprintf("No valid coordination data found for %s, skipping.\n", col)) # } # } # # cat("\nAll operations finished successfully! Files are in:", output_dir, "\n") ## ----include = FALSE---------------------------------------------------------- # Restore original options as required by CRAN options(old_options)