YMatrix
Quick Start
Simulate Time Series Scenarios
Standard Cluster Deployment
Data Modeling
Connecting to the Database
Data Writing
Data Migration
Data Query
Application Scenario Examples
Federated Query
Maintenance and Monitoring
Global Maintenance
Partition Maintenance
Backup and Restore
Cluster Expansion
Monitoring
Performance Tuning
Troubleshooting
Reference Guide
Tool Guide
Data Type
Storage Engine
Execution Engine
Configuration Parameters
Index
Extension
SQL Reference
FAQ
# Hadoop environment: installation root and client configuration directory.
export HADOOP_HOME=/data/hadoop-2.9.2
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
# Append the Hadoop bin/sbin directories to PATH once.
# (The original line appended $PATH twice and ended with a trailing ':',
# which implicitly puts the current directory on PATH — a security risk.)
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
# NameNode metadata, DataNode block storage, and temp directories.
# -p creates parents and makes the commands safe to rerun.
mkdir -p /data/hadoop-2.9.2/name /data/hadoop-2.9.2/data /data/hadoop-2.9.2/tmp
## Parameter configuration
cat hadoop-env.sh
export JAVA_HOME=/data/jdk1.8
vim $HADOOP_CONF_DIR/core-site.xml
<configuration>
  <!-- HDFS NameNode URI that clients connect to. -->
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://matrixdb01:9000</value>
  </property>
  <!-- Hosts from which user mxadmin may proxy requests. Required so the
       database user can access HDFS through PXF. -->
  <property>
    <name>hadoop.proxyuser.mxadmin.hosts</name>
    <value>*</value>
  </property>
  <!-- Groups whose members mxadmin may impersonate — required. -->
  <property>
    <name>hadoop.proxyuser.mxadmin.groups</name>
    <value>*</value>
  </property>
  <!-- Users that mxadmin may impersonate — required. -->
  <property>
    <name>hadoop.proxyuser.mxadmin.users</name>
    <value>*</value>
  </property>
  <!-- Base directory for Hadoop temporary files. -->
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/data/hadoop-2.9.2/tmp</value>
  </property>
</configuration>
vim $HADOOP_CONF_DIR/hdfs-site.xml
<configuration>
  <!-- Number of replicas kept for each HDFS block. -->
  <property>
    <name>dfs.replication</name>
    <value>3</value>
  </property>
  <!-- NameNode metadata directory. ("dfs.name.dir" is the deprecated
       Hadoop 1.x name for this property.) -->
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>/data/hadoop-2.9.2/name</value>
  </property>
  <!-- DataNode block storage directory. The original used "hadoop.data.dir",
       which is not a recognized HDFS property, so the setting had no effect;
       the correct key is dfs.datanode.data.dir. -->
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>/data/hadoop-2.9.2/data</value>
  </property>
  <!-- Bind the NameNode RPC server on all network interfaces. -->
  <property>
    <name>dfs.namenode.rpc-bind-host</name>
    <value>0.0.0.0</value>
  </property>
</configuration>
vim $HADOOP_CONF_DIR/mapred-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<!-- Run MapReduce jobs on the YARN resource manager. -->
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
</configuration>
vim $HADOOP_CONF_DIR/yarn-site.xml
<?xml version="1.0"?>
<configuration>
<!-- Site specific YARN configuration properties -->
<!-- Host that runs the ResourceManager daemon. -->
<property>
<name>yarn.resourcemanager.hostname</name>
<value>matrixdb01</value>
</property>
<!-- Auxiliary shuffle service needed by MapReduce jobs on YARN. -->
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
</configuration>
vim $HADOOP_CONF_DIR/slaves
matrixdb01
matrixdb02
matrixdb03
# Copy the fully configured Hadoop installation to the other cluster nodes.
scp -r /data/hadoop-2.9.2 matrixdb02:/data/
scp -r /data/hadoop-2.9.2 matrixdb03:/data/
[mxadmin@matrixdb01 hadoop]$ start-all.sh
This script is Deprecated. Instead use start-dfs.sh and start-yarn.sh
Starting namenodes on [matrixdb01]
matrixdb01: starting namenode, logging to /data/hadoop-2.9.2/logs/hadoop-mxadmin-namenode-matrixdb01.out
matrixdb03: starting datanode, logging to /data/hadoop-2.9.2/logs/hadoop-mxadmin-datanode-matrixdb03.out
matrixdb01: starting datanode, logging to /data/hadoop-2.9.2/logs/hadoop-mxadmin-datanode-matrixdb01.out
matrixdb02: starting datanode, logging to /data/hadoop-2.9.2/logs/hadoop-mxadmin-datanode-matrixdb02.out
Starting secondary namenodes [0.0.0.0]
0.0.0.0: starting secondarynamenode, logging to /data/hadoop-2.9.2/logs/hadoop-mxadmin-secondarynamenode-matrixdb01.out
starting yarn daemons
resourcemanager running as process 10127. Stop it first.
matrixdb03: starting nodemanager, logging to /data/hadoop-2.9.2/logs/yarn-mxadmin-nodemanager-matrixdb03.out
matrixdb02: starting nodemanager, logging to /data/hadoop-2.9.2/logs/yarn-mxadmin-nodemanager-matrixdb02.out
matrixdb01: starting nodemanager, logging to /data/hadoop-2.9.2/logs/yarn-mxadmin-nodemanager-matrixdb01.out
[mxadmin@matrixdb01 hadoop]$ jps
11889 NameNode
13010 Jps
12484 NodeManager
12247 SecondaryNameNode
12058 DataNode
10127 ResourceManager
Create a folder:
[mxadmin@matrixdb01 hadoop]$ hdfs dfs -mkdir /greenplum
[mxadmin@matrixdb01 data]$ hdfs dfs -mkdir /greenplum/pxf_examples
[mxadmin@matrixdb01 data]$ hdfs dfs -ls /
drwxr-xr-x - mxadmin supergroup 0 2020-12-30 11:14 /greenplum
[mxadmin@matrixdb01 data]$ hdfs dfs -ls /greenplum/
drwxr-xr-x - mxadmin supergroup 0 2020-12-30 11:14 /greenplum/pxf_examples
[mxadmin@matrixdb01 data] echo 'Prague,Jan,101,4875.33
Rome,Mar,87,1557.39
Bangalore,May,317,8936.99
Beijing,Jul,411,11600.67' > pxf_hdfs_simple.txt
hdfs dfs -put /data/pxf_hdfs_simple.txt /greenplum/pxf_examples/
[mxadmin@matrixdb01 data]$ hdfs dfs -cat /greenplum/pxf_examples/pxf_hdfs_simple.txt
Prague,Jan,101,4875.33
Rome,Mar,87,1557.39
Bangalore,May,317,8936.99
Beijing,Jul,411,11600.67
PXF ships with a default server configuration, stored in $PXF_CONF/servers/default. To target a specific HDFS cluster, create an additional server directory:
# Create a new PXF server configuration named "single_hdfs" and copy in the
# Hadoop client config files it needs to reach the cluster.
mkdir -p "$PXF_CONF/servers/single_hdfs"
cp "$HADOOP_CONF_DIR/core-site.xml" "$PXF_CONF/servers/single_hdfs/"
cp "$HADOOP_CONF_DIR/hdfs-site.xml" "$PXF_CONF/servers/single_hdfs/"
# Replicate the server directory to the same path on the other hosts.
# The original "matrixdb02:pwd" copied into a remote file literally named
# "pwd" — the backticks around pwd were lost; $(pwd) expands the current
# directory so the remote path mirrors the local one.
scp -r single_hdfs matrixdb02:"$(pwd)"
scp -r single_hdfs matrixdb03:"$(pwd)"
createdb pxfhdfs
pxfhdfs=# CREATE EXTENSION pxf_fdw ;
-- Register a PXF foreign server that reads the 'single_hdfs' configuration
-- directory created under $PXF_CONF/servers above.
CREATE SERVER hdfs_svr FOREIGN DATA WRAPPER hdfs_pxf_fdw OPTIONS ( config 'single_hdfs' );
-- Map the database user to the server (no extra credentials supplied here).
CREATE USER MAPPING FOR mxadmin SERVER hdfs_svr;
-- Foreign table over the comma-delimited text file uploaded to HDFS earlier
-- (/greenplum/pxf_examples/pxf_hdfs_simple.txt).
CREATE FOREIGN TABLE pxf_hdfs_table (location text, month text,
num_orders int, total_sales float8) SERVER hdfs_svr OPTIONS ( resource '/greenplum/pxf_examples/pxf_hdfs_simple.txt', format 'text',
delimiter ',');
pxfhdfs=# SELECT * FROM pxf_hdfs_table ;
location | month | num_orders | total_sales
-----------+-------+------------+-------------
Prague | Jan | 101 | 4875.33
Rome | Mar | 87 | 1557.39
Bangalore | May | 317 | 8936.99
Beijing | Jul | 411 | 11600.67
(4 rows)
Make sure the HDFS directory is created
-- Foreign table whose resource is an HDFS directory rather than a single
-- file; the INSERT below writes rows into it. NOTE(review): the directory
-- /greenplum/pxf_dir_examples must exist in HDFS first (see the note above
-- this command in the original walkthrough).
CREATE FOREIGN TABLE pxf_hdfsdir_table (location text, month
text, num_orders int, total_sales float8) SERVER hdfs_svr OPTIONS (resource '/greenplum/pxf_dir_examples', format 'text', delimiter ',');
pxfhdfs=# INSERT INTO pxf_hdfsdir_table SELECT * FROM pxf_hdfs_table ;
INSERT 0 4
pxfhdfs=# SELECT COUNT(*) FROM pxf_hdfsdir_table ;
count
-------
4
(1 row)